// Copyright 2019 Yunion
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. package container_device
  15. import (
  16. "fmt"
  17. "path"
  18. "path/filepath"
  19. "strings"
  20. "yunion.io/x/pkg/errors"
  21. "yunion.io/x/onecloud/pkg/hostman/isolated_device"
  22. )
  23. func init() {
  24. isolated_device.RegisterContainerDeviceManager(newNvidiaGPUShareManager())
  25. }
// nvidiaGPUShareManager is the container device manager that reports
// ContainerDeviceTypeNvidiaGpuShare as its type. It embeds nvidiaGPUManager,
// so that type's methods are promoted unless a method with the same name is
// declared directly on nvidiaGPUShareManager.
type nvidiaGPUShareManager struct {
	nvidiaGPUManager
}
  29. func newNvidiaGPUShareManager() *nvidiaGPUShareManager {
  30. return &nvidiaGPUShareManager{}
  31. }
// GetType returns the container device type handled by this manager, which is
// always isolated_device.ContainerDeviceTypeNvidiaGpuShare.
func (m *nvidiaGPUShareManager) GetType() isolated_device.ContainerDeviceType {
	return isolated_device.ContainerDeviceTypeNvidiaGpuShare
}
// ProbeDevices always returns no devices and no error; this manager does not
// discover devices on its own.
// NOTE(review): presumably share devices are only created through NewDevices
// from explicit configuration — confirm against the callers.
func (m *nvidiaGPUShareManager) ProbeDevices() ([]isolated_device.IDevice, error) {
	return nil, nil
}
  38. func (m *nvidiaGPUShareManager) NewDevices(dev *isolated_device.ContainerDevice) ([]isolated_device.IDevice, error) {
  39. if !strings.HasPrefix(dev.Path, "/dev/dri/renderD") {
  40. return nil, errors.Errorf("device path %q doesn't start with /dev/dri/renderD", dev.Path)
  41. }
  42. if err := CheckVirtualNumber(dev); err != nil {
  43. return nil, err
  44. }
  45. gpuDevs := make([]isolated_device.IDevice, 0)
  46. for i := 0; i < dev.VirtualNumber; i++ {
  47. gpuDev, err := newNvidiaGpuShare(dev.Path, i)
  48. if err != nil {
  49. return nil, errors.Wrapf(err, "new CPH AMD GPU with index %d", i)
  50. }
  51. gpuDevs = append(gpuDevs, gpuDev)
  52. }
  53. return gpuDevs, nil
  54. }
// nvidiaGpuShareDev is one share device created by newNvidiaGpuShare. It
// embeds nvidiaGPU by value and adds the DRM card and render node paths.
type nvidiaGpuShareDev struct {
	nvidiaGPU
	// CardPath is the symlink-resolved target of the
	// /dev/dri/by-path/pci-0000:<addr>-card link for this GPU.
	CardPath string
	// RenderPath is the render node path the device was created from.
	RenderPath string
}
// GetCardPath returns the CardPath field of the device.
func (dev *nvidiaGpuShareDev) GetCardPath() string {
	return dev.CardPath
}
// GetRenderPath returns the RenderPath field of the device.
func (dev *nvidiaGpuShareDev) GetRenderPath() string {
	return dev.RenderPath
}
// nvidiaGpuUsage pairs a probed NVIDIA GPU with a flag recording whether a
// share device has been created for it.
type nvidiaGpuUsage struct {
	*nvidiaGPU
	// Used starts as false and is set to true by newNvidiaGpuShare.
	Used bool
}
  70. var nvidiaGpuUsages map[string]*nvidiaGpuUsage = nil
  71. func getNvidiaGpuUsage() (map[string]*nvidiaGpuUsage, error) {
  72. if nvidiaGpuUsages != nil {
  73. return nvidiaGpuUsages, nil
  74. }
  75. devs, err := getNvidiaGPUs()
  76. if err != nil {
  77. return nil, err
  78. }
  79. if len(devs) == 0 {
  80. return nil, nil
  81. }
  82. gpuUsages := map[string]*nvidiaGpuUsage{}
  83. for i := range devs {
  84. gpuUsages[devs[i].GetAddr()] = &nvidiaGpuUsage{
  85. nvidiaGPU: devs[i],
  86. Used: false,
  87. }
  88. }
  89. nvidiaGpuUsages = gpuUsages
  90. return nvidiaGpuUsages, nil
  91. }
  92. func newNvidiaGpuShare(devPath string, index int) (*nvidiaGpuShareDev, error) {
  93. devUsages, err := getNvidiaGpuUsage()
  94. if err != nil {
  95. return nil, errors.Wrap(err, "getNvidiaGpuUsage")
  96. }
  97. dev, err := newPCIGPURenderBaseDevice(devPath, index, isolated_device.ContainerDeviceTypeNvidiaGpuShare)
  98. if err != nil {
  99. return nil, errors.Wrap(err, "new PCIGPURenderBaseDevice")
  100. }
  101. devAddr := dev.GetOriginAddr()
  102. cardPath := path.Join("/dev/dri/by-path", fmt.Sprintf("pci-0000:%s-card", devAddr))
  103. cardLinkPath, err := filepath.EvalSymlinks(cardPath)
  104. if err != nil {
  105. return nil, errors.Wrapf(err, "read link of %s", cardPath)
  106. }
  107. nvidiaGPUDev, ok := devUsages[devAddr]
  108. if !ok {
  109. return nil, errors.Errorf("newNvidiaGpuShare dev addr not found %s", devAddr)
  110. }
  111. devUsages[devAddr].Used = true
  112. dev.SetDevicePath(nvidiaGPUDev.Path)
  113. return &nvidiaGpuShareDev{
  114. nvidiaGPU: nvidiaGPU{
  115. BaseDevice: dev,
  116. memSize: devUsages[devAddr].memSize,
  117. gpuIndex: devUsages[devAddr].gpuIndex,
  118. deviceMinor: devUsages[devAddr].deviceMinor,
  119. },
  120. CardPath: cardLinkPath,
  121. RenderPath: devPath,
  122. }, nil
  123. }