nvidia_vgpu.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package isolated_device
  15. import (
  16. "fmt"
  17. "io/ioutil"
  18. "path"
  19. "strings"
  20. "yunion.io/x/jsonutils"
  21. "yunion.io/x/pkg/errors"
  22. "yunion.io/x/pkg/util/regutils"
  23. "yunion.io/x/onecloud/pkg/apis/compute"
  24. "yunion.io/x/onecloud/pkg/hostman/guestman/desc"
  25. "yunion.io/x/onecloud/pkg/util/fileutils2"
  26. )
  27. type sNVIDIAVgpuDevice struct {
  28. pfDev *PCIDevice
  29. cloudId string
  30. hostId string
  31. guestId string
  32. devType string
  33. mdevId string
  34. model string
  35. profile map[string]string
  36. }
  37. func (dev *sNVIDIAVgpuDevice) String() string {
  38. return jsonutils.Marshal(dev).String()
  39. }
  40. func (dev *sNVIDIAVgpuDevice) IsInfinibandNic() bool {
  41. return false
  42. }
  43. func (dev *sNVIDIAVgpuDevice) GetCloudId() string {
  44. return dev.cloudId
  45. }
  46. func (dev *sNVIDIAVgpuDevice) GetHostId() string {
  47. return dev.hostId
  48. }
  49. func (dev *sNVIDIAVgpuDevice) SetHostId(hId string) {
  50. dev.hostId = hId
  51. }
  52. func (dev *sNVIDIAVgpuDevice) GetGuestId() string {
  53. return dev.guestId
  54. }
  55. func (dev *sNVIDIAVgpuDevice) GetWireId() string {
  56. return ""
  57. }
  58. func (dev *sNVIDIAVgpuDevice) GetOvsOffloadInterfaceName() string {
  59. return ""
  60. }
  61. func (dev *sNVIDIAVgpuDevice) GetVendorDeviceId() string {
  62. return dev.pfDev.GetVendorDeviceId()
  63. }
  64. func (dev *sNVIDIAVgpuDevice) GetAddr() string {
  65. return dev.pfDev.Addr
  66. }
  67. func (dev *sNVIDIAVgpuDevice) GetDeviceType() string {
  68. return dev.devType
  69. }
  70. func (dev *sNVIDIAVgpuDevice) GetModelName() string {
  71. modelName := dev.pfDev.ModelName
  72. if dev.pfDev.ModelName == "" {
  73. modelName = dev.pfDev.DeviceName
  74. }
  75. return modelName + "-" + dev.model
  76. }
  77. func (dev *sNVIDIAVgpuDevice) CustomProbe(idx int) error {
  78. return nil
  79. }
  80. func (dev *sNVIDIAVgpuDevice) GetDevicePath() string {
  81. return ""
  82. }
  83. func (dev *sNVIDIAVgpuDevice) GetNvidiaMpsMemoryLimit() int {
  84. return -1
  85. }
  86. func (dev *sNVIDIAVgpuDevice) GetNvidiaMpsMemoryTotal() int {
  87. return -1
  88. }
  89. func (dev *sNVIDIAVgpuDevice) GetNvidiaMpsThreadPercentage() int {
  90. return -1
  91. }
  92. func (dev *sNVIDIAVgpuDevice) GetCardPath() string {
  93. return ""
  94. }
  95. func (dev *sNVIDIAVgpuDevice) GetRenderPath() string {
  96. return ""
  97. }
  98. func (dev *sNVIDIAVgpuDevice) GetIndex() int {
  99. return -1
  100. }
  101. func (dev *sNVIDIAVgpuDevice) GetDeviceMinor() int {
  102. return -1
  103. }
  104. func (dev *sNVIDIAVgpuDevice) SetDeviceInfo(info CloudDeviceInfo) {
  105. if len(info.Id) != 0 {
  106. dev.cloudId = info.Id
  107. }
  108. if len(info.GuestId) != 0 {
  109. dev.guestId = info.GuestId
  110. }
  111. if len(info.HostId) != 0 {
  112. dev.hostId = info.HostId
  113. }
  114. if len(info.DevType) != 0 {
  115. dev.devType = info.DevType
  116. }
  117. }
  118. func (dev *sNVIDIAVgpuDevice) GetNVIDIAVgpuProfile() map[string]string {
  119. return dev.profile
  120. }
  121. func (dev *sNVIDIAVgpuDevice) GetMdevId() string {
  122. return dev.mdevId
  123. }
  124. func (dev *sNVIDIAVgpuDevice) DetectByAddr() error {
  125. return nil
  126. }
  127. func (dev *sNVIDIAVgpuDevice) GetPassthroughOptions() map[string]string {
  128. return nil
  129. }
  130. func (dev *sNVIDIAVgpuDevice) GetPassthroughCmd(index int) string {
  131. return ""
  132. }
  133. func (dev *sNVIDIAVgpuDevice) GetIOMMUGroupDeviceCmd() string {
  134. return ""
  135. }
  136. func (dev *sNVIDIAVgpuDevice) GetIOMMUGroupRestAddrs() []string {
  137. return nil
  138. }
  139. func (dev *sNVIDIAVgpuDevice) GetPfName() string {
  140. return ""
  141. }
  142. func (dev *sNVIDIAVgpuDevice) GetVirtfn() int {
  143. return -1
  144. }
  145. func (dev *sNVIDIAVgpuDevice) GetNVMESizeMB() int {
  146. return -1
  147. }
  148. func (dev *sNVIDIAVgpuDevice) GetVGACmd() string {
  149. return ""
  150. }
  151. func (dev *sNVIDIAVgpuDevice) GetCPUCmd() string {
  152. return ""
  153. }
  154. func (dev *sNVIDIAVgpuDevice) GetQemuId() string {
  155. return "dev_" + dev.mdevId
  156. }
  157. func (dev *sNVIDIAVgpuDevice) GetNumaNode() (int, error) {
  158. numaNodePath := fmt.Sprintf("/sys/bus/pci/devices/0000:%s/numa_node", dev.GetAddr())
  159. numaNode, err := fileutils2.FileGetIntContent(numaNodePath)
  160. if err != nil {
  161. return -1, errors.Wrap(err, "get device numa node")
  162. }
  163. return numaNode, nil
  164. }
  165. func (dev *sNVIDIAVgpuDevice) GetHotPlugOptions(isolatedDev *desc.SGuestIsolatedDevice, guestDesc *desc.SGuestDesc) ([]*HotPlugOption, error) {
  166. ret := make([]*HotPlugOption, 0)
  167. var masterDevOpt *HotPlugOption
  168. for i := 0; i < len(isolatedDev.VfioDevs); i++ {
  169. sysfsdev := path.Join("/sys/bus/mdev/devices", isolatedDev.MdevId)
  170. opts := map[string]string{
  171. "sysfsdev": sysfsdev,
  172. "bus": isolatedDev.VfioDevs[i].BusStr(),
  173. "addr": isolatedDev.VfioDevs[i].SlotFunc(),
  174. "id": isolatedDev.VfioDevs[i].Id,
  175. }
  176. if isolatedDev.VfioDevs[i].Multi != nil {
  177. if *isolatedDev.VfioDevs[i].Multi {
  178. opts["multifunction"] = "on"
  179. } else {
  180. opts["multifunction"] = "off"
  181. }
  182. }
  183. devOpt := &HotPlugOption{
  184. Device: isolatedDev.VfioDevs[i].DevType,
  185. Options: opts,
  186. }
  187. if isolatedDev.VfioDevs[i].Function == 0 {
  188. masterDevOpt = devOpt
  189. } else {
  190. ret = append(ret, devOpt)
  191. }
  192. }
  193. // if PCI slot function 0 already assigned, qemu will reject hotplug function
  194. // so put function 0 at the enda
  195. if masterDevOpt == nil {
  196. return nil, errors.Errorf("GPU Device no function 0 found")
  197. }
  198. ret = append(ret, masterDevOpt)
  199. return ret, nil
  200. }
  201. func (dev *sNVIDIAVgpuDevice) GetHotUnplugOptions(isolatedDev *desc.SGuestIsolatedDevice) ([]*HotUnplugOption, error) {
  202. if len(isolatedDev.VfioDevs) == 0 {
  203. return nil, errors.Errorf("device %s no pci ids", isolatedDev.Id)
  204. }
  205. return []*HotUnplugOption{
  206. {
  207. Id: isolatedDev.VfioDevs[0].Id,
  208. },
  209. }, nil
  210. }
  211. // GetPCIEInfo implements IDevice.
  212. func (dev *sNVIDIAVgpuDevice) GetPCIEInfo() *compute.IsolatedDevicePCIEInfo {
  213. return dev.pfDev.PCIEInfo
  214. }
  215. func NewNvidiaVgpuDevice(dev *PCIDevice, devType, mdevId, model string, profile map[string]string) *sNVIDIAVgpuDevice {
  216. return &sNVIDIAVgpuDevice{
  217. pfDev: dev,
  218. devType: devType,
  219. mdevId: mdevId,
  220. model: model,
  221. profile: profile,
  222. }
  223. }
  224. func getNvidiaVGpus(gpuPF string) ([]*sNVIDIAVgpuDevice, error) {
  225. mdevDeviceDir := fmt.Sprintf("/sys/class/mdev_bus/0000:%s", gpuPF)
  226. if !fileutils2.Exists(mdevDeviceDir) {
  227. return nil, errors.Errorf("unknown device %s", gpuPF)
  228. }
  229. pfDev, err := detectPCIDevByAddrWithoutIOMMUGroup(gpuPF)
  230. if err != nil {
  231. return nil, errors.Wrap(err, "detect pf device")
  232. }
  233. // regutils.MatchUUID(self.HostId)
  234. files, err := ioutil.ReadDir(mdevDeviceDir)
  235. if err != nil {
  236. return nil, errors.Wrap(err, "read mdev device path")
  237. }
  238. nvidiaVgpus := make([]*sNVIDIAVgpuDevice, 0)
  239. for i := range files {
  240. if !regutils.MatchUUID(files[i].Name()) {
  241. continue
  242. }
  243. mdevPath := path.Join(mdevDeviceDir, files[i].Name())
  244. model, err := fileutils2.FileGetContents(path.Join(mdevPath, "mdev_type", "name"))
  245. if err != nil {
  246. return nil, errors.Wrap(err, "read file mdev_type/name")
  247. }
  248. model = strings.TrimSpace(model)
  249. // eg: num_heads=4, frl_config=60, framebuffer=1024M, max_resolution=5120x2880, max_instance=24
  250. vgpuProfile, err := fileutils2.FileGetContents(path.Join(mdevPath, "mdev_type", "description"))
  251. if err != nil {
  252. return nil, errors.Wrap(err, "read file mdev_type/description")
  253. }
  254. keys := []string{"num_heads", "frl_config", "framebuffer", "max_resolution", "max_instance"}
  255. profile := make(map[string]string)
  256. for _, key := range keys {
  257. keyWithValue := key + "="
  258. if strings.Contains(vgpuProfile, keyWithValue) {
  259. startIndex := strings.Index(vgpuProfile, keyWithValue)
  260. endIndex := startIndex + len(keyWithValue)
  261. value := strings.Split(vgpuProfile[endIndex:], ",")[0]
  262. profile[key] = strings.TrimSpace(value)
  263. }
  264. }
  265. mdev := NewNvidiaVgpuDevice(pfDev, compute.LEGACY_VGPU_TYPE, files[i].Name(), model, profile)
  266. nvidiaVgpus = append(nvidiaVgpus, mdev)
  267. }
  268. return nvidiaVgpus, nil
  269. }