isolated_device.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package compute
  15. import (
  16. "fmt"
  17. "yunion.io/x/jsonutils"
  18. "yunion.io/x/pkg/errors"
  19. "yunion.io/x/onecloud/pkg/apis"
  20. )
  // ErrMsgIsolatedDeviceUsedByServer is the message text stating that an
  // isolated device is used by a server.
  const ErrMsgIsolatedDeviceUsedByServer = "Isolated device used by server"
  24. type IsolateDeviceDetails struct {
  25. apis.StandaloneResourceDetails
  26. apis.SharableResourceBaseInfo
  27. HostResourceInfo
  28. SIsolatedDevice
  29. // 云主机名称
  30. Guest string `json:"guest"`
  31. // 云主机状态
  32. GuestStatus string `json:"guest_status"`
  33. }
  34. type IsolatedDeviceListInput struct {
  35. apis.StandaloneResourceListInput
  36. apis.ExternalizedResourceBaseListInput
  37. apis.DomainizedResourceListInput
  38. HostFilterListInput
  39. // 只列出GPU直通设备
  40. Gpu *bool `json:"gpu"`
  41. // 只列出USB直通设备
  42. Usb *bool `json:"usb"`
  43. // 只列出未使用的直通设备
  44. Unused *bool `json:"unused"`
  45. // # PCI / GPU-HPC / GPU-VGA / USB / NIC
  46. // 设备类型
  47. DevType []string `json:"dev_type"`
  48. // # Specific device name read from lspci command, e.g. `Tesla K40m` ...
  49. Model []string `json:"model"`
  50. // # pci address of `Bus:Device.Function` format, or usb bus address of `bus.addr`
  51. Addr []string `json:"addr"`
  52. // 设备路径
  53. DevicePath []string `json:"device_path"`
  54. // 设备VENDOE编号
  55. VendorDeviceId []string `json:"vendor_device_id"`
  56. // NUMA节点序号
  57. NumaNode []uint8 `json:"numa_node"`
  58. // 展示物理机的上的设备
  59. ShowBaremetalIsolatedDevices bool `json:"show_baremetal_isolated_devices"`
  60. // 列出虚拟机上挂载的设备
  61. GuestId string `json:"guest_id"`
  62. // GPU index
  63. Index *int `json:"index"`
  64. // Nvidia GPU minor number, parsing from /proc/driver/nvidia/gpus/*/information
  65. DeviceMinor *int `json:"device_minor"`
  66. }
  67. type IsolatedDeviceCreateInput struct {
  68. apis.StandaloneResourceCreateInput
  69. HostResourceInput
  70. IsolatedDeviceReservedResourceInput
  71. // 设备类型USB/GPU
  72. // example: GPU
  73. DevType string `json:"dev_type"`
  74. // 设备型号
  75. // # Specific device name read from lspci command, e.g. `Tesla K40m` ...
  76. Model string `json:"model"`
  77. // PCI地址
  78. // # pci address of `Bus:Device.Function` format, or usb bus address of `bus.addr`
  79. Addr string `json:"addr"`
  80. // legacy vgpu mdev id
  81. MdevId string `json:"mdev_id"`
  82. // 设备VendorId
  83. VendorDeviceId string `json:"vendor_device_id"`
  84. // PCIE information
  85. PCIEInfo *IsolatedDevicePCIEInfo `json:"pcie_info"`
  86. // Host device path
  87. DevicePath string `json:"device_path"`
  88. }
  // IsolatedDeviceReservedResourceInput holds the optional resource
  // reservations attached to a GPU device. Each field is a pointer, so an
  // absent value can be told apart from an explicit zero.
  type IsolatedDeviceReservedResourceInput struct {
  	// Memory reserved for the GPU.
  	ReservedMemory *int `json:"reserved_memory"`

  	// CPU reserved for the GPU.
  	ReservedCpu *int `json:"reserved_cpu"`

  	// Disk storage reserved for the GPU.
  	ReservedStorage *int `json:"reserved_storage"`
  }
  97. type IsolatedDeviceUpdateInput struct {
  98. apis.StandaloneResourceBaseUpdateInput
  99. IsolatedDeviceReservedResourceInput
  100. DevType string `json:"dev_type"`
  101. // PCIE information
  102. PCIEInfo *IsolatedDevicePCIEInfo `json:"pcie_info"`
  103. // Host device path
  104. DevicePath string `json:"device_path"`
  105. Index int `json:"index"`
  106. DeviceMinor int `json:"device_minor"`
  107. }
  // IsolatedDeviceJsonDesc is a flat, JSON-tagged description of an isolated
  // device. All fields are plain values (no pointers), so an unset field is
  // serialized as its zero value.
  type IsolatedDeviceJsonDesc struct {
  	Id                  string `json:"id"`
  	DevType             string `json:"dev_type"`
  	Model               string `json:"model"`
  	Addr                string `json:"addr"`
  	VendorDeviceId      string `json:"vendor_device_id"`
  	Vendor              string `json:"vendor"`
  	NetworkIndex        int    `json:"network_index"`
  	IsInfinibandNic     bool   `json:"is_infiniband_nic"`
  	OvsOffloadInterface string `json:"ovs_offload_interface"`
  	DiskIndex           int8   `json:"disk_index"`
  	NvmeSizeMB          int    `json:"nvme_size_mb"`
  	MdevId              string `json:"mdev_id"`
  	NumaNode            int8   `json:"numa_node"`
  }
  123. type IsolatedDeviceModelCreateInput struct {
  124. apis.StandaloneAnonResourceCreateInput
  125. // 设备类型
  126. // example: NPU
  127. DevType string `json:"dev_type"`
  128. // 设备型号
  129. Model string `json:"model"`
  130. // 设备VendorId
  131. VendorId string `json:"vendor_id"`
  132. // 设备DeviceId
  133. DeviceId string `json:"device_id"`
  134. // 支持热插拔 HotPluggable
  135. HotPluggable bool `json:"hot_pluggable"`
  136. // hosts scan isolated device after isolated_device_model created
  137. Hosts []string `json:"hosts"`
  138. }
  139. type IsolatedDeviceModelUpdateInput struct {
  140. apis.StandaloneAnonResourceBaseUpdateInput
  141. // 设备类型
  142. // example: NPU
  143. DevType string `json:"dev_type"`
  144. // 设备型号
  145. Model string `json:"model"`
  146. // 设备VendorId
  147. VendorId string `json:"vendor_id"`
  148. // 设备DeviceId
  149. DeviceId string `json:"device_id"`
  150. // 支持热插拔 HotPluggable
  151. HotPluggable bool `json:"hot_pluggable"`
  152. }
  153. type IsolatedDeviceModelListInput struct {
  154. apis.StandaloneAnonResourceListInput
  155. // 设备类型
  156. // example: NPU
  157. DevType []string `json:"dev_type"`
  158. // 设备型号
  159. Model []string `json:"model"`
  160. // 设备VendorId
  161. VendorId string `json:"vendor_id"`
  162. // 设备DeviceId
  163. DeviceId string `json:"device_id"`
  164. // 支持热插拔 HotPluggable
  165. HotPluggable bool `json:"hot_pluggable"`
  166. // 宿主机 Id
  167. HostId string `json:"host_id"`
  168. }
  // IsolatedDeviceModelHardwareInfo records hardware figures of a device
  // model: memory size, bandwidth and TFLOPS.
  type IsolatedDeviceModelHardwareInfo struct {
  	// MemoryMB is the GPU memory size in MB.
  	MemoryMB int `json:"memory_mb" help:"Memory size MB of the device"`

  	// Bandwidth is the GPU bandwidth, expressed in GB/s.
  	Bandwidth float64 `json:"bandwidth" help:"Bandwidth of the device, and the unit is GB/s"`

  	// TFLOPS stands for number of floating point operations per second.
  	TFLOPS float64 `json:"tflops" help:"TFLOPS of the device, which standing for number of floating point operations per second"`
  }
  // IsolatedDevicePCIEInfo describes the PCIe link of an isolated device.
  // TransferRatePerLane and LaneWidth are the inputs; Throughput and Version
  // are calculated from them.
  type IsolatedDevicePCIEInfo struct {
  	// Transfer rate per lane.
  	// Transfer rate refers to the encoded serial bit rate; 2.5 GT/s means 2.5 Gbit/s serial data rate.
  	TransferRatePerLane string `json:"transfer_rate_per_lane"`

  	// Lane width
  	LaneWidth int `json:"lane_width,omitzero"`

  	// The following attributes are calculated by TransferRatePerLane and LaneWidth

  	// Throughput indicates the unencoded bandwidth (without 8b/10b, 128b/130b, or 242B/256B encoding overhead).
  	// The PCIe 1.0 transfer rate of 2.5 GT/s per lane means a 2.5 Gbit/s serial bit rate corresponding to a throughput of 2.0 Gbit/s or 250 MB/s prior to 8b/10b encoding.
  	Throughput string `json:"throughput"`

  	// Version is the PCIE version
  	Version string `json:"version"`
  }
  190. func NewIsolatedDevicePCIEInfo(transferRate string, laneWidth int) (*IsolatedDevicePCIEInfo, error) {
  191. info := &IsolatedDevicePCIEInfo{
  192. TransferRatePerLane: transferRate,
  193. LaneWidth: laneWidth,
  194. }
  195. if err := info.fillData(); err != nil {
  196. return info, errors.Wrap(err, "fillData")
  197. }
  198. return info, nil
  199. }
  200. func (info *IsolatedDevicePCIEInfo) String() string {
  201. return jsonutils.Marshal(info).String()
  202. }
  203. func (info *IsolatedDevicePCIEInfo) IsZero() bool {
  204. if *info == (IsolatedDevicePCIEInfo{}) {
  205. return true
  206. }
  207. return false
  208. }
  // PCI Express version identifiers used as the Version value of
  // PCIEVersionThroughput and IsolatedDevicePCIEInfo.
  const (
  	PCIEVersion1 = "1.0"
  	PCIEVersion2 = "2.0"
  	PCIEVersion3 = "3.0"
  	PCIEVersion4 = "4.0"
  	PCIEVersion5 = "5.0"
  	PCIEVersion6 = "6.0"
  	PCIEVersion7 = "7.0"

  	// PCIEVersionUnknown is used when the version is none of the above.
  	PCIEVersionUnknown = "unknown"
  )
  // PCIEVersionThroughput pairs a PCIe version identifier with the throughput
  // figure associated with that version.
  type PCIEVersionThroughput struct {
  	// Version is a PCIe version identifier such as "3.0" or "unknown".
  	Version string `json:"version"`

  	// Throughput is the throughput figure for Version; -1 is used for an
  	// unknown version.
  	Throughput float64 `json:"throughput"`
  }
  223. func NewPCIEVersionThroughput(version string) PCIEVersionThroughput {
  224. // FROM: https://en.wikipedia.org/wiki/PCI_Express
  225. var (
  226. v1 = 0.25
  227. v2 = 0.5
  228. v3 = 0.985
  229. v4 = 1.969
  230. v5 = 3.938
  231. v6 = 7.563
  232. v7 = 15.125
  233. )
  234. table := map[string]float64{
  235. PCIEVersion1: v1,
  236. PCIEVersion2: v2,
  237. PCIEVersion3: v3,
  238. PCIEVersion4: v4,
  239. PCIEVersion5: v5,
  240. PCIEVersion6: v6,
  241. PCIEVersion7: v7,
  242. }
  243. tp, ok := table[version]
  244. if ok {
  245. return PCIEVersionThroughput{
  246. Version: version,
  247. Throughput: tp,
  248. }
  249. }
  250. return PCIEVersionThroughput{
  251. Version: PCIEVersionUnknown,
  252. Throughput: -1,
  253. }
  254. }
  255. func (info *IsolatedDevicePCIEInfo) fillData() error {
  256. vTp := info.GetThroughputPerLane()
  257. info.Version = vTp.Version
  258. info.Throughput = fmt.Sprintf("%.2f GB/s", vTp.Throughput*float64(info.LaneWidth))
  259. return nil
  260. }
  261. func (info IsolatedDevicePCIEInfo) GetThroughputPerLane() PCIEVersionThroughput {
  262. table := map[string]PCIEVersionThroughput{
  263. // version 1.0: 2003
  264. "2.5": NewPCIEVersionThroughput(PCIEVersion1),
  265. // version 2.0: 2007
  266. "5": NewPCIEVersionThroughput(PCIEVersion2),
  267. "5.0": NewPCIEVersionThroughput(PCIEVersion2),
  268. // version 3.0: 2010
  269. "8": NewPCIEVersionThroughput(PCIEVersion3),
  270. "8.0": NewPCIEVersionThroughput(PCIEVersion3),
  271. // version 4.0: 2017
  272. "16": NewPCIEVersionThroughput(PCIEVersion4),
  273. "16.0": NewPCIEVersionThroughput(PCIEVersion4),
  274. // version 5.0: 2019
  275. "32": NewPCIEVersionThroughput(PCIEVersion5),
  276. "32.0": NewPCIEVersionThroughput(PCIEVersion5),
  277. // version 6.0: 2022
  278. "64": NewPCIEVersionThroughput(PCIEVersion6),
  279. "64.0": NewPCIEVersionThroughput(PCIEVersion6),
  280. // version 7.0: 2025(planned)
  281. "128": NewPCIEVersionThroughput(PCIEVersion7),
  282. "128.0": NewPCIEVersionThroughput(PCIEVersion7),
  283. }
  284. for key, val := range table {
  285. if fmt.Sprintf("%sGT/s", key) == info.TransferRatePerLane {
  286. return val
  287. }
  288. }
  289. return NewPCIEVersionThroughput(PCIEVersionUnknown)
  290. }
  291. type HostIsolatedDeviceModelDetails struct {
  292. SHostJointsBase
  293. HostJointResourceDetails
  294. // 宿主机Id
  295. HostId string `json:"host_id"`
  296. // 存储Id
  297. IsolatedDeviceModelId string `json:"isolated_device_model_id"`
  298. Model string `json:"model"`
  299. VendorId string `json:"vendor_id"`
  300. DeviceId string `json:"device_id"`
  301. DevType string `json:"dev_type"`
  302. HotPluggable bool `json:"hot_pluggable"`
  303. DisableAutoDetect bool `json:"disable_auto_detect"`
  304. }