isolated_device_predicate.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package predicates
  15. import (
  16. "context"
  17. "fmt"
  18. "yunion.io/x/onecloud/pkg/apis/compute"
  19. "yunion.io/x/onecloud/pkg/scheduler/core"
  20. )
  21. // IsolatedDevicePredicate check mode, and number of scheduled
  22. // device configurations and current resources.
  23. type IsolatedDevicePredicate struct {
  24. BasePredicate
  25. }
  26. func (f *IsolatedDevicePredicate) Name() string {
  27. return "host_isolated_device"
  28. }
  29. func (f *IsolatedDevicePredicate) Clone() core.FitPredicate {
  30. return &IsolatedDevicePredicate{}
  31. }
  32. func (f *IsolatedDevicePredicate) PreExecute(ctx context.Context, u *core.Unit, cs []core.Candidater) (bool, error) {
  33. data := u.SchedData()
  34. if data.ResetCpuNumaPin {
  35. return false, nil
  36. }
  37. if len(data.IsolatedDevices) > 0 {
  38. return true, nil
  39. }
  40. networks := data.Networks
  41. for i := 0; i < len(networks); i++ {
  42. if networks[i].SriovDevice != nil {
  43. return true, nil
  44. }
  45. }
  46. disks := data.Disks
  47. for i := 0; i < len(disks); i++ {
  48. if disks[i].NVMEDevice != nil {
  49. return true, nil
  50. }
  51. }
  52. return false, nil
  53. }
  54. func (f *IsolatedDevicePredicate) getIsolatedDeviceCountByType(getter core.CandidatePropertyGetter, devType string) int {
  55. devs := getter.UnusedIsolatedDevicesByType(devType)
  56. if devType != compute.CONTAINER_DEV_NVIDIA_MPS && devType != compute.CONTAINER_DEV_NVIDIA_GPU_SHARE {
  57. return len(devs)
  58. } else {
  59. devMap := map[string]struct{}{}
  60. for _, dev := range devs {
  61. devMap[dev.DevicePath] = struct{}{}
  62. }
  63. return len(devMap)
  64. }
  65. }
  66. func (f *IsolatedDevicePredicate) Execute(ctx context.Context, u *core.Unit, c core.Candidater) (bool, []core.PredicateFailureReason, error) {
  67. h := NewPredicateHelper(f, u, c)
  68. reqIsoDevs := u.SchedData().IsolatedDevices
  69. if reqIsoDevs == nil {
  70. reqIsoDevs = []*compute.IsolatedDeviceConfig{}
  71. }
  72. networks := u.SchedData().Networks
  73. for i := 0; i < len(networks); i++ {
  74. if networks[i].SriovDevice != nil {
  75. reqIsoDevs = append(reqIsoDevs, networks[i].SriovDevice)
  76. }
  77. }
  78. disks := u.SchedData().Disks
  79. for i := 0; i < len(disks); i++ {
  80. if disks[i].NVMEDevice != nil {
  81. reqIsoDevs = append(reqIsoDevs, disks[i].NVMEDevice)
  82. }
  83. }
  84. getter := c.Getter()
  85. minCapacity := int64(0xFFFFFFFF)
  86. // check by specify device id
  87. for _, dev := range reqIsoDevs {
  88. if len(dev.Id) == 0 {
  89. continue
  90. }
  91. if fDev := getter.GetIsolatedDevice(dev.Id); fDev != nil {
  92. if len(fDev.GuestID) != 0 {
  93. h.Exclude(fmt.Sprintf("IsolatedDevice %q already used by guest %q", dev.Id, fDev.GuestID))
  94. return h.GetResult()
  95. }
  96. } else {
  97. h.Exclude(fmt.Sprintf("Not found IsolatedDevice %q", dev.Id))
  98. return h.GetResult()
  99. }
  100. minCapacity = 1
  101. }
  102. reqCount := len(reqIsoDevs)
  103. freeCount := len(getter.UnusedIsolatedDevices()) - getter.GetPendingUsage().IsolatedDevice
  104. totalCount := len(getter.GetIsolatedDevices())
  105. // check host isolated device count
  106. if freeCount < reqCount {
  107. h.AppendInsufficientResourceError(int64(reqCount), int64(totalCount), int64(freeCount))
  108. h.Exclude(fmt.Sprintf(
  109. "IsolatedDevice count not enough, request: %d, hostTotal: %d, hostFree: %d",
  110. reqCount, totalCount, freeCount))
  111. return h.GetResult()
  112. }
  113. // check host device by type
  114. devTypeRequest := make(map[string]int, 0)
  115. for _, dev := range reqIsoDevs {
  116. if len(dev.DevType) != 0 {
  117. devTypeRequest[dev.DevType] += 1
  118. }
  119. }
  120. for devType, reqCount := range devTypeRequest {
  121. freeCount := f.getIsolatedDeviceCountByType(getter, devType)
  122. if freeCount < reqCount {
  123. h.Exclude(fmt.Sprintf("IsolatedDevice type %q not enough, request: %d, hostFree: %d", devType, reqCount, freeCount))
  124. return h.GetResult()
  125. }
  126. cap := freeCount / reqCount
  127. if int64(cap) < minCapacity {
  128. minCapacity = int64(cap)
  129. }
  130. }
  131. // check host device by model
  132. devVendorModelRequest := make(map[string]int, 0)
  133. for _, dev := range reqIsoDevs {
  134. if len(dev.Model) != 0 {
  135. devVendorModelRequest[fmt.Sprintf("%s:%s", dev.Vendor, dev.Model)] += 1
  136. }
  137. }
  138. for vendorModel, reqCount := range devVendorModelRequest {
  139. freeCount := len(getter.UnusedIsolatedDevicesByVendorModel(vendorModel))
  140. if freeCount < reqCount {
  141. h.Exclude(fmt.Sprintf("IsolatedDevice vendor:model %q not enough, request: %d, hostFree: %d", vendorModel, reqCount, freeCount))
  142. return h.GetResult()
  143. }
  144. cap := freeCount / reqCount
  145. if int64(cap) < minCapacity {
  146. minCapacity = int64(cap)
  147. }
  148. }
  149. // check host device by device_path
  150. devicePathReq := make(map[string]int, 0)
  151. for _, dev := range reqIsoDevs {
  152. if len(dev.DevicePath) != 0 {
  153. devicePathReq[dev.DevicePath] += 1
  154. }
  155. }
  156. for devPath, reqCnt := range devicePathReq {
  157. freeCount := len(getter.UnusedIsolatedDevicesByDevicePath(devPath))
  158. if freeCount < reqCount {
  159. h.Exclude(fmt.Sprintf("IsolatedDevice device_path %q not enough, request: %d, hostFree: %d", devPath, reqCount, freeCount))
  160. return h.GetResult()
  161. }
  162. cap := freeCount / reqCnt
  163. if int64(cap) < minCapacity {
  164. minCapacity = int64(cap)
  165. }
  166. }
  167. h.SetCapacity(minCapacity)
  168. return h.GetResult()
  169. }