guesthelper.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package guestman
  15. import (
  16. "fmt"
  17. "path"
  18. "sort"
  19. "sync"
  20. "github.com/jaypipes/ghw/pkg/topology"
  21. "yunion.io/x/cloudmux/pkg/multicloud/esxi/vcenter"
  22. "yunion.io/x/jsonutils"
  23. "yunion.io/x/log"
  24. "yunion.io/x/pkg/errors"
  25. "yunion.io/x/onecloud/pkg/apis"
  26. "yunion.io/x/onecloud/pkg/apis/compute"
  27. hostapi "yunion.io/x/onecloud/pkg/apis/host"
  28. "yunion.io/x/onecloud/pkg/hostman/guestman/desc"
  29. "yunion.io/x/onecloud/pkg/hostman/options"
  30. "yunion.io/x/onecloud/pkg/hostman/storageman"
  31. "yunion.io/x/onecloud/pkg/mcclient"
  32. "yunion.io/x/onecloud/pkg/util/cgrouputils/cpuset"
  33. "yunion.io/x/onecloud/pkg/util/fileutils2"
  34. )
// SBaseParams is the minimal payload for a guest operation: the server id
// and the raw JSON request body.
type SBaseParams struct {
	Sid  string
	Body jsonutils.JSONObject
}

// SGuestDeploy carries a guest deploy request. IsInit marks the first
// deployment of a newly created guest.
type SGuestDeploy struct {
	UserCred mcclient.TokenCredential
	Sid      string
	Body     jsonutils.JSONObject
	IsInit   bool
}

// SSrcPrepareMigrate is the source-side prepare-migrate request: whether the
// migration is live and whether the live channel uses TLS.
type SSrcPrepareMigrate struct {
	Sid               string
	LiveMigrate       bool
	LiveMigrateUseTLS bool
}
// SDestPrepareMigrate carries everything the destination host needs to
// prepare for an incoming guest migration: source endpoints, disk/snapshot
// transfer URIs, TLS material, and the source/target guest descriptions.
type SDestPrepareMigrate struct {
	Sid          string
	ServerUrl    string
	QemuVersion  string
	MigrateCerts map[string]string
	EnableTLS    bool
	SnapshotsUri string
	DisksUri     string
	// TargetStorageId string
	TargetStorageIds []string
	LiveMigrate      bool
	// RebaseDisks: rebase disk backing files on the destination
	RebaseDisks bool
	// Desc is the guest description to run on the destination; SrcDesc is
	// the description as it was on the source host.
	Desc               *desc.SGuestDesc
	SrcDesc            *desc.SGuestDesc
	DisksBackingFile   jsonutils.JSONObject
	DiskSnapsChain     jsonutils.JSONObject
	OutChainSnaps      jsonutils.JSONObject
	SysDiskHasTemplate bool
	MemorySnapshotsUri string
	SrcMemorySnapshots []string
	UserCred           mcclient.TokenCredential
}
// SLiveMigrate is a live-migrate request toward a destination host.
type SLiveMigrate struct {
	Sid           string
	DestPort      int
	NbdServerPort int
	DestIp        string
	// IsLocal: destination is on the same host (local storage migration)
	// — presumably; confirm against caller.
	IsLocal   bool
	EnableTLS bool
	// MaxBandwidthMB optionally caps migration bandwidth.
	MaxBandwidthMB *int64
	QuicklyFinish  bool
}

// SDriverMirror is a drive-mirror request targeting an NBD server.
type SDriverMirror struct {
	Sid          string
	NbdServerUri string
	Desc         *desc.SGuestDesc
}

// SGuestHotplugCpuMem requests hot-adding CPUs and/or memory. Either the
// deltas (AddCpuCount/AddMemSize) or the absolute targets
// (TotalCpuCount/TotalMemSize) may be supplied; CpuNumaPin carries the NUMA
// pinning for the added resources.
type SGuestHotplugCpuMem struct {
	Sid           string
	AddCpuCount   int64
	AddMemSize    int64
	TotalCpuCount *int64
	TotalMemSize  *int64
	CpuNumaPin    []*desc.SCpuNumaPin
}
// SReloadDisk asks a guest to reload one of its disks.
type SReloadDisk struct {
	Sid  string
	Disk storageman.IDisk
}

// SBackupDiskConfig extends a disk config with backup naming and the
// backup-as-tar options.
type SBackupDiskConfig struct {
	compute.DiskConfig
	Name        string                         `json:"name"`
	BackupAsTar *compute.DiskBackupAsTarInput  `json:"backup_as_tar"`
}

// SDiskSnapshot requests a snapshot of one guest disk.
type SDiskSnapshot struct {
	UserCred         mcclient.TokenCredential
	Sid              string
	SnapshotId       string
	BackupDiskConfig *SBackupDiskConfig
	Disk             storageman.IDisk
}

// SMemorySnapshot requests taking a guest memory snapshot.
type SMemorySnapshot struct {
	*hostapi.GuestMemorySnapshotRequest
	Sid string
}

// SMemorySnapshotReset requests restoring a guest from a memory snapshot.
type SMemorySnapshotReset struct {
	*hostapi.GuestMemorySnapshotResetRequest
	Sid string
}

// SMemorySnapshotDelete requests deleting a stored memory snapshot.
type SMemorySnapshotDelete struct {
	*hostapi.GuestMemorySnapshotDeleteRequest
}

// SDiskBackup requests creating a disk backup from a snapshot.
type SDiskBackup struct {
	Sid        string
	SnapshotId string
	BackupId   string
	Disk       storageman.IDisk
}

// SDeleteDiskSnapshot requests deleting (and possibly converting/streaming)
// a disk snapshot. The counters track progress across a multi-snapshot
// deletion.
type SDeleteDiskSnapshot struct {
	Sid             string
	DeleteSnapshot  string
	Disk            storageman.IDisk
	ConvertSnapshot string
	// BlockStream: flatten via block-stream instead of commit — presumably;
	// confirm against the handler.
	BlockStream              bool
	EncryptInfo              apis.SEncryptInfo
	TotalDeleteSnapshotCount int
	DeletedSnapshotCount     int
}
// SLibvirtServer identifies a libvirt guest to import: its UUID and a
// MAC-address -> IP map for its interfaces.
type SLibvirtServer struct {
	Uuid  string
	MacIp map[string]string
}

// SLibvirtDomainImportConfig describes a batch import of libvirt domains.
// NOTE(review): "Libvrit" is a typo for "Libvirt", but the field is exported
// and may be referenced by callers, so it is kept as-is.
type SLibvirtDomainImportConfig struct {
	LibvritDomainXmlDir string
	Servers             []SLibvirtServer
}

// SGuestCreateFromLibvirt creates a guest from an imported libvirt domain.
type SGuestCreateFromLibvirt struct {
	Sid         string
	MonitorPath string
	GuestDesc   *desc.SGuestDesc
	DisksPath   *jsonutils.JSONDict
}

// SGuestIoThrottle carries a per-disk IO throttle request for guest Sid.
type SGuestIoThrottle struct {
	Sid   string
	Input *compute.ServerSetDiskIoThrottleInput
}

// SGuestCreateFromEsxi creates a guest by migrating one in from ESXi.
type SGuestCreateFromEsxi struct {
	Sid            string
	GuestDesc      *desc.SGuestDesc
	EsxiAccessInfo SEsxiAccessInfo
}

// SEsxiAccessInfo is the access information for the source ESXi guest.
type SEsxiAccessInfo struct {
	Datastore  vcenter.SVCenterAccessInfo
	HostIp     string
	GuestExtId string
}

// SGuestCreateFromCloudpods creates a guest by migrating one in from
// another Cloudpods deployment.
type SGuestCreateFromCloudpods struct {
	Sid                 string
	GuestDesc           *desc.SGuestDesc
	CloudpodsAccessInfo SCloudpodsAccessInfo
}

// SCloudpodsAccessInfo is the access information for the source Cloudpods
// host and the ids of the guest's original disks.
type SCloudpodsAccessInfo struct {
	HostIp        string
	OriginDisksId []string
}

// SQgaGuestSetPassword sets a guest account password via the QEMU guest
// agent.
type SQgaGuestSetPassword struct {
	*hostapi.GuestSetPasswordRequest
	Sid string
}

// SQgaGuestSetNetwork configures a guest NIC via the QEMU guest agent.
type SQgaGuestSetNetwork struct {
	// Timeout for the QGA operation — unit not visible here; TODO confirm.
	Timeout  int
	Sid      string
	Device   string
	Ipmask   string
	Gateway  string
	Ip6mask  string
	Gateway6 string
}
// CpuSetCounter tracks per-NUMA-node cpu and memory allocation for guests on
// this host. Nodes is kept sorted most-free-first (see Less). Lock guards
// mutation of Nodes and GuestIds.
type CpuSetCounter struct {
	Nodes []*NumaNode
	// NumaEnabled: allocate memory per NUMA node as well as cpus.
	NumaEnabled bool
	// CPUCmtbound / MEMCmtbound are the cpu and memory overcommit ratios.
	CPUCmtbound float32
	MEMCmtbound float32
	// GuestIds records guests that have allocated through this counter.
	GuestIds map[string]struct{}
	Lock     sync.Mutex
}
// NewGuestCpuSetCounter builds a CpuSetCounter from the host topology.
// CPUs are grouped into CPUDie units by shared L3 cache; when a node exposes
// no L3 cache information, all of the node's cores fall into a single die.
// CPUs in reservedCpus are excluded from the dies and counted in
// ReserveCpuCount. reservedMemMb is split evenly across nodes and deducted
// from each node's usable memory (container hosts only, see NewNumaNode).
func NewGuestCpuSetCounter(
	info *hostapi.HostTopology, reservedCpus cpuset.CPUSet, numaAllocate, isContainerHost bool,
	hugepageSizeKB int, cpuCmtbound, memCmtBound float32, reservedMemMb int,
) (*CpuSetCounter, error) {
	cpuSetCounter := new(CpuSetCounter)
	cpuSetCounter.Nodes = make([]*NumaNode, len(info.Nodes))
	cpuSetCounter.NumaEnabled = numaAllocate
	cpuSetCounter.CPUCmtbound = cpuCmtbound
	cpuSetCounter.MEMCmtbound = memCmtBound
	cpuSetCounter.GuestIds = map[string]struct{}{}
	if len(info.Nodes) == 0 {
		return cpuSetCounter, nil
	}
	hasL3Cache := false
	// per-node share of the reserved memory, converted MB -> KB
	nodeReserveMem := reservedMemMb / len(info.Nodes) * 1024
	for i := 0; i < len(info.Nodes); i++ {
		node, err := NewNumaNode(
			info.Nodes[i],
			cpuSetCounter.NumaEnabled,
			isContainerHost,
			hugepageSizeKB,
			memCmtBound,
			nodeReserveMem,
		)
		if err != nil {
			return nil, err
		}
		reservedCpuCnt := 0
		cpuDies := make([]*CPUDie, 0)
		// one CPUDie per L3 cache: cpus sharing an L3 form a die
		for j := 0; j < len(info.Nodes[i].Caches); j++ {
			if info.Nodes[i].Caches[j].Level != 3 {
				continue
			}
			hasL3Cache = true
			cpuDie := new(CPUDie)
			dieBuilder := cpuset.NewBuilder()
			for k := 0; k < len(info.Nodes[i].Caches[j].LogicalProcessors); k++ {
				// reserved cpus are excluded from allocatable dies
				if reservedCpus.Contains(int(info.Nodes[i].Caches[j].LogicalProcessors[k])) {
					reservedCpuCnt += 1
					continue
				}
				dieBuilder.Add(int(info.Nodes[i].Caches[j].LogicalProcessors[k]))
			}
			cpuDie.LogicalProcessors = dieBuilder.Result()
			node.CpuCount += cpuDie.LogicalProcessors.Size()
			node.LogicalProcessors = node.LogicalProcessors.Union(cpuDie.LogicalProcessors)
			cpuDie.initCpuFree(cpuCmtbound)
			cpuDies = append(cpuDies, cpuDie)
		}
		// fallback: no L3 cache info — put all of the node's cores in one die
		if !hasL3Cache {
			cpuDie := new(CPUDie)
			dieBuilder := cpuset.NewBuilder()
			for j := 0; j < len(info.Nodes[i].Cores); j++ {
				for k := 0; k < len(info.Nodes[i].Cores[j].LogicalProcessors); k++ {
					if reservedCpus.Contains(info.Nodes[i].Cores[j].LogicalProcessors[k]) {
						reservedCpuCnt += 1
						continue
					}
					dieBuilder.Add(info.Nodes[i].Cores[j].LogicalProcessors[k])
				}
			}
			cpuDie.LogicalProcessors = dieBuilder.Result()
			node.CpuCount += cpuDie.LogicalProcessors.Size()
			node.LogicalProcessors = node.LogicalProcessors.Union(cpuDie.LogicalProcessors)
			cpuDie.initCpuFree(cpuCmtbound)
			cpuDies = append(cpuDies, cpuDie)
		}
		// reset the flag so the next node is evaluated independently
		hasL3Cache = false
		node.CpuDies = cpuDies
		node.ReserveCpuCount = reservedCpuCnt
		sort.Sort(node.CpuDies)
		cpuSetCounter.Nodes[i] = node
	}
	sort.Sort(cpuSetCounter)
	log.Infof("cpusetcounter %s", jsonutils.Marshal(cpuSetCounter))
	return cpuSetCounter, nil
}
  273. func (pq *CpuSetCounter) AllocCpusetWithNodeCount(vcpuCount int, memSizeKB int64, nodeCount int, guestId string) (map[int]SAllocNumaCpus, error) {
  274. if nodeCount <= 0 {
  275. return nil, nil
  276. }
  277. if !pq.NumaEnabled {
  278. return pq.AllocCpuset(vcpuCount, memSizeKB, nil, guestId)
  279. }
  280. if len(pq.Nodes) < nodeCount {
  281. return nil, nil
  282. }
  283. pq.GuestIds[guestId] = struct{}{}
  284. pq.Lock.Lock()
  285. defer pq.Lock.Unlock()
  286. var res = map[int]SAllocNumaCpus{}
  287. var nodeAllocSize = memSizeKB / int64(nodeCount)
  288. if pq.nodesEnough(nodeCount, vcpuCount, int(memSizeKB)) {
  289. var pcpuCount = vcpuCount / nodeCount
  290. var remPcpuCount = vcpuCount % nodeCount
  291. for i := 0; i < nodeCount; i++ {
  292. var npcpuCount = pcpuCount
  293. if remPcpuCount > 0 {
  294. npcpuCount += 1
  295. remPcpuCount -= 1
  296. }
  297. res[pq.Nodes[i].NodeId] = SAllocNumaCpus{
  298. Cpuset: pq.Nodes[i].AllocCpuset(npcpuCount),
  299. MemSizeKB: nodeAllocSize,
  300. Unregular: false,
  301. }
  302. pq.Nodes[i].NumaNodeFreeMemSizeKB -= nodeAllocSize
  303. pq.Nodes[i].VcpuCount += npcpuCount
  304. }
  305. }
  306. return res, nil
  307. }
// SAllocNumaCpus is the per-node result of a cpu/memory allocation: the
// physical cpu ids to pin to and the memory share on that node. Unregular
// marks an allocation whose node->size mapping was imposed by the caller
// (see setNumaNodes) rather than computed by even splitting.
type SAllocNumaCpus struct {
	Cpuset    []int
	MemSizeKB int64
	Unregular bool
}
// IsNumaEnabled reports whether NUMA-aware allocation is enabled for this
// counter.
func (pq *CpuSetCounter) IsNumaEnabled() bool {
	return pq.NumaEnabled
}
// AllocCpuset allocates vcpuCount vCPUs (and, in NUMA mode, memSizeKB of
// memory) for guest guestId. When preferNumaNodes is given it first tries
// 1, 2, 4, ... nodes ranked by distance to the preferred nodes; otherwise it
// delegates to AllocNumaNodes (NUMA mode) or greedily fills the currently
// most-free node (non-NUMA mode). Returns a map keyed by node id.
func (pq *CpuSetCounter) AllocCpuset(vcpuCount int, memSizeKB int64, preferNumaNodes []int8, guestId string) (map[int]SAllocNumaCpus, error) {
	pq.Lock.Lock()
	defer pq.Lock.Unlock()
	if len(pq.Nodes) == 0 {
		return nil, nil
	}
	pq.GuestIds[guestId] = struct{}{}
	if pq.NumaEnabled && len(preferNumaNodes) > 0 {
		sortedNumaDistance := pq.getDistancesSeqByPreferNodes(preferNumaNodes, int(memSizeKB))
		// try doubling node counts until an allocation succeeds
		for nodeCount := 1; nodeCount <= len(pq.Nodes); nodeCount *= 2 {
			ret := pq.allocCpuNumaNodesByPreferNodes(vcpuCount, int(memSizeKB), nodeCount, sortedNumaDistance)
			if ret != nil {
				// commit the accounting for every node that took part
				for i := range pq.Nodes {
					if cpupin, ok := ret[pq.Nodes[i].NodeId]; ok {
						pq.Nodes[i].VcpuCount += vcpuCount
						pq.Nodes[i].NumaNodeFreeMemSizeKB -= cpupin.MemSizeKB
					}
				}
				sort.Sort(pq)
				return ret, nil
			}
		}
	}
	res := map[int]SAllocNumaCpus{}
	sourceVcpuCount := vcpuCount
	if pq.NumaEnabled {
		err := pq.AllocNumaNodes(vcpuCount, memSizeKB, res)
		return res, err
	} else {
		// non-NUMA: repeatedly take cpus from the most-free node (Nodes[0]),
		// halving the request when it exceeds that node's cpu count
		for vcpuCount > 0 {
			count := vcpuCount
			if vcpuCount > pq.Nodes[0].CpuCount {
				count = vcpuCount/2 + vcpuCount%2
			}
			res[pq.Nodes[0].NodeId] = SAllocNumaCpus{
				Cpuset: pq.Nodes[0].AllocCpuset(count),
			}
			// NOTE(review): the full original vCPU count is added to the node
			// on every iteration even though only `count` cpus were taken
			// from it — looks like it may over-count VcpuCount; confirm
			// whether this is intentional commitment accounting.
			pq.Nodes[0].VcpuCount += sourceVcpuCount
			vcpuCount -= count
			sort.Sort(pq)
		}
		return res, nil
	}
}
  360. func (pq *CpuSetCounter) allocCpuNumaNodesByPreferNodes(
  361. vcpuCount, memSizeKB, nodeCount int, sortedNumaDistance []SSortedNumaDistance,
  362. ) map[int]SAllocNumaCpus {
  363. res := map[int]SAllocNumaCpus{}
  364. var nodeAllocSize = memSizeKB / nodeCount
  365. var pcpuCount = vcpuCount / nodeCount
  366. var remPcpuCount = vcpuCount % nodeCount
  367. allocatedNode := 0
  368. for i := range sortedNumaDistance {
  369. if allocatedNode >= nodeCount {
  370. break
  371. }
  372. var npcpuCount = pcpuCount
  373. if remPcpuCount > 0 {
  374. npcpuCount += 1
  375. remPcpuCount -= 1
  376. }
  377. nodeIdx := sortedNumaDistance[i].NodeIndex
  378. if pq.Nodes[nodeIdx].nodeEnough(vcpuCount, memSizeKB, pq.CPUCmtbound, pq.NumaEnabled) {
  379. cpuNumaPin := SAllocNumaCpus{
  380. Cpuset: pq.Nodes[nodeIdx].AllocCpuset(npcpuCount),
  381. }
  382. cpuNumaPin.MemSizeKB = int64(nodeAllocSize)
  383. res[pq.Nodes[nodeIdx].NodeId] = cpuNumaPin
  384. allocatedNode += 1
  385. } else {
  386. log.Infof("node %v not enough", pq.Nodes[i])
  387. }
  388. log.Infof("node %d, free mems %d, vcpuCount %d, GuestCounts %v", pq.Nodes[nodeIdx].NodeId, pq.Nodes[nodeIdx].NumaNodeFreeMemSizeKB, pq.Nodes[nodeIdx].VcpuCount, len(pq.GuestIds))
  389. }
  390. if allocatedNode < nodeCount {
  391. return nil
  392. }
  393. return res
  394. }
// SSortedNumaDistance is a ranking entry for one NUMA node: its index in
// CpuSetCounter.Nodes, its summed distance to the preferred nodes, its free
// memory, its memory usage ratio, and whether any of its cpus are reserved.
type SSortedNumaDistance struct {
	NodeIndex   int
	Distance    int
	FreeMemSize int
	UsedRate    float32
	CpuReserved bool
}
// getDistancesSeqByPreferNodes ranks all NUMA nodes for an allocation that
// prefers preferNumaNodes: primarily by summed distance to the preferred
// nodes (with a tolerance band of 7), then by usage/free-memory heuristics.
func (pq *CpuSetCounter) getDistancesSeqByPreferNodes(preferNumaNodes []int8, memSizeKB int) []SSortedNumaDistance {
	sortedNumaDistance := make([]SSortedNumaDistance, len(pq.Nodes))
	for i := range pq.Nodes {
		// summed topology distance from node i to every preferred node
		distance := 0
		for j := range preferNumaNodes {
			distance += pq.Nodes[i].Distances[preferNumaNodes[j]]
		}
		// discount usable capacity of nodes whose cpus are partly reserved
		var useableCpuRate float32 = 1.0
		if pq.Nodes[i].ReserveCpuCount > 0 {
			useableCpuRate = float32(pq.Nodes[i].CpuCount) / float32(pq.Nodes[i].CpuCount+pq.Nodes[i].ReserveCpuCount)
		}
		usedMems := float32(pq.Nodes[i].NumaNodeMemSizeKB - pq.Nodes[i].NumaNodeFreeMemSizeKB)
		usedRate := usedMems / (float32(pq.Nodes[i].MemTotalSizeKB) * pq.MEMCmtbound * useableCpuRate)
		//memCmt := float32(usedMems / pq.Nodes[i].NumaNodeMemSizeKB)
		//cpuPro := float32(pq.Nodes[i].CpuCount) * pq.CPUCmtbound / (float32(pq.Nodes[i].CpuCount)*pq.CPUCmtbound - float32(pq.Nodes[i].VcpuCount))
		sortedNumaDistance[i] = SSortedNumaDistance{
			NodeIndex:   i,
			Distance:    distance,
			FreeMemSize: int(pq.Nodes[i].NumaNodeFreeMemSizeKB),
			UsedRate:    usedRate,
			CpuReserved: pq.Nodes[i].ReserveCpuCount > 0,
		}
	}
	sort.Slice(sortedNumaDistance, func(i, j int) bool {
		// 7 is tolerant max distances
		// distances differing by more than the tolerance order strictly
		if sortedNumaDistance[i].Distance > (7 + sortedNumaDistance[j].Distance) {
			return false
		} else if (sortedNumaDistance[i].Distance + 7) < sortedNumaDistance[j].Distance {
			return true
		}
		// NOTE(review): only i's CpuReserved is consulted here (not j's), and
		// the branches below are not obviously transitive — this comparator
		// may not define a strict weak ordering; confirm sort stability
		// expectations.
		if sortedNumaDistance[i].CpuReserved {
			return sortedNumaDistance[i].UsedRate < sortedNumaDistance[j].UsedRate
		}
		if sortedNumaDistance[i].Distance < sortedNumaDistance[j].Distance {
			return sortedNumaDistance[i].FreeMemSize > memSizeKB && sortedNumaDistance[j].FreeMemSize-sortedNumaDistance[i].FreeMemSize <= 2*memSizeKB
		} else {
			return sortedNumaDistance[j].FreeMemSize > memSizeKB && sortedNumaDistance[i].FreeMemSize-sortedNumaDistance[j].FreeMemSize >= 2*memSizeKB
		}
	})
	return sortedNumaDistance
}
// AllocNumaNodes allocates vcpuCount vCPUs and memSizeKB memory across
// 1, 2, 4, ... nodes (the first count that fits), filling res keyed by node
// id. Splits whose per-node memory share is not GiB-aligned are skipped —
// presumably to match 1 GiB hugepage granularity; TODO confirm. If no count
// fits, res is left empty and nil is returned (no error).
func (pq *CpuSetCounter) AllocNumaNodes(vcpuCount int, memSizeKB int64, res map[int]SAllocNumaCpus) error {
	var allocated = false
	// alloc numa nodes in order 1, 2, 4, ...
	if !allocated {
		for nodeCount := 1; nodeCount <= len(pq.Nodes); nodeCount *= 2 {
			// never spread across more nodes than there are vCPUs
			if nodeCount > vcpuCount {
				break
			}
			if ok := pq.nodesEnough(nodeCount, vcpuCount, int(memSizeKB)); !ok {
				log.Infof("node count %d not enough", nodeCount)
				continue
			}
			var nodeAllocSize = memSizeKB / int64(nodeCount)
			// skip if the per-node share (KB) is not a whole number of GiB
			if nodeAllocSize/1024%1024 > 0 {
				continue
			}
			// even split; the first remPcpuCount nodes get one extra vCPU
			var pcpuCount = vcpuCount / nodeCount
			var remPcpuCount = vcpuCount % nodeCount
			for i := 0; i < nodeCount; i++ {
				var npcpuCount = pcpuCount
				if remPcpuCount > 0 {
					npcpuCount += 1
					remPcpuCount -= 1
				}
				res[pq.Nodes[i].NodeId] = SAllocNumaCpus{
					Cpuset:    pq.Nodes[i].AllocCpuset(npcpuCount),
					MemSizeKB: nodeAllocSize,
					Unregular: false,
				}
				pq.Nodes[i].NumaNodeFreeMemSizeKB -= nodeAllocSize
				pq.Nodes[i].VcpuCount += npcpuCount
			}
			allocated = true
			break
		}
	}
	// alloc numa nodes in order free numa node size
	//if !allocated {
	//	if ok := pq.nodesFreeMemSizeEnough(len(pq.Nodes), memSizeKB); !ok {
	//		return errors.Errorf("free hugepage is not enough")
	//	}
	//}
	sort.Sort(pq)
	return nil
}
  488. func (pq *CpuSetCounter) nodesEnough(nodeCount, vcpuCount int, memSizeKB int) bool {
  489. var leastFree = memSizeKB / nodeCount
  490. var leastCpuCount = vcpuCount / nodeCount
  491. var remPcpuCount = vcpuCount % nodeCount
  492. for i := 0; i < nodeCount; i++ {
  493. if pq.NumaEnabled {
  494. if int(pq.Nodes[i].NumaNodeFreeMemSizeKB) < leastFree {
  495. return false
  496. }
  497. }
  498. requireCpuCount := leastCpuCount
  499. if remPcpuCount > 0 {
  500. requireCpuCount += 1
  501. remPcpuCount -= 1
  502. }
  503. if (pq.Nodes[i].VcpuCount + requireCpuCount) > int(float32(pq.Nodes[i].CpuCount)*pq.CPUCmtbound) {
  504. return false
  505. }
  506. }
  507. return true
  508. }
  509. func (pq *CpuSetCounter) nodesFreeMemSizeEnough(nodeCount int, memSizeKB int64) bool {
  510. var freeMem int64 = 0
  511. var leastFree = memSizeKB / int64(nodeCount)
  512. log.Debugf("request memsize %d, least free %d", memSizeKB, leastFree)
  513. for i := 0; i < nodeCount; i++ {
  514. log.Debugf("index %d node %d free size %d", i, pq.Nodes[i].NodeId, pq.Nodes[i].NumaNodeFreeMemSizeKB)
  515. if pq.Nodes[i].NumaNodeFreeMemSizeKB < leastFree {
  516. return false
  517. }
  518. freeMem += pq.Nodes[i].NumaNodeFreeMemSizeKB
  519. }
  520. return freeMem >= memSizeKB
  521. }
// setNumaNodes applies a caller-imposed node->size mapping (numaMaps —
// presumably node id to size in MB, given the *1024 below; TODO confirm).
// Each mapped node is charged the full vcpuCount and its memory share, and
// the resulting entries are marked Unregular.
func (pq *CpuSetCounter) setNumaNodes(numaMaps map[int]int, vcpuCount int64) map[int]SAllocNumaCpus {
	res := map[int]SAllocNumaCpus{}
	for i := range pq.Nodes {
		if size, ok := numaMaps[pq.Nodes[i].NodeId]; ok {
			allocMem := int64(size) * 1024
			//npcpuCount := int(vcpuCount*allocMem/memSizeKB + (vcpuCount*allocMem)%memSizeKB)
			res[pq.Nodes[i].NodeId] = SAllocNumaCpus{
				Cpuset:    pq.Nodes[i].AllocCpuset(int(vcpuCount)),
				MemSizeKB: allocMem,
				Unregular: true,
			}
			pq.Nodes[i].NumaNodeFreeMemSizeKB -= allocMem
			pq.Nodes[i].VcpuCount += int(vcpuCount)
		}
	}
	sort.Sort(pq)
	return res
}
  540. func (pq *CpuSetCounter) ReleaseCpus(cpus []int, vcpuCount int) {
  541. var numaCpuCount = map[int][]int{}
  542. for i := 0; i < len(cpus); i++ {
  543. for j := 0; j < len(pq.Nodes); j++ {
  544. if pq.Nodes[j].LogicalProcessors.Contains(cpus[i]) {
  545. if numaCpus, ok := numaCpuCount[pq.Nodes[j].NodeId]; !ok {
  546. numaCpuCount[pq.Nodes[j].NodeId] = []int{cpus[i]}
  547. } else {
  548. numaCpuCount[pq.Nodes[j].NodeId] = append(numaCpus, cpus[i])
  549. }
  550. break
  551. }
  552. }
  553. }
  554. for i := 0; i < len(pq.Nodes); i++ {
  555. if numaCpus, ok := numaCpuCount[pq.Nodes[i].NodeId]; ok {
  556. pq.Nodes[i].CpuDies.ReleaseCpus(numaCpus, vcpuCount)
  557. pq.Nodes[i].VcpuCount -= vcpuCount
  558. }
  559. }
  560. sort.Sort(pq)
  561. }
  562. func (pq *CpuSetCounter) ReleaseNumaCpus(memSizeMb int64, hostNode int, cpus []int, vcpuCount int) {
  563. for i := 0; i < len(pq.Nodes); i++ {
  564. if pq.Nodes[i].NodeId != hostNode {
  565. continue
  566. }
  567. pq.Nodes[i].CpuDies.ReleaseCpus(cpus, vcpuCount)
  568. pq.Nodes[i].VcpuCount -= vcpuCount
  569. pq.Nodes[i].NumaNodeFreeMemSizeKB += memSizeMb * 1024
  570. }
  571. sort.Sort(pq)
  572. }
  573. func (pq *CpuSetCounter) LoadNumaCpus(memSizeMb int64, hostNode int, cpus []int, vcpuCount int) {
  574. for i := 0; i < len(pq.Nodes); i++ {
  575. if pq.Nodes[i].NodeId != hostNode {
  576. continue
  577. }
  578. pq.Nodes[i].CpuDies.LoadCpus(cpus, vcpuCount)
  579. pq.Nodes[i].VcpuCount += vcpuCount
  580. pq.Nodes[i].NumaNodeFreeMemSizeKB -= memSizeMb * 1024
  581. }
  582. sort.Sort(pq)
  583. }
  584. func (pq *CpuSetCounter) LoadCpus(cpus []int, vcpuCpunt int) {
  585. var numaCpuCount = map[int][]int{}
  586. for i := 0; i < len(cpus); i++ {
  587. for j := 0; j < len(pq.Nodes); j++ {
  588. if pq.Nodes[j].LogicalProcessors.Contains(cpus[i]) {
  589. if numaCpus, ok := numaCpuCount[pq.Nodes[j].NodeId]; !ok {
  590. numaCpuCount[pq.Nodes[j].NodeId] = []int{cpus[i]}
  591. } else {
  592. numaCpuCount[pq.Nodes[j].NodeId] = append(numaCpus, cpus[i])
  593. }
  594. break
  595. }
  596. }
  597. }
  598. for i := 0; i < len(pq.Nodes); i++ {
  599. if numaCpus, ok := numaCpuCount[pq.Nodes[i].NodeId]; ok {
  600. pq.Nodes[i].CpuDies.LoadCpus(numaCpus, vcpuCpunt)
  601. pq.Nodes[i].VcpuCount += vcpuCpunt
  602. }
  603. }
  604. sort.Sort(pq)
  605. }
  606. func (pq CpuSetCounter) Len() int { return len(pq.Nodes) }
  607. func (pq CpuSetCounter) Less(i, j int) bool {
  608. freeCpuI := int(float32(pq.Nodes[i].CpuCount)*pq.CPUCmtbound) - pq.Nodes[i].VcpuCount
  609. freeCpuJ := int(float32(pq.Nodes[i].CpuCount)*pq.CPUCmtbound) - pq.Nodes[j].VcpuCount
  610. if pq.NumaEnabled {
  611. if pq.Nodes[i].NumaNodeFreeMemSizeKB == pq.Nodes[j].NumaNodeFreeMemSizeKB {
  612. return freeCpuI > freeCpuJ
  613. }
  614. return pq.Nodes[i].NumaNodeFreeMemSizeKB > pq.Nodes[j].NumaNodeFreeMemSizeKB
  615. } else {
  616. return freeCpuI > freeCpuJ
  617. }
  618. }
  619. func (pq CpuSetCounter) Swap(i, j int) {
  620. pq.Nodes[i], pq.Nodes[j] = pq.Nodes[j], pq.Nodes[i]
  621. }
  622. func (pq *CpuSetCounter) Push(item interface{}) {
  623. (*pq).Nodes = append((*pq).Nodes, item.(*NumaNode))
  624. }
  625. func (pq *CpuSetCounter) Pop() interface{} {
  626. old := *pq
  627. n := len(old.Nodes)
  628. item := old.Nodes[n-1]
  629. old.Nodes[n-1] = nil // avoid memory leak
  630. (*pq).Nodes = old.Nodes[0 : n-1]
  631. return item
  632. }
// NumaNode tracks allocation state for one host NUMA node.
type NumaNode struct {
	// CpuDies is kept sorted least-loaded-first (see SorttedCPUDie.Less).
	CpuDies SorttedCPUDie
	// LogicalProcessors is the union of all allocatable cpus on this node
	// (reserved cpus excluded).
	LogicalProcessors cpuset.CPUSet
	// VcpuCount is the number of vCPUs currently accounted to this node.
	VcpuCount int
	// CpuCount is the number of allocatable physical cpus.
	CpuCount int
	// ReserveCpuCount is how many of the node's cpus were host-reserved.
	ReserveCpuCount int
	NodeId          int
	// Distances[k] is the topology distance from this node to node k.
	Distances []int
	// NumaNodeMemSizeKB is the allocatable memory budget (overcommit and
	// reservation already applied, see NewNumaNode); MemTotalSizeKB is the
	// raw usable size; NumaNodeFreeMemSizeKB is the remaining budget.
	NumaNodeMemSizeKB     int64
	MemTotalSizeKB        int64
	NumaNodeFreeMemSizeKB int64
}
// NewNumaNode builds a NumaNode from ghw topology data. When NUMA allocation
// is disabled only the id/distances are filled. On container hosts the
// memory budget is (usable RAM - reserve) * memCmtBound; otherwise it is the
// node's hugepage pool (nr_hugepages * hugepageSizeKB) — with no overcommit
// applied, presumably because hugepages are preallocated; TODO confirm.
func NewNumaNode(
	nodeInfo *topology.Node,
	numaAllocate, isContainerHost bool,
	hugepageSizeKB int, memCmtBound float32,
	reservedMemSizeKB int,
) (*NumaNode, error) {
	n := new(NumaNode)
	n.LogicalProcessors = cpuset.NewCPUSet()
	n.NodeId = nodeInfo.ID
	n.Distances = nodeInfo.Distances
	if !numaAllocate {
		return n, nil
	}
	if isContainerHost {
		if nodeInfo.Memory == nil {
			return nil, errors.Errorf("node %d no memory info: %#v", nodeInfo.ID, nodeInfo)
		}
		n.NumaNodeMemSizeKB = int64(float32(nodeInfo.Memory.TotalUsableBytes/1024-int64(reservedMemSizeKB)) * memCmtBound)
		n.MemTotalSizeKB = nodeInfo.Memory.TotalUsableBytes / 1024
	} else {
		// VM host: the budget comes from this node's hugepage pool in sysfs
		nodeHugepagePath := fmt.Sprintf("/sys/devices/system/node/node%d/hugepages/hugepages-%dkB", n.NodeId, hugepageSizeKB)
		if !fileutils2.Exists(nodeHugepagePath) {
			// no hugepage pool of this size on the node: leave budget at 0
			return n, nil
		}
		nrHugepage, err := fileutils2.FileGetIntContent(path.Join(nodeHugepagePath, "nr_hugepages"))
		if err != nil {
			log.Errorf("failed get node %d nr hugepage %s", n.NodeId, err)
			return nil, errors.Wrap(err, "get numa node nr hugepage")
		}
		n.NumaNodeMemSizeKB = int64(nrHugepage) * int64(hugepageSizeKB)
	}
	n.NumaNodeFreeMemSizeKB = n.NumaNodeMemSizeKB
	return n, nil
}
  679. func (n *NumaNode) nodeEnough(vcpuCount, memSizeKB int, cmtBound float32, enableNumaAlloc bool) bool {
  680. if int(float32(n.CpuCount)*cmtBound)-n.VcpuCount < vcpuCount {
  681. return false
  682. }
  683. if enableNumaAlloc {
  684. if int(n.NumaNodeFreeMemSizeKB) < memSizeKB {
  685. return false
  686. }
  687. }
  688. return true
  689. }
  690. func (n *NumaNode) AllocCpuset(vcpuCount int) []int {
  691. if options.HostOptions.EnableStrictCpuBind {
  692. return n.allocCpusetStrict(vcpuCount)
  693. }
  694. return n.allocCpusetOnNode(vcpuCount)
  695. }
  696. func (n *NumaNode) allocCpusetStrict(vcpuCount int) []int {
  697. var allocCount = vcpuCount
  698. var dieCnt = 0
  699. // If request vcpu count great then node cpucount,
  700. // vcpus should evenly distributed to all dies.
  701. // Otherwise figure out how many dies can hold
  702. // all of vcpus at first, and evenly distributed
  703. // to selected dies.
  704. if vcpuCount > n.CpuCount {
  705. dieCnt = len(n.CpuDies)
  706. } else {
  707. var pcpuCount = 0
  708. for dieCnt < len(n.CpuDies) {
  709. pcpuCount += n.CpuDies[dieCnt].LogicalProcessors.Size()
  710. dieCnt += 1
  711. if pcpuCount >= vcpuCount {
  712. break
  713. }
  714. }
  715. }
  716. var perDieCpuCount = vcpuCount / dieCnt
  717. var allocCpuCountMap = make([]int, dieCnt)
  718. for allocCount > 0 {
  719. for i := 0; i < dieCnt; i++ {
  720. var allocNum = perDieCpuCount
  721. if allocCount < allocNum {
  722. allocNum = allocCount
  723. }
  724. allocCount -= allocNum
  725. allocCpuCountMap[i] += allocNum
  726. }
  727. }
  728. defer sort.Sort(n.CpuDies)
  729. var ret = make([]int, 0)
  730. for i := 0; i < len(allocCpuCountMap); i++ {
  731. var allocCpuCount = allocCpuCountMap[i]
  732. for allocCpuCount > 0 {
  733. pcpus := n.CpuDies[i].LogicalProcessors.ToSliceNoSort()
  734. for j := 0; j < len(pcpus); j++ {
  735. if n.CpuDies[i].CpuFree[pcpus[j]] > 0 {
  736. ret = append(ret, n.CpuDies[i].CpuFree[pcpus[j]])
  737. n.CpuDies[i].CpuFree[pcpus[j]] -= 1
  738. }
  739. allocCpuCount -= 1
  740. if allocCpuCount <= 0 {
  741. break
  742. }
  743. }
  744. }
  745. }
  746. return ret
  747. }
// allocCpusetOnNode returns a loose (non-strict) cpuset for vcpuCount vCPUs:
// it walks the dies least-loaded-first and returns ALL logical processors of
// each visited die until their combined size covers vcpuCount, so the guest
// may float over more cpus than it has vCPUs.
// NOTE(review): every visited die's VcpuCount is increased by the FULL
// vcpuCount (not its share) — presumably deliberate load accounting for the
// die sort; confirm.
func (n *NumaNode) allocCpusetOnNode(vcpuCount int) []int {
	cpus := make([]int, 0)
	var allocCount = vcpuCount
	for i := range n.CpuDies {
		n.CpuDies[i].VcpuCount += vcpuCount
		cpus = append(cpus, n.CpuDies[i].LogicalProcessors.ToSliceNoSort()...)
		if allocCount > n.CpuDies[i].LogicalProcessors.Size() {
			allocCount -= n.CpuDies[i].LogicalProcessors.Size()
		} else {
			break
		}
	}
	sort.Sort(n.CpuDies)
	return cpus
}
// CPUDie is a group of logical processors sharing an L3 cache (or the whole
// node when no L3 info exists). CpuFree maps cpu id -> remaining vCPU slots
// under the cpu overcommit ratio.
type CPUDie struct {
	CpuFree           map[int]int
	LogicalProcessors cpuset.CPUSet
	VcpuCount         int
}
  768. func (d *CPUDie) initCpuFree(cpuCmtbound float32) {
  769. cpuFree := map[int]int{}
  770. for _, cpuId := range d.LogicalProcessors.ToSliceNoSort() {
  771. cpuFree[cpuId] = int(cpuCmtbound)
  772. }
  773. d.CpuFree = cpuFree
  774. }
// SorttedCPUDie is a list of dies kept sorted least-loaded-first.
// NOTE(review): "Sortted" is a typo for "Sorted", but the type is exported
// and referenced elsewhere (NumaNode.CpuDies), so the name is kept.
type SorttedCPUDie []*CPUDie

// Len is part of sort.Interface.
func (pq SorttedCPUDie) Len() int { return len(pq) }

// Less orders dies by ascending allocated vCPU count (least loaded first).
func (pq SorttedCPUDie) Less(i, j int) bool {
	return pq[i].VcpuCount < pq[j].VcpuCount
}

// Swap is part of sort.Interface.
func (pq SorttedCPUDie) Swap(i, j int) {
	pq[i], pq[j] = pq[j], pq[i]
}
  783. func (pq *SorttedCPUDie) Push(item interface{}) {
  784. *pq = append(*pq, item.(*CPUDie))
  785. }
  786. func (pq *SorttedCPUDie) Pop() interface{} {
  787. old := *pq
  788. n := len(old)
  789. item := old[n-1]
  790. old[n-1] = nil // avoid memory leak
  791. *pq = old[0 : n-1]
  792. return item
  793. }
  794. func (pq *SorttedCPUDie) ReleaseCpus(cpus []int, vcpuCount int) {
  795. var cpuDies = map[int][]int{}
  796. for i := 0; i < len(cpus); i++ {
  797. for j := 0; j < len(*pq); j++ {
  798. if (*pq)[j].LogicalProcessors.Contains(cpus[i]) {
  799. if cpuDie, ok := cpuDies[j]; !ok {
  800. cpuDies[j] = []int{cpus[i]}
  801. } else {
  802. cpuDies[j] = append(cpuDie, cpus[i])
  803. }
  804. break
  805. }
  806. }
  807. }
  808. for i := 0; i < len(*pq); i++ {
  809. if _, ok := cpuDies[i]; ok {
  810. d := (*pq)[i]
  811. for _, cpu := range cpus {
  812. d.CpuFree[cpu] += 1
  813. }
  814. d.VcpuCount -= vcpuCount
  815. }
  816. }
  817. sort.Sort(pq)
  818. }
  819. func (pq *SorttedCPUDie) LoadCpus(cpus []int, vcpuCount int) {
  820. var cpuDies = map[int][]int{}
  821. for i := 0; i < len(cpus); i++ {
  822. for j := 0; j < len(*pq); j++ {
  823. if (*pq)[j].LogicalProcessors.Contains(cpus[i]) {
  824. if cpuDie, ok := cpuDies[j]; !ok {
  825. cpuDies[j] = []int{cpus[i]}
  826. } else {
  827. cpuDies[j] = append(cpuDie, cpus[i])
  828. }
  829. break
  830. }
  831. }
  832. }
  833. for i := 0; i < len(*pq); i++ {
  834. if cpus, ok := cpuDies[i]; ok {
  835. d := (*pq)[i]
  836. for _, cpu := range cpus {
  837. d.CpuFree[cpu] -= 1
  838. }
  839. d.VcpuCount += vcpuCount
  840. }
  841. }
  842. sort.Sort(pq)
  843. }