sysinfo.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package sysinfo
  15. import (
  16. "fmt"
  17. "os"
  18. "regexp"
  19. "strconv"
  20. "strings"
  21. info "github.com/google/cadvisor/info/v1"
  22. "github.com/google/cadvisor/utils/sysfs"
  23. "k8s.io/klog/v2"
  24. )
var (
	// schedulerRegExp captures the text enclosed in square brackets. It is
	// applied to the block device scheduler string in GetBlockDeviceInfo.
	schedulerRegExp = regexp.MustCompile(`.*\[(.*)\].*`)
	// nodeDirRegExp captures the digits that follow "node/node" in a node
	// directory path; GetNodesInfo uses them as the node ID.
	nodeDirRegExp = regexp.MustCompile(`node/node(\d*)`)
	// cpuDirRegExp captures the digits that follow "/cpu" in a CPU directory
	// path; getCoresInfo uses them as the thread (logical CPU) ID.
	cpuDirRegExp = regexp.MustCompile(`/cpu(\d+)`)
	// memoryCapacityRegexp captures the numeric kB value of a "MemTotal:" entry.
	memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)
	// cpusPath is the directory handed to GetCPUsPaths by getCPUTopology when
	// no node directories are reported.
	cpusPath = "/sys/devices/system/cpu"
)
const (
	// cacheLevel2 is the threshold used by addCacheInfo: caches with a level
	// above it are considered for node-level or uncore placement, the rest
	// for core-level placement.
	cacheLevel2 = 2
	// hugepagesDir is the sub-directory name (including its trailing slash)
	// that GetNodesInfo appends to a node directory before calling
	// GetHugePagesInfo.
	hugepagesDir = "hugepages/"
)
  36. // Get information about block devices present on the system.
  37. // Uses the passed in system interface to retrieve the low level OS information.
  38. func GetBlockDeviceInfo(sysfs sysfs.SysFs) (map[string]info.DiskInfo, error) {
  39. disks, err := sysfs.GetBlockDevices()
  40. if err != nil {
  41. return nil, err
  42. }
  43. diskMap := make(map[string]info.DiskInfo)
  44. for _, disk := range disks {
  45. name := disk.Name()
  46. // Ignore non-disk devices.
  47. // TODO(rjnagal): Maybe just match hd, sd, and dm prefixes.
  48. if strings.HasPrefix(name, "loop") || strings.HasPrefix(name, "ram") || strings.HasPrefix(name, "sr") {
  49. continue
  50. }
  51. diskInfo := info.DiskInfo{
  52. Name: name,
  53. }
  54. dev, err := sysfs.GetBlockDeviceNumbers(name)
  55. if err != nil {
  56. return nil, err
  57. }
  58. n, err := fmt.Sscanf(dev, "%d:%d", &diskInfo.Major, &diskInfo.Minor)
  59. if err != nil || n != 2 {
  60. return nil, fmt.Errorf("could not parse device numbers from %s for device %s", dev, name)
  61. }
  62. out, err := sysfs.GetBlockDeviceSize(name)
  63. if err != nil {
  64. return nil, err
  65. }
  66. // Remove trailing newline before conversion.
  67. size, err := strconv.ParseUint(strings.TrimSpace(out), 10, 64)
  68. if err != nil {
  69. return nil, err
  70. }
  71. // size is in 512 bytes blocks.
  72. diskInfo.Size = size * 512
  73. diskInfo.Scheduler = "none"
  74. blkSched, err := sysfs.GetBlockDeviceScheduler(name)
  75. if err == nil {
  76. matches := schedulerRegExp.FindSubmatch([]byte(blkSched))
  77. if len(matches) >= 2 {
  78. diskInfo.Scheduler = string(matches[1])
  79. }
  80. }
  81. device := fmt.Sprintf("%d:%d", diskInfo.Major, diskInfo.Minor)
  82. diskMap[device] = diskInfo
  83. }
  84. return diskMap, nil
  85. }
  86. // Get information about network devices present on the system.
  87. func GetNetworkDevices(sysfs sysfs.SysFs) ([]info.NetInfo, error) {
  88. devs, err := sysfs.GetNetworkDevices()
  89. if err != nil {
  90. return nil, err
  91. }
  92. netDevices := []info.NetInfo{}
  93. for _, dev := range devs {
  94. name := dev.Name()
  95. // Ignore docker, loopback, and veth devices.
  96. ignoredDevices := []string{"lo", "veth", "docker"}
  97. ignored := false
  98. for _, prefix := range ignoredDevices {
  99. if strings.HasPrefix(name, prefix) {
  100. ignored = true
  101. break
  102. }
  103. }
  104. if ignored {
  105. continue
  106. }
  107. address, err := sysfs.GetNetworkAddress(name)
  108. if err != nil {
  109. return nil, err
  110. }
  111. mtuStr, err := sysfs.GetNetworkMtu(name)
  112. if err != nil {
  113. return nil, err
  114. }
  115. var mtu int64
  116. n, err := fmt.Sscanf(mtuStr, "%d", &mtu)
  117. if err != nil || n != 1 {
  118. return nil, fmt.Errorf("could not parse mtu from %s for device %s", mtuStr, name)
  119. }
  120. netInfo := info.NetInfo{
  121. Name: name,
  122. MacAddress: strings.TrimSpace(address),
  123. Mtu: mtu,
  124. }
  125. speed, err := sysfs.GetNetworkSpeed(name)
  126. // Some devices don't set speed.
  127. if err == nil {
  128. var s int64
  129. n, err := fmt.Sscanf(speed, "%d", &s)
  130. if err != nil || n != 1 {
  131. return nil, fmt.Errorf("could not parse speed from %s for device %s", speed, name)
  132. }
  133. netInfo.Speed = s
  134. }
  135. netDevices = append(netDevices, netInfo)
  136. }
  137. return netDevices, nil
  138. }
  139. // GetHugePagesInfo returns information about pre-allocated huge pages
  140. // hugepagesDirectory should be top directory of hugepages
  141. // Such as: /sys/kernel/mm/hugepages/
  142. func GetHugePagesInfo(sysFs sysfs.SysFs, hugepagesDirectory string) ([]info.HugePagesInfo, error) {
  143. var hugePagesInfo []info.HugePagesInfo
  144. files, err := sysFs.GetHugePagesInfo(hugepagesDirectory)
  145. if err != nil {
  146. // treat as non-fatal since kernels and machine can be
  147. // configured to disable hugepage support
  148. return hugePagesInfo, nil
  149. }
  150. for _, st := range files {
  151. nameArray := strings.Split(st.Name(), "-")
  152. pageSizeArray := strings.Split(nameArray[1], "kB")
  153. pageSize, err := strconv.ParseUint(string(pageSizeArray[0]), 10, 64)
  154. if err != nil {
  155. return hugePagesInfo, err
  156. }
  157. val, err := sysFs.GetHugePagesNr(hugepagesDirectory, st.Name())
  158. if err != nil {
  159. return hugePagesInfo, err
  160. }
  161. var numPages uint64
  162. // we use sscanf as the file as a new-line that trips up ParseUint
  163. // it returns the number of tokens successfully parsed, so if
  164. // n != 1, it means we were unable to parse a number from the file
  165. n, err := fmt.Sscanf(string(val), "%d", &numPages)
  166. if err != nil || n != 1 {
  167. return hugePagesInfo, fmt.Errorf("could not parse file nr_hugepage for %s, contents %q", st.Name(), string(val))
  168. }
  169. hugePagesInfo = append(hugePagesInfo, info.HugePagesInfo{
  170. NumPages: numPages,
  171. PageSize: pageSize,
  172. })
  173. }
  174. return hugePagesInfo, nil
  175. }
  176. // GetNodesInfo returns information about NUMA nodes and their topology
  177. func GetNodesInfo(sysFs sysfs.SysFs) ([]info.Node, int, error) {
  178. nodes := []info.Node{}
  179. allLogicalCoresCount := 0
  180. nodesDirs, err := sysFs.GetNodesPaths()
  181. if err != nil {
  182. return nil, 0, err
  183. }
  184. if len(nodesDirs) == 0 {
  185. klog.Warningf("Nodes topology is not available, providing CPU topology")
  186. return getCPUTopology(sysFs)
  187. }
  188. for _, nodeDir := range nodesDirs {
  189. id, err := getMatchedInt(nodeDirRegExp, nodeDir)
  190. if err != nil {
  191. return nil, 0, err
  192. }
  193. node := info.Node{Id: id}
  194. cpuDirs, err := sysFs.GetCPUsPaths(nodeDir)
  195. if len(cpuDirs) == 0 {
  196. klog.Warningf("Found node without any CPU, nodeDir: %s, number of cpuDirs %d, err: %v", nodeDir, len(cpuDirs), err)
  197. } else {
  198. cores, err := getCoresInfo(sysFs, cpuDirs)
  199. if err != nil {
  200. return nil, 0, err
  201. }
  202. node.Cores = cores
  203. for _, core := range cores {
  204. allLogicalCoresCount += len(core.Threads)
  205. }
  206. }
  207. // On some Linux platforms(such as Arm64 guest kernel), cache info may not exist.
  208. // So, we should ignore error here.
  209. err = addCacheInfo(sysFs, &node)
  210. if err != nil {
  211. klog.V(1).Infof("Found node without cache information, nodeDir: %s", nodeDir)
  212. }
  213. node.Memory, err = getNodeMemInfo(sysFs, nodeDir)
  214. if err != nil {
  215. return nil, 0, err
  216. }
  217. hugepagesDirectory := fmt.Sprintf("%s/%s", nodeDir, hugepagesDir)
  218. node.HugePages, err = GetHugePagesInfo(sysFs, hugepagesDirectory)
  219. if err != nil {
  220. return nil, 0, err
  221. }
  222. node.Distances, err = getDistances(sysFs, nodeDir)
  223. if err != nil {
  224. return nil, 0, err
  225. }
  226. nodes = append(nodes, node)
  227. }
  228. return nodes, allLogicalCoresCount, err
  229. }
  230. func getCPUTopology(sysFs sysfs.SysFs) ([]info.Node, int, error) {
  231. nodes := []info.Node{}
  232. cpusPaths, err := sysFs.GetCPUsPaths(cpusPath)
  233. if err != nil {
  234. return nil, 0, err
  235. }
  236. cpusCount := len(cpusPaths)
  237. if cpusCount == 0 {
  238. err = fmt.Errorf("Any CPU is not available, cpusPath: %s", cpusPath)
  239. return nil, 0, err
  240. }
  241. cpusByPhysicalPackageID, err := getCpusByPhysicalPackageID(sysFs, cpusPaths)
  242. if err != nil {
  243. return nil, 0, err
  244. }
  245. if len(cpusByPhysicalPackageID) == 0 {
  246. klog.Warningf("Cannot read any physical package id for any CPU")
  247. return nil, cpusCount, nil
  248. }
  249. for physicalPackageID, cpus := range cpusByPhysicalPackageID {
  250. node := info.Node{Id: physicalPackageID}
  251. cores, err := getCoresInfo(sysFs, cpus)
  252. if err != nil {
  253. return nil, 0, err
  254. }
  255. node.Cores = cores
  256. // On some Linux platforms(such as Arm64 guest kernel), cache info may not exist.
  257. // So, we should ignore error here.
  258. err = addCacheInfo(sysFs, &node)
  259. if err != nil {
  260. klog.V(1).Infof("Found cpu without cache information, cpuPath: %s", cpus)
  261. }
  262. nodes = append(nodes, node)
  263. }
  264. return nodes, cpusCount, nil
  265. }
  266. func getCpusByPhysicalPackageID(sysFs sysfs.SysFs, cpusPaths []string) (map[int][]string, error) {
  267. cpuPathsByPhysicalPackageID := make(map[int][]string)
  268. for _, cpuPath := range cpusPaths {
  269. rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuPath)
  270. if os.IsNotExist(err) {
  271. klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuPath, err)
  272. continue
  273. } else if err != nil {
  274. return nil, err
  275. }
  276. physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
  277. if err != nil {
  278. return nil, err
  279. }
  280. if _, ok := cpuPathsByPhysicalPackageID[physicalPackageID]; !ok {
  281. cpuPathsByPhysicalPackageID[physicalPackageID] = make([]string, 0)
  282. }
  283. cpuPathsByPhysicalPackageID[physicalPackageID] = append(cpuPathsByPhysicalPackageID[physicalPackageID], cpuPath)
  284. }
  285. return cpuPathsByPhysicalPackageID, nil
  286. }
  287. // addCacheInfo adds information about cache for NUMA node
  288. func addCacheInfo(sysFs sysfs.SysFs, node *info.Node) error {
  289. for coreID, core := range node.Cores {
  290. threadID := core.Threads[0] //get any thread for core
  291. caches, err := GetCacheInfo(sysFs, threadID)
  292. if err != nil {
  293. return err
  294. }
  295. numThreadsPerCore := len(core.Threads)
  296. numThreadsPerNode := len(node.Cores) * numThreadsPerCore
  297. for _, cache := range caches {
  298. c := info.Cache{
  299. Id: cache.Id,
  300. Size: cache.Size,
  301. Level: cache.Level,
  302. Type: cache.Type,
  303. }
  304. if cache.Level > cacheLevel2 {
  305. if cache.Cpus == numThreadsPerNode {
  306. // Add a node level cache.
  307. cacheFound := false
  308. for _, nodeCache := range node.Caches {
  309. if nodeCache == c {
  310. cacheFound = true
  311. }
  312. }
  313. if !cacheFound {
  314. node.Caches = append(node.Caches, c)
  315. }
  316. } else {
  317. // Add uncore cache, for architecture in which l3 cache only shared among some cores.
  318. uncoreCacheFound := false
  319. for _, uncoreCache := range node.Cores[coreID].UncoreCaches {
  320. if uncoreCache == c {
  321. uncoreCacheFound = true
  322. }
  323. }
  324. if !uncoreCacheFound {
  325. node.Cores[coreID].UncoreCaches = append(node.Cores[coreID].UncoreCaches, c)
  326. }
  327. }
  328. } else if cache.Cpus == numThreadsPerCore {
  329. // Add core level cache
  330. node.Cores[coreID].Caches = append(node.Cores[coreID].Caches, c)
  331. }
  332. // Ignore unknown caches.
  333. }
  334. }
  335. return nil
  336. }
  337. // getNodeMemInfo returns information about total memory for NUMA node
  338. func getNodeMemInfo(sysFs sysfs.SysFs, nodeDir string) (uint64, error) {
  339. rawMem, err := sysFs.GetMemInfo(nodeDir)
  340. if err != nil {
  341. //Ignore if per-node info is not available.
  342. klog.Warningf("Found node without memory information, nodeDir: %s", nodeDir)
  343. return 0, nil
  344. }
  345. matches := memoryCapacityRegexp.FindStringSubmatch(rawMem)
  346. if len(matches) != 2 {
  347. return 0, fmt.Errorf("failed to match regexp in output: %q", string(rawMem))
  348. }
  349. memory, err := strconv.ParseUint(matches[1], 10, 64)
  350. if err != nil {
  351. return 0, err
  352. }
  353. memory = memory * 1024 // Convert to bytes
  354. return uint64(memory), nil
  355. }
  356. // getDistances returns information about distances between NUMA nodes
  357. func getDistances(sysFs sysfs.SysFs, nodeDir string) ([]uint64, error) {
  358. rawDistance, err := sysFs.GetDistances(nodeDir)
  359. if err != nil {
  360. //Ignore if per-node info is not available.
  361. klog.Warningf("Found node without distance information, nodeDir: %s", nodeDir)
  362. return nil, nil
  363. }
  364. distances := []uint64{}
  365. for _, distance := range strings.Split(rawDistance, " ") {
  366. distanceUint, err := strconv.ParseUint(distance, 10, 64)
  367. if err != nil {
  368. return nil, fmt.Errorf("cannot convert %s to int", distance)
  369. }
  370. distances = append(distances, distanceUint)
  371. }
  372. return distances, nil
  373. }
  374. // getCoresInfo returns information about physical cores
  375. func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
  376. cores := make([]info.Core, 0, len(cpuDirs))
  377. for _, cpuDir := range cpuDirs {
  378. cpuID, err := getMatchedInt(cpuDirRegExp, cpuDir)
  379. if err != nil {
  380. return nil, fmt.Errorf("unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir)
  381. }
  382. if !sysFs.IsCPUOnline(cpuDir) {
  383. continue
  384. }
  385. rawPhysicalID, err := sysFs.GetCoreID(cpuDir)
  386. if os.IsNotExist(err) {
  387. klog.Warningf("Cannot read core id for %s, core_id file does not exist, err: %s", cpuDir, err)
  388. continue
  389. } else if err != nil {
  390. return nil, err
  391. }
  392. physicalID, err := strconv.Atoi(rawPhysicalID)
  393. if err != nil {
  394. return nil, err
  395. }
  396. rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir)
  397. if os.IsNotExist(err) {
  398. klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err)
  399. continue
  400. } else if err != nil {
  401. return nil, err
  402. }
  403. physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
  404. if err != nil {
  405. return nil, err
  406. }
  407. coreIDx := -1
  408. for id, core := range cores {
  409. if core.Id == physicalID && core.SocketID == physicalPackageID {
  410. coreIDx = id
  411. }
  412. }
  413. if coreIDx == -1 {
  414. cores = append(cores, info.Core{})
  415. coreIDx = len(cores) - 1
  416. }
  417. desiredCore := &cores[coreIDx]
  418. desiredCore.Id = physicalID
  419. desiredCore.SocketID = physicalPackageID
  420. if len(desiredCore.Threads) == 0 {
  421. desiredCore.Threads = []int{cpuID}
  422. } else {
  423. desiredCore.Threads = append(desiredCore.Threads, cpuID)
  424. }
  425. }
  426. return cores, nil
  427. }
  428. // GetCacheInfo return information about a cache accessible from the given cpu thread
  429. func GetCacheInfo(sysFs sysfs.SysFs, id int) ([]sysfs.CacheInfo, error) {
  430. caches, err := sysFs.GetCaches(id)
  431. if err != nil {
  432. return nil, err
  433. }
  434. info := []sysfs.CacheInfo{}
  435. for _, cache := range caches {
  436. if !strings.HasPrefix(cache.Name(), "index") {
  437. continue
  438. }
  439. cacheInfo, err := sysFs.GetCacheInfo(id, cache.Name())
  440. if err != nil {
  441. return nil, err
  442. }
  443. info = append(info, cacheInfo)
  444. }
  445. return info, nil
  446. }
  447. func getNetworkStats(name string, sysFs sysfs.SysFs) (info.InterfaceStats, error) {
  448. var stats info.InterfaceStats
  449. var err error
  450. stats.Name = name
  451. stats.RxBytes, err = sysFs.GetNetworkStatValue(name, "rx_bytes")
  452. if err != nil {
  453. return stats, err
  454. }
  455. stats.RxPackets, err = sysFs.GetNetworkStatValue(name, "rx_packets")
  456. if err != nil {
  457. return stats, err
  458. }
  459. stats.RxErrors, err = sysFs.GetNetworkStatValue(name, "rx_errors")
  460. if err != nil {
  461. return stats, err
  462. }
  463. stats.RxDropped, err = sysFs.GetNetworkStatValue(name, "rx_dropped")
  464. if err != nil {
  465. return stats, err
  466. }
  467. stats.TxBytes, err = sysFs.GetNetworkStatValue(name, "tx_bytes")
  468. if err != nil {
  469. return stats, err
  470. }
  471. stats.TxPackets, err = sysFs.GetNetworkStatValue(name, "tx_packets")
  472. if err != nil {
  473. return stats, err
  474. }
  475. stats.TxErrors, err = sysFs.GetNetworkStatValue(name, "tx_errors")
  476. if err != nil {
  477. return stats, err
  478. }
  479. stats.TxDropped, err = sysFs.GetNetworkStatValue(name, "tx_dropped")
  480. if err != nil {
  481. return stats, err
  482. }
  483. return stats, nil
  484. }
  485. func GetSystemUUID(sysFs sysfs.SysFs) (string, error) {
  486. return sysFs.GetSystemUUID()
  487. }
  488. func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) {
  489. matches := rgx.FindStringSubmatch(str)
  490. if len(matches) != 2 {
  491. return 0, fmt.Errorf("failed to match regexp, str: %s", str)
  492. }
  493. valInt, err := strconv.Atoi(matches[1])
  494. if err != nil {
  495. return 0, err
  496. }
  497. return valInt, nil
  498. }
  499. // GetSocketFromCPU returns Socket ID of passed CPU. If is not present, returns -1.
  500. func GetSocketFromCPU(topology []info.Node, cpu int) int {
  501. for _, node := range topology {
  502. found, coreID := node.FindCoreByThread(cpu)
  503. if found {
  504. return node.Cores[coreID].SocketID
  505. }
  506. }
  507. return -1
  508. }
  509. // GetOnlineCPUs returns available cores.
  510. func GetOnlineCPUs(topology []info.Node) []int {
  511. onlineCPUs := make([]int, 0)
  512. for _, node := range topology {
  513. for _, core := range node.Cores {
  514. onlineCPUs = append(onlineCPUs, core.Threads...)
  515. }
  516. }
  517. return onlineCPUs
  518. }