handler.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. // Copyright 2018 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package libcontainer
  15. import (
  16. "bufio"
  17. "bytes"
  18. "encoding/json"
  19. "flag"
  20. "fmt"
  21. "io"
  22. "io/ioutil"
  23. "os"
  24. "path"
  25. "regexp"
  26. "strconv"
  27. "strings"
  28. "time"
  29. "github.com/opencontainers/runc/libcontainer"
  30. "github.com/opencontainers/runc/libcontainer/cgroups"
  31. "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
  32. "k8s.io/klog/v2"
  33. "github.com/google/cadvisor/container"
  34. "github.com/google/cadvisor/container/common"
  35. info "github.com/google/cadvisor/info/v1"
  36. )
  37. var (
  38. whitelistedUlimits = [...]string{"max_open_files"}
  39. referencedResetInterval = flag.Uint64("referenced_reset_interval", 0,
  40. "Reset interval for referenced bytes (container_referenced_bytes metric), number of measurement cycles after which referenced bytes are cleared, if set to 0 referenced bytes are never cleared (default: 0)")
  41. smapsFilePathPattern = "/proc/%d/smaps"
  42. clearRefsFilePathPattern = "/proc/%d/clear_refs"
  43. referencedRegexp = regexp.MustCompile(`Referenced:\s*([0-9]+)\s*kB`)
  44. )
// Handler collects statistics for one container from its cgroup manager and
// from the /proc entries of its processes.
type Handler struct {
	// cgroupManager is queried for cgroup stats, cgroup paths and PIDs.
	cgroupManager cgroups.Manager
	// rootFs is joined in front of the "proc" paths read for network, process
	// and scheduler statistics.
	rootFs string
	// pid selects the /proc/<pid> directory used for network stats and
	// ulimits; those are only collected when pid > 0.
	pid int
	// includedMetrics selects which optional metric groups GetStats gathers.
	includedMetrics container.MetricSet
	// pidMetricsCache holds CPU scheduler stats for existing processes (map key is PID) between calls to schedulerStatsFromProcs.
	pidMetricsCache map[int]*info.CpuSchedstat
	// pidMetricsSaved holds accumulated CPU scheduler stats for processes that no longer exist.
	pidMetricsSaved info.CpuSchedstat
	// cycles counts the GetStats calls made with referenced-memory metrics
	// enabled; it is compared against the referenced bytes reset interval.
	cycles uint64
}
  56. func NewHandler(cgroupManager cgroups.Manager, rootFs string, pid int, includedMetrics container.MetricSet) *Handler {
  57. return &Handler{
  58. cgroupManager: cgroupManager,
  59. rootFs: rootFs,
  60. pid: pid,
  61. includedMetrics: includedMetrics,
  62. pidMetricsCache: make(map[int]*info.CpuSchedstat),
  63. }
  64. }
  65. // Get cgroup and networking stats of the specified container
  66. func (h *Handler) GetStats() (*info.ContainerStats, error) {
  67. ignoreStatsError := false
  68. if cgroups.IsCgroup2UnifiedMode() {
  69. // On cgroup v2 the root cgroup stats have been introduced in recent kernel versions,
  70. // so not all kernel versions have all the data. This means that stat fetching can fail
  71. // due to lacking cgroup stat files, but that some data is provided.
  72. if h.cgroupManager.Path("") == fs2.UnifiedMountpoint {
  73. ignoreStatsError = true
  74. }
  75. }
  76. cgroupStats, err := h.cgroupManager.GetStats()
  77. if err != nil {
  78. if !ignoreStatsError {
  79. return nil, err
  80. }
  81. klog.V(4).Infof("Ignoring errors when gathering stats for root cgroup since some controllers don't have stats on the root cgroup: %v", err)
  82. }
  83. libcontainerStats := &libcontainer.Stats{
  84. CgroupStats: cgroupStats,
  85. }
  86. stats := newContainerStats(libcontainerStats, h.includedMetrics)
  87. if h.includedMetrics.Has(container.ProcessSchedulerMetrics) {
  88. stats.Cpu.Schedstat, err = h.schedulerStatsFromProcs()
  89. if err != nil {
  90. klog.V(4).Infof("Unable to get Process Scheduler Stats: %v", err)
  91. }
  92. }
  93. if h.includedMetrics.Has(container.ReferencedMemoryMetrics) {
  94. h.cycles++
  95. pids, err := h.cgroupManager.GetPids()
  96. if err != nil {
  97. klog.V(4).Infof("Could not get PIDs for container %d: %v", h.pid, err)
  98. } else {
  99. stats.ReferencedMemory, err = referencedBytesStat(pids, h.cycles, *referencedResetInterval)
  100. if err != nil {
  101. klog.V(4).Infof("Unable to get referenced bytes: %v", err)
  102. }
  103. }
  104. }
  105. // If we know the pid then get network stats from /proc/<pid>/net/dev
  106. if h.pid > 0 {
  107. if h.includedMetrics.Has(container.NetworkUsageMetrics) {
  108. netStats, err := networkStatsFromProc(h.rootFs, h.pid)
  109. if err != nil {
  110. klog.V(4).Infof("Unable to get network stats from pid %d: %v", h.pid, err)
  111. } else {
  112. stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
  113. }
  114. }
  115. if h.includedMetrics.Has(container.NetworkTcpUsageMetrics) {
  116. t, err := tcpStatsFromProc(h.rootFs, h.pid, "net/tcp")
  117. if err != nil {
  118. klog.V(4).Infof("Unable to get tcp stats from pid %d: %v", h.pid, err)
  119. } else {
  120. stats.Network.Tcp = t
  121. }
  122. t6, err := tcpStatsFromProc(h.rootFs, h.pid, "net/tcp6")
  123. if err != nil {
  124. klog.V(4).Infof("Unable to get tcp6 stats from pid %d: %v", h.pid, err)
  125. } else {
  126. stats.Network.Tcp6 = t6
  127. }
  128. }
  129. if h.includedMetrics.Has(container.NetworkAdvancedTcpUsageMetrics) {
  130. ta, err := advancedTCPStatsFromProc(h.rootFs, h.pid, "net/netstat", "net/snmp")
  131. if err != nil {
  132. klog.V(4).Infof("Unable to get advanced tcp stats from pid %d: %v", h.pid, err)
  133. } else {
  134. stats.Network.TcpAdvanced = ta
  135. }
  136. }
  137. if h.includedMetrics.Has(container.NetworkUdpUsageMetrics) {
  138. u, err := udpStatsFromProc(h.rootFs, h.pid, "net/udp")
  139. if err != nil {
  140. klog.V(4).Infof("Unable to get udp stats from pid %d: %v", h.pid, err)
  141. } else {
  142. stats.Network.Udp = u
  143. }
  144. u6, err := udpStatsFromProc(h.rootFs, h.pid, "net/udp6")
  145. if err != nil {
  146. klog.V(4).Infof("Unable to get udp6 stats from pid %d: %v", h.pid, err)
  147. } else {
  148. stats.Network.Udp6 = u6
  149. }
  150. }
  151. }
  152. // some process metrics are per container ( number of processes, number of
  153. // file descriptors etc.) and not required a proper container's
  154. // root PID (systemd services don't have the root PID atm)
  155. if h.includedMetrics.Has(container.ProcessMetrics) {
  156. path, ok := common.GetControllerPath(h.cgroupManager.GetPaths(), "cpu", cgroups.IsCgroup2UnifiedMode())
  157. if !ok {
  158. klog.V(4).Infof("Could not find cgroups CPU for container %d", h.pid)
  159. } else {
  160. stats.Processes, err = processStatsFromProcs(h.rootFs, path, h.pid)
  161. if err != nil {
  162. klog.V(4).Infof("Unable to get Process Stats: %v", err)
  163. }
  164. }
  165. // if include processes metrics, just set threads metrics if exist, and has no relationship with cpu path
  166. setThreadsStats(cgroupStats, stats)
  167. }
  168. // For backwards compatibility.
  169. if len(stats.Network.Interfaces) > 0 {
  170. stats.Network.InterfaceStats = stats.Network.Interfaces[0]
  171. }
  172. return stats, nil
  173. }
  174. func parseUlimit(value string) (int64, error) {
  175. num, err := strconv.ParseInt(value, 10, 64)
  176. if err != nil {
  177. if strings.EqualFold(value, "unlimited") {
  178. // -1 implies unlimited except for priority and nice; man limits.conf
  179. num = -1
  180. } else {
  181. // Value is not a number or "unlimited"; return an error
  182. return 0, fmt.Errorf("unable to parse limit: %s", value)
  183. }
  184. }
  185. return num, nil
  186. }
  187. func isUlimitWhitelisted(name string) bool {
  188. for _, whitelist := range whitelistedUlimits {
  189. if name == whitelist {
  190. return true
  191. }
  192. }
  193. return false
  194. }
  195. func processLimitsFile(fileData string) []info.UlimitSpec {
  196. limits := strings.Split(fileData, "\n")
  197. ulimits := make([]info.UlimitSpec, 0, len(limits))
  198. for _, lim := range limits {
  199. // Skip any headers/footers
  200. if strings.HasPrefix(lim, "Max") {
  201. // Line format: Max open files 16384 16384 files
  202. fields := regexp.MustCompile(`[\s]{2,}`).Split(lim, -1)
  203. name := strings.Replace(strings.ToLower(strings.TrimSpace(fields[0])), " ", "_", -1)
  204. found := isUlimitWhitelisted(name)
  205. if !found {
  206. continue
  207. }
  208. soft := strings.TrimSpace(fields[1])
  209. softNum, softErr := parseUlimit(soft)
  210. hard := strings.TrimSpace(fields[2])
  211. hardNum, hardErr := parseUlimit(hard)
  212. // Omit metric if there were any parsing errors
  213. if softErr == nil && hardErr == nil {
  214. ulimitSpec := info.UlimitSpec{
  215. Name: name,
  216. SoftLimit: int64(softNum),
  217. HardLimit: int64(hardNum),
  218. }
  219. ulimits = append(ulimits, ulimitSpec)
  220. }
  221. }
  222. }
  223. return ulimits
  224. }
  225. func processRootProcUlimits(rootFs string, rootPid int) []info.UlimitSpec {
  226. filePath := path.Join(rootFs, "/proc", strconv.Itoa(rootPid), "limits")
  227. out, err := ioutil.ReadFile(filePath)
  228. if err != nil {
  229. klog.V(4).Infof("error while listing directory %q to read ulimits: %v", filePath, err)
  230. return []info.UlimitSpec{}
  231. }
  232. return processLimitsFile(string(out))
  233. }
  234. func processStatsFromProcs(rootFs string, cgroupPath string, rootPid int) (info.ProcessStats, error) {
  235. var fdCount, socketCount uint64
  236. filePath := path.Join(cgroupPath, "cgroup.procs")
  237. out, err := ioutil.ReadFile(filePath)
  238. if err != nil {
  239. return info.ProcessStats{}, fmt.Errorf("couldn't open cpu cgroup procs file %v : %v", filePath, err)
  240. }
  241. pids := strings.Split(string(out), "\n")
  242. // EOL is also treated as a new line while reading "cgroup.procs" file with ioutil.ReadFile.
  243. // The last value is an empty string "". Ex: pids = ["22", "1223", ""]
  244. // Trim the last value
  245. if len(pids) != 0 && pids[len(pids)-1] == "" {
  246. pids = pids[:len(pids)-1]
  247. }
  248. for _, pid := range pids {
  249. dirPath := path.Join(rootFs, "/proc", pid, "fd")
  250. fds, err := ioutil.ReadDir(dirPath)
  251. if err != nil {
  252. klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
  253. continue
  254. }
  255. fdCount += uint64(len(fds))
  256. for _, fd := range fds {
  257. fdPath := path.Join(dirPath, fd.Name())
  258. linkName, err := os.Readlink(fdPath)
  259. if err != nil {
  260. klog.V(4).Infof("error while reading %q link: %v", fdPath, err)
  261. continue
  262. }
  263. if strings.HasPrefix(linkName, "socket") {
  264. socketCount++
  265. }
  266. }
  267. }
  268. processStats := info.ProcessStats{
  269. ProcessCount: uint64(len(pids)),
  270. FdCount: fdCount,
  271. SocketCount: socketCount,
  272. }
  273. if rootPid > 0 {
  274. processStats.Ulimits = processRootProcUlimits(rootFs, rootPid)
  275. }
  276. return processStats, nil
  277. }
  278. func (h *Handler) schedulerStatsFromProcs() (info.CpuSchedstat, error) {
  279. pids, err := h.cgroupManager.GetAllPids()
  280. if err != nil {
  281. return info.CpuSchedstat{}, fmt.Errorf("Could not get PIDs for container %d: %w", h.pid, err)
  282. }
  283. alivePids := make(map[int]struct{}, len(pids))
  284. for _, pid := range pids {
  285. f, err := os.Open(path.Join(h.rootFs, "proc", strconv.Itoa(pid), "schedstat"))
  286. if err != nil {
  287. return info.CpuSchedstat{}, fmt.Errorf("couldn't open scheduler statistics for process %d: %v", pid, err)
  288. }
  289. defer f.Close()
  290. contents, err := ioutil.ReadAll(f)
  291. if err != nil {
  292. return info.CpuSchedstat{}, fmt.Errorf("couldn't read scheduler statistics for process %d: %v", pid, err)
  293. }
  294. alivePids[pid] = struct{}{}
  295. rawMetrics := bytes.Split(bytes.TrimRight(contents, "\n"), []byte(" "))
  296. if len(rawMetrics) != 3 {
  297. return info.CpuSchedstat{}, fmt.Errorf("unexpected number of metrics in schedstat file for process %d", pid)
  298. }
  299. cacheEntry, ok := h.pidMetricsCache[pid]
  300. if !ok {
  301. cacheEntry = &info.CpuSchedstat{}
  302. h.pidMetricsCache[pid] = cacheEntry
  303. }
  304. for i, rawMetric := range rawMetrics {
  305. metric, err := strconv.ParseUint(string(rawMetric), 10, 64)
  306. if err != nil {
  307. return info.CpuSchedstat{}, fmt.Errorf("parsing error while reading scheduler statistics for process: %d: %v", pid, err)
  308. }
  309. switch i {
  310. case 0:
  311. cacheEntry.RunTime = metric
  312. case 1:
  313. cacheEntry.RunqueueTime = metric
  314. case 2:
  315. cacheEntry.RunPeriods = metric
  316. }
  317. }
  318. }
  319. schedstats := h.pidMetricsSaved // copy
  320. for p, v := range h.pidMetricsCache {
  321. schedstats.RunPeriods += v.RunPeriods
  322. schedstats.RunqueueTime += v.RunqueueTime
  323. schedstats.RunTime += v.RunTime
  324. if _, alive := alivePids[p]; !alive {
  325. // PID p is gone: accumulate its stats ...
  326. h.pidMetricsSaved.RunPeriods += v.RunPeriods
  327. h.pidMetricsSaved.RunqueueTime += v.RunqueueTime
  328. h.pidMetricsSaved.RunTime += v.RunTime
  329. // ... and remove its cache entry, to prevent
  330. // pidMetricsCache from growing.
  331. delete(h.pidMetricsCache, p)
  332. }
  333. }
  334. return schedstats, nil
  335. }
  336. // referencedBytesStat gets and clears referenced bytes
  337. // see: https://github.com/brendangregg/wss#wsspl-referenced-page-flag
  338. func referencedBytesStat(pids []int, cycles uint64, resetInterval uint64) (uint64, error) {
  339. referencedKBytes, err := getReferencedKBytes(pids)
  340. if err != nil {
  341. return uint64(0), err
  342. }
  343. err = clearReferencedBytes(pids, cycles, resetInterval)
  344. if err != nil {
  345. return uint64(0), err
  346. }
  347. return referencedKBytes * 1024, nil
  348. }
  349. func getReferencedKBytes(pids []int) (uint64, error) {
  350. referencedKBytes := uint64(0)
  351. readSmapsContent := false
  352. foundMatch := false
  353. for _, pid := range pids {
  354. smapsFilePath := fmt.Sprintf(smapsFilePathPattern, pid)
  355. smapsContent, err := ioutil.ReadFile(smapsFilePath)
  356. if err != nil {
  357. klog.V(5).Infof("Cannot read %s file, err: %s", smapsFilePath, err)
  358. if os.IsNotExist(err) {
  359. continue // smaps file does not exists for all PIDs
  360. }
  361. return 0, err
  362. }
  363. readSmapsContent = true
  364. allMatches := referencedRegexp.FindAllSubmatch(smapsContent, -1)
  365. if len(allMatches) == 0 {
  366. klog.V(5).Infof("Not found any information about referenced bytes in %s file", smapsFilePath)
  367. continue // referenced bytes may not exist in smaps file
  368. }
  369. for _, matches := range allMatches {
  370. if len(matches) != 2 {
  371. return 0, fmt.Errorf("failed to match regexp in output: %s", string(smapsContent))
  372. }
  373. foundMatch = true
  374. referenced, err := strconv.ParseUint(string(matches[1]), 10, 64)
  375. if err != nil {
  376. return 0, err
  377. }
  378. referencedKBytes += referenced
  379. }
  380. }
  381. if len(pids) != 0 {
  382. if !readSmapsContent {
  383. klog.Warningf("Cannot read smaps files for any PID from %s", "CONTAINER")
  384. } else if !foundMatch {
  385. klog.Warningf("Not found any information about referenced bytes in smaps files for any PID from %s", "CONTAINER")
  386. }
  387. }
  388. return referencedKBytes, nil
  389. }
  390. func clearReferencedBytes(pids []int, cycles uint64, resetInterval uint64) error {
  391. if resetInterval == 0 {
  392. return nil
  393. }
  394. if cycles%resetInterval == 0 {
  395. for _, pid := range pids {
  396. clearRefsFilePath := fmt.Sprintf(clearRefsFilePathPattern, pid)
  397. clerRefsFile, err := os.OpenFile(clearRefsFilePath, os.O_WRONLY, 0o644)
  398. if err != nil {
  399. // clear_refs file may not exist for all PIDs
  400. continue
  401. }
  402. _, err = clerRefsFile.WriteString("1\n")
  403. if err != nil {
  404. return err
  405. }
  406. err = clerRefsFile.Close()
  407. if err != nil {
  408. return err
  409. }
  410. }
  411. }
  412. return nil
  413. }
  414. func networkStatsFromProc(rootFs string, pid int) ([]info.InterfaceStats, error) {
  415. netStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), "/net/dev")
  416. ifaceStats, err := scanInterfaceStats(netStatsFile)
  417. if err != nil {
  418. return []info.InterfaceStats{}, fmt.Errorf("couldn't read network stats: %v", err)
  419. }
  420. return ifaceStats, nil
  421. }
  422. var ignoredDevicePrefixes = []string{"lo", "veth", "docker"}
  423. func isIgnoredDevice(ifName string) bool {
  424. for _, prefix := range ignoredDevicePrefixes {
  425. if strings.HasPrefix(strings.ToLower(ifName), prefix) {
  426. return true
  427. }
  428. }
  429. return false
  430. }
  431. func scanInterfaceStats(netStatsFile string) ([]info.InterfaceStats, error) {
  432. file, err := os.Open(netStatsFile)
  433. if err != nil {
  434. return nil, fmt.Errorf("failure opening %s: %v", netStatsFile, err)
  435. }
  436. defer file.Close()
  437. scanner := bufio.NewScanner(file)
  438. // Discard header lines
  439. for i := 0; i < 2; i++ {
  440. if b := scanner.Scan(); !b {
  441. return nil, scanner.Err()
  442. }
  443. }
  444. stats := []info.InterfaceStats{}
  445. for scanner.Scan() {
  446. line := scanner.Text()
  447. line = strings.Replace(line, ":", "", -1)
  448. fields := strings.Fields(line)
  449. // If the format of the line is invalid then don't trust any of the stats
  450. // in this file.
  451. if len(fields) != 17 {
  452. return nil, fmt.Errorf("invalid interface stats line: %v", line)
  453. }
  454. devName := fields[0]
  455. if isIgnoredDevice(devName) {
  456. continue
  457. }
  458. i := info.InterfaceStats{
  459. Name: devName,
  460. }
  461. statFields := append(fields[1:5], fields[9:13]...)
  462. statPointers := []*uint64{
  463. &i.RxBytes, &i.RxPackets, &i.RxErrors, &i.RxDropped,
  464. &i.TxBytes, &i.TxPackets, &i.TxErrors, &i.TxDropped,
  465. }
  466. err := setInterfaceStatValues(statFields, statPointers)
  467. if err != nil {
  468. return nil, fmt.Errorf("cannot parse interface stats (%v): %v", err, line)
  469. }
  470. stats = append(stats, i)
  471. }
  472. return stats, nil
  473. }
  474. func setInterfaceStatValues(fields []string, pointers []*uint64) error {
  475. for i, v := range fields {
  476. val, err := strconv.ParseUint(v, 10, 64)
  477. if err != nil {
  478. return err
  479. }
  480. *pointers[i] = val
  481. }
  482. return nil
  483. }
  484. func tcpStatsFromProc(rootFs string, pid int, file string) (info.TcpStat, error) {
  485. tcpStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file)
  486. tcpStats, err := scanTCPStats(tcpStatsFile)
  487. if err != nil {
  488. return tcpStats, fmt.Errorf("couldn't read tcp stats: %v", err)
  489. }
  490. return tcpStats, nil
  491. }
  492. func advancedTCPStatsFromProc(rootFs string, pid int, file1, file2 string) (info.TcpAdvancedStat, error) {
  493. var advancedStats info.TcpAdvancedStat
  494. var err error
  495. netstatFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file1)
  496. err = scanAdvancedTCPStats(&advancedStats, netstatFile)
  497. if err != nil {
  498. return advancedStats, err
  499. }
  500. snmpFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file2)
  501. err = scanAdvancedTCPStats(&advancedStats, snmpFile)
  502. if err != nil {
  503. return advancedStats, err
  504. }
  505. return advancedStats, nil
  506. }
  507. func scanAdvancedTCPStats(advancedStats *info.TcpAdvancedStat, advancedTCPStatsFile string) error {
  508. data, err := ioutil.ReadFile(advancedTCPStatsFile)
  509. if err != nil {
  510. return fmt.Errorf("failure opening %s: %v", advancedTCPStatsFile, err)
  511. }
  512. reader := strings.NewReader(string(data))
  513. scanner := bufio.NewScanner(reader)
  514. scanner.Split(bufio.ScanLines)
  515. advancedTCPStats := make(map[string]interface{})
  516. for scanner.Scan() {
  517. nameParts := strings.Split(scanner.Text(), " ")
  518. scanner.Scan()
  519. valueParts := strings.Split(scanner.Text(), " ")
  520. // Remove trailing :. and ignore non-tcp
  521. protocol := nameParts[0][:len(nameParts[0])-1]
  522. if protocol != "TcpExt" && protocol != "Tcp" {
  523. continue
  524. }
  525. if len(nameParts) != len(valueParts) {
  526. return fmt.Errorf("mismatch field count mismatch in %s: %s",
  527. advancedTCPStatsFile, protocol)
  528. }
  529. for i := 1; i < len(nameParts); i++ {
  530. if strings.Contains(valueParts[i], "-") {
  531. vInt64, err := strconv.ParseInt(valueParts[i], 10, 64)
  532. if err != nil {
  533. return fmt.Errorf("decode value: %s to int64 error: %s", valueParts[i], err)
  534. }
  535. advancedTCPStats[nameParts[i]] = vInt64
  536. } else {
  537. vUint64, err := strconv.ParseUint(valueParts[i], 10, 64)
  538. if err != nil {
  539. return fmt.Errorf("decode value: %s to uint64 error: %s", valueParts[i], err)
  540. }
  541. advancedTCPStats[nameParts[i]] = vUint64
  542. }
  543. }
  544. }
  545. b, err := json.Marshal(advancedTCPStats)
  546. if err != nil {
  547. return err
  548. }
  549. err = json.Unmarshal(b, advancedStats)
  550. if err != nil {
  551. return err
  552. }
  553. return scanner.Err()
  554. }
  555. func scanTCPStats(tcpStatsFile string) (info.TcpStat, error) {
  556. var stats info.TcpStat
  557. data, err := ioutil.ReadFile(tcpStatsFile)
  558. if err != nil {
  559. return stats, fmt.Errorf("failure opening %s: %v", tcpStatsFile, err)
  560. }
  561. tcpStateMap := map[string]uint64{
  562. "01": 0, // ESTABLISHED
  563. "02": 0, // SYN_SENT
  564. "03": 0, // SYN_RECV
  565. "04": 0, // FIN_WAIT1
  566. "05": 0, // FIN_WAIT2
  567. "06": 0, // TIME_WAIT
  568. "07": 0, // CLOSE
  569. "08": 0, // CLOSE_WAIT
  570. "09": 0, // LAST_ACK
  571. "0A": 0, // LISTEN
  572. "0B": 0, // CLOSING
  573. }
  574. reader := strings.NewReader(string(data))
  575. scanner := bufio.NewScanner(reader)
  576. scanner.Split(bufio.ScanLines)
  577. // Discard header line
  578. if b := scanner.Scan(); !b {
  579. return stats, scanner.Err()
  580. }
  581. for scanner.Scan() {
  582. line := scanner.Text()
  583. state := strings.Fields(line)
  584. // TCP state is the 4th field.
  585. // Format: sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
  586. tcpState := state[3]
  587. _, ok := tcpStateMap[tcpState]
  588. if !ok {
  589. return stats, fmt.Errorf("invalid TCP stats line: %v", line)
  590. }
  591. tcpStateMap[tcpState]++
  592. }
  593. stats = info.TcpStat{
  594. Established: tcpStateMap["01"],
  595. SynSent: tcpStateMap["02"],
  596. SynRecv: tcpStateMap["03"],
  597. FinWait1: tcpStateMap["04"],
  598. FinWait2: tcpStateMap["05"],
  599. TimeWait: tcpStateMap["06"],
  600. Close: tcpStateMap["07"],
  601. CloseWait: tcpStateMap["08"],
  602. LastAck: tcpStateMap["09"],
  603. Listen: tcpStateMap["0A"],
  604. Closing: tcpStateMap["0B"],
  605. }
  606. return stats, nil
  607. }
  608. func udpStatsFromProc(rootFs string, pid int, file string) (info.UdpStat, error) {
  609. var err error
  610. var udpStats info.UdpStat
  611. udpStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file)
  612. r, err := os.Open(udpStatsFile)
  613. if err != nil {
  614. return udpStats, fmt.Errorf("failure opening %s: %v", udpStatsFile, err)
  615. }
  616. udpStats, err = scanUDPStats(r)
  617. if err != nil {
  618. return udpStats, fmt.Errorf("couldn't read udp stats: %v", err)
  619. }
  620. return udpStats, nil
  621. }
  622. func scanUDPStats(r io.Reader) (info.UdpStat, error) {
  623. var stats info.UdpStat
  624. scanner := bufio.NewScanner(r)
  625. scanner.Split(bufio.ScanLines)
  626. // Discard header line
  627. if b := scanner.Scan(); !b {
  628. return stats, scanner.Err()
  629. }
  630. listening := uint64(0)
  631. dropped := uint64(0)
  632. rxQueued := uint64(0)
  633. txQueued := uint64(0)
  634. for scanner.Scan() {
  635. line := scanner.Text()
  636. // Format: sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops
  637. listening++
  638. fs := strings.Fields(line)
  639. if len(fs) != 13 {
  640. continue
  641. }
  642. rx, tx := uint64(0), uint64(0)
  643. fmt.Sscanf(fs[4], "%X:%X", &rx, &tx)
  644. rxQueued += rx
  645. txQueued += tx
  646. d, err := strconv.Atoi(string(fs[12]))
  647. if err != nil {
  648. continue
  649. }
  650. dropped += uint64(d)
  651. }
  652. stats = info.UdpStat{
  653. Listen: listening,
  654. Dropped: dropped,
  655. RxQueued: rxQueued,
  656. TxQueued: txQueued,
  657. }
  658. return stats, nil
  659. }
  660. func (h *Handler) GetProcesses() ([]int, error) {
  661. pids, err := h.cgroupManager.GetPids()
  662. if err != nil {
  663. return nil, err
  664. }
  665. return pids, nil
  666. }
  667. // Convert libcontainer stats to info.ContainerStats.
  668. func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
  669. ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode
  670. ret.Cpu.Usage.System = s.CpuStats.CpuUsage.UsageInKernelmode
  671. ret.Cpu.Usage.Total = s.CpuStats.CpuUsage.TotalUsage
  672. ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods
  673. ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
  674. ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime
  675. if !withPerCPU {
  676. return
  677. }
  678. if len(s.CpuStats.CpuUsage.PercpuUsage) == 0 {
  679. // libcontainer's 'GetStats' can leave 'PercpuUsage' nil if it skipped the
  680. // cpuacct subsystem.
  681. return
  682. }
  683. ret.Cpu.Usage.PerCpu = s.CpuStats.CpuUsage.PercpuUsage
  684. }
  685. func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) {
  686. ret.DiskIo.IoServiceBytes = diskStatsCopy(s.BlkioStats.IoServiceBytesRecursive)
  687. ret.DiskIo.IoServiced = diskStatsCopy(s.BlkioStats.IoServicedRecursive)
  688. ret.DiskIo.IoQueued = diskStatsCopy(s.BlkioStats.IoQueuedRecursive)
  689. ret.DiskIo.Sectors = diskStatsCopy(s.BlkioStats.SectorsRecursive)
  690. ret.DiskIo.IoServiceTime = diskStatsCopy(s.BlkioStats.IoServiceTimeRecursive)
  691. ret.DiskIo.IoWaitTime = diskStatsCopy(s.BlkioStats.IoWaitTimeRecursive)
  692. ret.DiskIo.IoMerged = diskStatsCopy(s.BlkioStats.IoMergedRecursive)
  693. ret.DiskIo.IoTime = diskStatsCopy(s.BlkioStats.IoTimeRecursive)
  694. }
  695. func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
  696. ret.Memory.Usage = s.MemoryStats.Usage.Usage
  697. ret.Memory.MaxUsage = s.MemoryStats.Usage.MaxUsage
  698. ret.Memory.Failcnt = s.MemoryStats.Usage.Failcnt
  699. if cgroups.IsCgroup2UnifiedMode() {
  700. ret.Memory.Cache = s.MemoryStats.Stats["file"]
  701. ret.Memory.RSS = s.MemoryStats.Stats["anon"]
  702. ret.Memory.Swap = s.MemoryStats.SwapUsage.Usage - s.MemoryStats.Usage.Usage
  703. ret.Memory.MappedFile = s.MemoryStats.Stats["file_mapped"]
  704. } else if s.MemoryStats.UseHierarchy {
  705. ret.Memory.Cache = s.MemoryStats.Stats["total_cache"]
  706. ret.Memory.RSS = s.MemoryStats.Stats["total_rss"]
  707. ret.Memory.Swap = s.MemoryStats.Stats["total_swap"]
  708. ret.Memory.MappedFile = s.MemoryStats.Stats["total_mapped_file"]
  709. } else {
  710. ret.Memory.Cache = s.MemoryStats.Stats["cache"]
  711. ret.Memory.RSS = s.MemoryStats.Stats["rss"]
  712. ret.Memory.Swap = s.MemoryStats.Stats["swap"]
  713. ret.Memory.MappedFile = s.MemoryStats.Stats["mapped_file"]
  714. }
  715. if v, ok := s.MemoryStats.Stats["pgfault"]; ok {
  716. ret.Memory.ContainerData.Pgfault = v
  717. ret.Memory.HierarchicalData.Pgfault = v
  718. }
  719. if v, ok := s.MemoryStats.Stats["pgmajfault"]; ok {
  720. ret.Memory.ContainerData.Pgmajfault = v
  721. ret.Memory.HierarchicalData.Pgmajfault = v
  722. }
  723. inactiveFileKeyName := "total_inactive_file"
  724. if cgroups.IsCgroup2UnifiedMode() {
  725. inactiveFileKeyName = "inactive_file"
  726. }
  727. workingSet := ret.Memory.Usage
  728. if v, ok := s.MemoryStats.Stats[inactiveFileKeyName]; ok {
  729. if workingSet < v {
  730. workingSet = 0
  731. } else {
  732. workingSet -= v
  733. }
  734. }
  735. ret.Memory.WorkingSet = workingSet
  736. }
  737. func setCPUSetStats(s *cgroups.Stats, ret *info.ContainerStats) {
  738. ret.CpuSet.MemoryMigrate = s.CPUSetStats.MemoryMigrate
  739. }
  740. func getNumaStats(memoryStats map[uint8]uint64) map[uint8]uint64 {
  741. stats := make(map[uint8]uint64, len(memoryStats))
  742. for node, usage := range memoryStats {
  743. stats[node] = usage
  744. }
  745. return stats
  746. }
  747. func setMemoryNumaStats(s *cgroups.Stats, ret *info.ContainerStats) {
  748. ret.Memory.ContainerData.NumaStats.File = getNumaStats(s.MemoryStats.PageUsageByNUMA.File.Nodes)
  749. ret.Memory.ContainerData.NumaStats.Anon = getNumaStats(s.MemoryStats.PageUsageByNUMA.Anon.Nodes)
  750. ret.Memory.ContainerData.NumaStats.Unevictable = getNumaStats(s.MemoryStats.PageUsageByNUMA.Unevictable.Nodes)
  751. ret.Memory.HierarchicalData.NumaStats.File = getNumaStats(s.MemoryStats.PageUsageByNUMA.Hierarchical.File.Nodes)
  752. ret.Memory.HierarchicalData.NumaStats.Anon = getNumaStats(s.MemoryStats.PageUsageByNUMA.Hierarchical.Anon.Nodes)
  753. ret.Memory.HierarchicalData.NumaStats.Unevictable = getNumaStats(s.MemoryStats.PageUsageByNUMA.Hierarchical.Unevictable.Nodes)
  754. }
  755. func setHugepageStats(s *cgroups.Stats, ret *info.ContainerStats) {
  756. ret.Hugetlb = make(map[string]info.HugetlbStats)
  757. for k, v := range s.HugetlbStats {
  758. ret.Hugetlb[k] = info.HugetlbStats{
  759. Usage: v.Usage,
  760. MaxUsage: v.MaxUsage,
  761. Failcnt: v.Failcnt,
  762. }
  763. }
  764. }
  765. func setNetworkStats(libcontainerStats *libcontainer.Stats, ret *info.ContainerStats) {
  766. ret.Network.Interfaces = make([]info.InterfaceStats, len(libcontainerStats.Interfaces))
  767. for i := range libcontainerStats.Interfaces {
  768. ret.Network.Interfaces[i] = info.InterfaceStats{
  769. Name: libcontainerStats.Interfaces[i].Name,
  770. RxBytes: libcontainerStats.Interfaces[i].RxBytes,
  771. RxPackets: libcontainerStats.Interfaces[i].RxPackets,
  772. RxErrors: libcontainerStats.Interfaces[i].RxErrors,
  773. RxDropped: libcontainerStats.Interfaces[i].RxDropped,
  774. TxBytes: libcontainerStats.Interfaces[i].TxBytes,
  775. TxPackets: libcontainerStats.Interfaces[i].TxPackets,
  776. TxErrors: libcontainerStats.Interfaces[i].TxErrors,
  777. TxDropped: libcontainerStats.Interfaces[i].TxDropped,
  778. }
  779. }
  780. // Add to base struct for backwards compatibility.
  781. if len(ret.Network.Interfaces) > 0 {
  782. ret.Network.InterfaceStats = ret.Network.Interfaces[0]
  783. }
  784. }
  785. // read from pids path not cpu
  786. func setThreadsStats(s *cgroups.Stats, ret *info.ContainerStats) {
  787. if s != nil {
  788. ret.Processes.ThreadsCurrent = s.PidsStats.Current
  789. ret.Processes.ThreadsMax = s.PidsStats.Limit
  790. }
  791. }
  792. func newContainerStats(libcontainerStats *libcontainer.Stats, includedMetrics container.MetricSet) *info.ContainerStats {
  793. ret := &info.ContainerStats{
  794. Timestamp: time.Now(),
  795. }
  796. if s := libcontainerStats.CgroupStats; s != nil {
  797. setCPUStats(s, ret, includedMetrics.Has(container.PerCpuUsageMetrics))
  798. if includedMetrics.Has(container.DiskIOMetrics) {
  799. setDiskIoStats(s, ret)
  800. }
  801. setMemoryStats(s, ret)
  802. if includedMetrics.Has(container.MemoryNumaMetrics) {
  803. setMemoryNumaStats(s, ret)
  804. }
  805. if includedMetrics.Has(container.HugetlbUsageMetrics) {
  806. setHugepageStats(s, ret)
  807. }
  808. if includedMetrics.Has(container.CPUSetMetrics) {
  809. setCPUSetStats(s, ret)
  810. }
  811. }
  812. if len(libcontainerStats.Interfaces) > 0 {
  813. setNetworkStats(libcontainerStats, ret)
  814. }
  815. return ret
  816. }