utils.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. //go:build linux
  2. // +build linux
  3. // Copyright 2021 Google Inc. All Rights Reserved.
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. // Utilities.
  17. package resctrl
  18. import (
  19. "bufio"
  20. "bytes"
  21. "fmt"
  22. "io/ioutil"
  23. "os"
  24. "path/filepath"
  25. "strconv"
  26. "strings"
  27. "github.com/opencontainers/runc/libcontainer/cgroups"
  28. "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
  29. "github.com/opencontainers/runc/libcontainer/intelrdt"
  30. )
  31. const (
  32. cpuCgroup = "cpu"
  33. rootContainer = "/"
  34. monitoringGroupDir = "mon_groups"
  35. processTask = "task"
  36. cpusFileName = "cpus"
  37. cpusListFileName = "cpus_list"
  38. schemataFileName = "schemata"
  39. tasksFileName = "tasks"
  40. infoDirName = "info"
  41. monDataDirName = "mon_data"
  42. monGroupsDirName = "mon_groups"
  43. noPidsPassedError = "there are no pids passed"
  44. noContainerNameError = "there are no container name passed"
  45. noControlGroupFoundError = "couldn't find control group matching container"
  46. llcOccupancyFileName = "llc_occupancy"
  47. mbmLocalBytesFileName = "mbm_local_bytes"
  48. mbmTotalBytesFileName = "mbm_total_bytes"
  49. containerPrefix = '/'
  50. minContainerNameLen = 2 // "/<container_name>" e.g. "/a"
  51. unavailable = "Unavailable"
  52. monGroupPrefix = "cadvisor"
  53. )
  54. var (
  55. rootResctrl = ""
  56. pidsPath = ""
  57. processPath = "/proc"
  58. enabledMBM = false
  59. enabledCMT = false
  60. isResctrlInitialized = false
  61. groupDirectories = map[string]struct{}{
  62. cpusFileName: {},
  63. cpusListFileName: {},
  64. infoDirName: {},
  65. monDataDirName: {},
  66. monGroupsDirName: {},
  67. schemataFileName: {},
  68. tasksFileName: {},
  69. }
  70. )
  71. func Setup() error {
  72. var err error
  73. rootResctrl, err = intelrdt.Root()
  74. if err != nil {
  75. return fmt.Errorf("unable to initialize resctrl: %v", err)
  76. }
  77. if cgroups.IsCgroup2UnifiedMode() {
  78. pidsPath = fs2.UnifiedMountpoint
  79. } else {
  80. pidsPath = filepath.Join(fs2.UnifiedMountpoint, cpuCgroup)
  81. }
  82. enabledMBM = intelrdt.IsMBMEnabled()
  83. enabledCMT = intelrdt.IsCMTEnabled()
  84. isResctrlInitialized = true
  85. return nil
  86. }
  87. func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error), inHostNamespace bool) (string, error) {
  88. if containerName == rootContainer {
  89. return rootResctrl, nil
  90. }
  91. pids, err := getContainerPids()
  92. if err != nil {
  93. return "", err
  94. }
  95. if len(pids) == 0 {
  96. return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName)
  97. }
  98. // Firstly, find the control group to which the container belongs.
  99. // Consider the root group.
  100. controlGroupPath, err := findGroup(rootResctrl, pids, true, false)
  101. if err != nil {
  102. return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err)
  103. }
  104. if controlGroupPath == "" {
  105. return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName)
  106. }
  107. // Check if there is any monitoring group.
  108. monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true)
  109. if err != nil {
  110. return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err)
  111. }
  112. // Prepare new one if not exists.
  113. if monGroupPath == "" {
  114. // Remove leading prefix.
  115. // e.g. /my/container -> my/container
  116. if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix {
  117. containerName = containerName[1:]
  118. }
  119. // Add own prefix and use `-` instead `/`.
  120. // e.g. my/container -> cadvisor-my-container
  121. properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1))
  122. monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName)
  123. err = os.MkdirAll(monGroupPath, os.ModePerm)
  124. if err != nil {
  125. return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err)
  126. }
  127. if !inHostNamespace {
  128. processPath = "/rootfs/proc"
  129. }
  130. for _, pid := range pids {
  131. processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask))
  132. if err != nil {
  133. return "", err
  134. }
  135. for _, thread := range processThreads {
  136. err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread)
  137. if err != nil {
  138. secondError := os.Remove(monGroupPath)
  139. if secondError != nil {
  140. return "", fmt.Errorf(
  141. "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v",
  142. containerName, err, containerName, secondError)
  143. }
  144. return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err)
  145. }
  146. }
  147. }
  148. }
  149. return monGroupPath, nil
  150. }
  151. func getPids(containerName string) ([]int, error) {
  152. if len(containerName) == 0 {
  153. // No container name passed.
  154. return nil, fmt.Errorf(noContainerNameError)
  155. }
  156. pids, err := cgroups.GetAllPids(filepath.Join(pidsPath, containerName))
  157. if err != nil {
  158. return nil, fmt.Errorf("couldn't obtain pids for %q container: %v", containerName, err)
  159. }
  160. return pids, nil
  161. }
  162. // getAllProcessThreads obtains all available processes from directory.
  163. // e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218
  164. // func will return [4215, 4216, 4217, 4218].
  165. func getAllProcessThreads(path string) ([]int, error) {
  166. processThreads := make([]int, 0)
  167. threadDirs, err := ioutil.ReadDir(path)
  168. if err != nil {
  169. return processThreads, err
  170. }
  171. for _, dir := range threadDirs {
  172. pid, err := strconv.Atoi(dir.Name())
  173. if err != nil {
  174. return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err)
  175. }
  176. processThreads = append(processThreads, pid)
  177. }
  178. return processThreads, nil
  179. }
  180. // findGroup returns the path of a control/monitoring group in which the pids are.
  181. func findGroup(group string, pids []string, includeGroup bool, exclusive bool) (string, error) {
  182. if len(pids) == 0 {
  183. return "", fmt.Errorf(noPidsPassedError)
  184. }
  185. availablePaths := make([]string, 0)
  186. if includeGroup {
  187. availablePaths = append(availablePaths, group)
  188. }
  189. files, err := ioutil.ReadDir(group)
  190. for _, file := range files {
  191. if _, ok := groupDirectories[file.Name()]; !ok {
  192. availablePaths = append(availablePaths, filepath.Join(group, file.Name()))
  193. }
  194. }
  195. if err != nil {
  196. return "", fmt.Errorf("couldn't obtain groups paths: %w", err)
  197. }
  198. for _, path := range availablePaths {
  199. groupFound, err := arePIDsInGroup(path, pids, exclusive)
  200. if err != nil {
  201. return "", err
  202. }
  203. if groupFound {
  204. return path, nil
  205. }
  206. }
  207. return "", nil
  208. }
  209. // arePIDsInGroup returns true if all of the pids are within control group.
  210. func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) {
  211. if len(pids) == 0 {
  212. return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError)
  213. }
  214. tasks, err := readTasksFile(filepath.Join(path, tasksFileName))
  215. if err != nil {
  216. return false, err
  217. }
  218. any := false
  219. for _, pid := range pids {
  220. _, ok := tasks[pid]
  221. if !ok {
  222. // There are missing pids within group.
  223. if any {
  224. return false, fmt.Errorf("there should be all pids in group")
  225. }
  226. return false, nil
  227. }
  228. any = true
  229. }
  230. // Check if there should be only passed pids in group.
  231. if exclusive {
  232. if len(tasks) != len(pids) {
  233. return false, fmt.Errorf("group should have container pids only")
  234. }
  235. }
  236. return true, nil
  237. }
  238. // readTasksFile returns pids map from given tasks path.
  239. func readTasksFile(tasksPath string) (map[string]struct{}, error) {
  240. tasks := make(map[string]struct{})
  241. tasksFile, err := os.Open(tasksPath)
  242. if err != nil {
  243. return tasks, fmt.Errorf("couldn't read tasks file from %q path: %w", tasksPath, err)
  244. }
  245. defer tasksFile.Close()
  246. scanner := bufio.NewScanner(tasksFile)
  247. for scanner.Scan() {
  248. tasks[scanner.Text()] = struct{}{}
  249. }
  250. if err := scanner.Err(); err != nil {
  251. return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err)
  252. }
  253. return tasks, nil
  254. }
  255. func readStatFrom(path string, vendorID string) (uint64, error) {
  256. context, err := ioutil.ReadFile(path)
  257. if err != nil {
  258. return 0, err
  259. }
  260. contextString := string(bytes.TrimSpace(context))
  261. if contextString == unavailable {
  262. err := fmt.Errorf("\"Unavailable\" value from file %q", path)
  263. if vendorID == "AuthenticAMD" {
  264. kernelBugzillaLink := "https://bugzilla.kernel.org/show_bug.cgi?id=213311"
  265. err = fmt.Errorf("%v, possible bug: %q", err, kernelBugzillaLink)
  266. }
  267. return 0, err
  268. }
  269. stat, err := strconv.ParseUint(contextString, 10, 64)
  270. if err != nil {
  271. return stat, fmt.Errorf("unable to parse %q as a uint from file %q", string(context), path)
  272. }
  273. return stat, nil
  274. }
  275. func getIntelRDTStatsFrom(path string, vendorID string) (intelrdt.Stats, error) {
  276. stats := intelrdt.Stats{}
  277. statsDirectories, err := filepath.Glob(filepath.Join(path, monDataDirName, "*"))
  278. if err != nil {
  279. return stats, err
  280. }
  281. if len(statsDirectories) == 0 {
  282. return stats, fmt.Errorf("there is no mon_data stats directories: %q", path)
  283. }
  284. var cmtStats []intelrdt.CMTNumaNodeStats
  285. var mbmStats []intelrdt.MBMNumaNodeStats
  286. for _, dir := range statsDirectories {
  287. if enabledCMT {
  288. llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName), vendorID)
  289. if err != nil {
  290. return stats, err
  291. }
  292. cmtStats = append(cmtStats, intelrdt.CMTNumaNodeStats{LLCOccupancy: llcOccupancy})
  293. }
  294. if enabledMBM {
  295. mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName), vendorID)
  296. if err != nil {
  297. return stats, err
  298. }
  299. mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName), vendorID)
  300. if err != nil {
  301. return stats, err
  302. }
  303. mbmStats = append(mbmStats, intelrdt.MBMNumaNodeStats{
  304. MBMTotalBytes: mbmTotalBytes,
  305. MBMLocalBytes: mbmLocalBytes,
  306. })
  307. }
  308. }
  309. stats.CMTStats = &cmtStats
  310. stats.MBMStats = &mbmStats
  311. return stats, nil
  312. }