intelrdt.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. package intelrdt
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "github.com/moby/sys/mountinfo"
  14. "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
  15. "github.com/opencontainers/runc/libcontainer/configs"
  16. )
  17. /*
  18. * About Intel RDT features:
  19. * Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
  20. * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
  21. * two sub-features of RDT.
  22. *
  23. * Cache Allocation Technology (CAT) provides a way for the software to restrict
  24. * cache allocation to a defined 'subset' of L3 cache which may be overlapping
  25. * with other 'subsets'. The different subsets are identified by class of
  26. * service (CLOS) and each CLOS has a capacity bitmask (CBM).
  27. *
  28. * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
  29. * over memory bandwidth for the software. A user controls the resource by
  30. * indicating the percentage of maximum memory bandwidth or memory bandwidth
  31. * limit in MBps unit if MBA Software Controller is enabled.
  32. *
  33. * More details about Intel RDT CAT and MBA can be found in the section 17.18
  34. * of Intel Software Developer Manual:
  35. * https://software.intel.com/en-us/articles/intel-sdm
  36. *
  37. * About Intel RDT kernel interface:
  38. * In Linux 4.10 kernel or newer, the interface is defined and exposed via
  39. * "resource control" filesystem, which is a "cgroup-like" interface.
  40. *
  41. * Compared with cgroups, it has a similar process management lifecycle and
  42. * similar interfaces in a container. But unlike the cgroups hierarchy, it has
  43. * a single-level filesystem layout.
  44. *
  45. * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
  46. * "resource control" filesystem.
  47. *
  48. * Intel RDT "resource control" filesystem hierarchy:
  49. * mount -t resctrl resctrl /sys/fs/resctrl
  50. * tree /sys/fs/resctrl
  51. * /sys/fs/resctrl/
  52. * |-- info
  53. * | |-- L3
  54. * | | |-- cbm_mask
  55. * | | |-- min_cbm_bits
  56. * | | |-- num_closids
  57. * | |-- L3_MON
  58. * | | |-- max_threshold_occupancy
  59. * | | |-- mon_features
  60. * | | |-- num_rmids
  61. * | |-- MB
  62. * | |-- bandwidth_gran
  63. * | |-- delay_linear
  64. * | |-- min_bandwidth
  65. * | |-- num_closids
  66. * |-- ...
  67. * |-- schemata
  68. * |-- tasks
  69. * |-- <clos>
  70. * |-- ...
  71. * |-- schemata
  72. * |-- tasks
  73. *
  74. * For runc, we can make use of `tasks` and `schemata` configuration for L3
  75. * cache and memory bandwidth resources constraints.
  76. *
  77. * The file `tasks` has a list of tasks that belong to this group (e.g.,
  78. * "<container_id>" group). Tasks can be added to a group by writing the task ID
  79. * to the "tasks" file (which will automatically remove them from the previous
  80. * group to which they belonged). New tasks created by fork(2) and clone(2) are
  81. * added to the same group as their parent.
  82. *
  83. * The file `schemata` has a list of all the resources available to this group.
  84. * Each resource (L3 cache, memory bandwidth) has its own line and format.
  85. *
  86. * L3 cache schema:
  87. * It has allocation bitmasks/values for L3 cache on each socket, which
  88. * contains L3 cache id and capacity bitmask (CBM).
  89. * Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
  90. * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
  91. * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
  92. *
  93. * A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits
  94. * that can be set is limited by the max bits of the CBM. The max bits in the
  95. * CBM varies among supported Intel CPU models. The kernel checks whether the
  96. * value is valid when it is written. E.g., the default value 0xfffff in root
  97. * indicates the max bits of CBM is 20 bits, which maps to the entire L3 cache
  98. * capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
  99. *
  100. * Memory bandwidth schema:
  101. * It has allocation values for memory bandwidth on each socket, which contains
  102. * L3 cache id and memory bandwidth.
  103. * Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
  104. * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
  105. *
  106. * The minimum bandwidth percentage value for each CPU model is predefined and
  107. * can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
  108. * that is allocated is also dependent on the CPU model and can be looked up at
  109. * "info/MB/bandwidth_gran". The available bandwidth control steps are:
  110. * min_bw + N * bw_gran. Intermediate values are rounded to the next control
  111. * step available on the hardware.
  112. *
  113. * If MBA Software Controller is enabled through mount option "-o mba_MBps":
  114. * mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
  115. * We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
  116. * instead of "percentages". The kernel underneath would use a software feedback
  117. * mechanism or a "Software Controller" which reads the actual bandwidth using
  118. * MBM counters and adjust the memory bandwidth percentages to ensure:
  119. * "actual memory bandwidth < user specified memory bandwidth".
  120. *
  121. * For example, on a two-socket machine, the schema line could be
  122. * "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
  123. * and 7000 MBps memory bandwidth limit on socket 1.
  124. *
  125. * For more information about Intel RDT kernel interface:
  126. * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
  127. *
  128. * An example for runc:
  129. * Consider a two-socket machine with two L3 caches where the default CBM is
  130. * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
  131. * with a memory bandwidth granularity of 10%.
  132. *
  133. * Tasks inside the container only have access to the "upper" 7/11 of L3 cache
  134. * on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
  135. * maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
  136. *
  137. * "linux": {
  138. * "intelRdt": {
  139. * "l3CacheSchema": "L3:0=7f0;1=1f",
  140. * "memBwSchema": "MB:0=20;1=70"
  141. * }
  142. * }
  143. */
  144. type Manager interface {
  145. // Applies Intel RDT configuration to the process with the specified pid
  146. Apply(pid int) error
  147. // Returns statistics for Intel RDT
  148. GetStats() (*Stats, error)
  149. // Destroys the Intel RDT container-specific 'container_id' group
  150. Destroy() error
  151. // Returns Intel RDT path to save in a state file and to be able to
  152. // restore the object later
  153. GetPath() string
  154. // Set Intel RDT "resource control" filesystem as configured.
  155. Set(container *configs.Config) error
  156. }
  157. // This implements interface Manager
  158. type intelRdtManager struct {
  159. mu sync.Mutex
  160. config *configs.Config
  161. id string
  162. path string
  163. }
  164. func NewManager(config *configs.Config, id string, path string) Manager {
  165. return &intelRdtManager{
  166. config: config,
  167. id: id,
  168. path: path,
  169. }
  170. }
  // intelRdtTasks is the name of the "tasks" file inside a resctrl group.
  const intelRdtTasks = "tasks"

  var (
  	// catEnabled indicates whether Intel RDT/CAT is enabled.
  	catEnabled bool

  	// mbaEnabled indicates whether Intel RDT/MBA is enabled.
  	mbaEnabled bool

  	// mbaScEnabled indicates whether the Intel RDT/MBA Software Controller
  	// is enabled.
  	mbaScEnabled bool

  	// initOnce guards the one-time Intel RDT initialization.
  	initOnce sync.Once

  	// errNotFound is returned when no resctrl mount point can be found.
  	errNotFound = errors.New("Intel RDT resctrl mount point not found")
  )
  185. // Check if Intel RDT sub-features are enabled in featuresInit()
  186. func featuresInit() {
  187. initOnce.Do(func() {
  188. // 1. Check if hardware and kernel support Intel RDT sub-features
  189. flagsSet, err := parseCpuInfoFile("/proc/cpuinfo")
  190. if err != nil {
  191. return
  192. }
  193. // 2. Check if Intel RDT "resource control" filesystem is available.
  194. // The user guarantees to mount the filesystem.
  195. root, err := Root()
  196. if err != nil {
  197. return
  198. }
  199. // 3. Double check if Intel RDT sub-features are available in
  200. // "resource control" filesystem. Intel RDT sub-features can be
  201. // selectively disabled or enabled by kernel command line
  202. // (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
  203. if flagsSet.CAT {
  204. if _, err := os.Stat(filepath.Join(root, "info", "L3")); err == nil {
  205. catEnabled = true
  206. }
  207. }
  208. if mbaScEnabled {
  209. // We confirm MBA Software Controller is enabled in step 2,
  210. // MBA should be enabled because MBA Software Controller
  211. // depends on MBA
  212. mbaEnabled = true
  213. } else if flagsSet.MBA {
  214. if _, err := os.Stat(filepath.Join(root, "info", "MB")); err == nil {
  215. mbaEnabled = true
  216. }
  217. }
  218. if flagsSet.MBMTotal || flagsSet.MBMLocal || flagsSet.CMT {
  219. if _, err := os.Stat(filepath.Join(root, "info", "L3_MON")); err != nil {
  220. return
  221. }
  222. enabledMonFeatures, err = getMonFeatures(root)
  223. if err != nil {
  224. return
  225. }
  226. if enabledMonFeatures.mbmTotalBytes || enabledMonFeatures.mbmLocalBytes {
  227. mbmEnabled = true
  228. }
  229. if enabledMonFeatures.llcOccupancy {
  230. cmtEnabled = true
  231. }
  232. }
  233. })
  234. }
  235. // Return the mount point path of Intel RDT "resource control" filesysem
  236. func findIntelRdtMountpointDir(f io.Reader) (string, error) {
  237. mi, err := mountinfo.GetMountsFromReader(f, func(m *mountinfo.Info) (bool, bool) {
  238. // similar to mountinfo.FSTypeFilter but stops after the first match
  239. if m.FSType == "resctrl" {
  240. return false, true // don't skip, stop
  241. }
  242. return true, false // skip, keep going
  243. })
  244. if err != nil {
  245. return "", err
  246. }
  247. if len(mi) < 1 {
  248. return "", errNotFound
  249. }
  250. // Check if MBA Software Controller is enabled through mount option "-o mba_MBps"
  251. if strings.Contains(","+mi[0].VFSOptions+",", ",mba_MBps,") {
  252. mbaScEnabled = true
  253. }
  254. return mi[0].Mountpoint, nil
  255. }
  256. // For Root() use only.
  257. var (
  258. intelRdtRoot string
  259. rootMu sync.Mutex
  260. )
  261. // Root returns the Intel RDT "resource control" filesystem mount point.
  262. func Root() (string, error) {
  263. rootMu.Lock()
  264. defer rootMu.Unlock()
  265. if intelRdtRoot != "" {
  266. return intelRdtRoot, nil
  267. }
  268. f, err := os.Open("/proc/self/mountinfo")
  269. if err != nil {
  270. return "", err
  271. }
  272. root, err := findIntelRdtMountpointDir(f)
  273. f.Close()
  274. if err != nil {
  275. return "", err
  276. }
  277. if _, err := os.Stat(root); err != nil {
  278. return "", err
  279. }
  280. intelRdtRoot = root
  281. return intelRdtRoot, nil
  282. }
  // cpuInfoFlags records which Intel RDT related flags were seen in a
  // cpuinfo file.
  type cpuInfoFlags struct {
  	CAT bool // Cache Allocation Technology
  	MBA bool // Memory Bandwidth Allocation

  	// Memory Bandwidth Monitoring related.
  	MBMTotal bool
  	MBMLocal bool

  	CMT bool // Cache Monitoring Technology
  }

  // parseCpuInfoFile reads the cpuinfo-style file at path and reports which
  // Intel RDT flags appear on its first line starting with "flags". Later
  // "flags" lines are not looked at. A file without any such line yields an
  // all-false result; open and scan errors are returned as is.
  func parseCpuInfoFile(path string) (cpuInfoFlags, error) {
  	var found cpuInfoFlags

  	// Each recognized flag name and the field it switches on.
  	fieldFor := map[string]*bool{
  		"cat_l3":        &found.CAT,
  		"mba":           &found.MBA,
  		"cqm_mbm_total": &found.MBMTotal,
  		"cqm_mbm_local": &found.MBMLocal,
  		"cqm_occup_llc": &found.CMT,
  	}

  	cpuinfo, err := os.Open(path)
  	if err != nil {
  		return found, err
  	}
  	defer cpuinfo.Close()

  	scanner := bufio.NewScanner(cpuinfo)
  	for scanner.Scan() {
  		line := scanner.Text()
  		if !strings.HasPrefix(line, "flags") {
  			continue
  		}
  		for _, token := range strings.Split(line, " ") {
  			if field, known := fieldFor[token]; known {
  				*field = true
  			}
  		}
  		// The first "flags" line is all we need.
  		return found, nil
  	}
  	return found, scanner.Err()
  }
  327. // Gets a single uint64 value from the specified file.
  328. func getIntelRdtParamUint(path, file string) (uint64, error) {
  329. fileName := filepath.Join(path, file)
  330. contents, err := os.ReadFile(fileName)
  331. if err != nil {
  332. return 0, err
  333. }
  334. res, err := fscommon.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
  335. if err != nil {
  336. return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
  337. }
  338. return res, nil
  339. }
  // getIntelRdtParamString gets the contents of the file named file in
  // directory path as a string, with leading and trailing whitespace removed.
  // On a read error it returns the empty string together with that error.
  func getIntelRdtParamString(path, file string) (string, error) {
  	fullName := filepath.Join(path, file)
  	raw, err := os.ReadFile(fullName)
  	if err != nil {
  		return "", err
  	}
  	trimmed := bytes.TrimSpace(raw)
  	return string(trimmed), nil
  }
  348. func writeFile(dir, file, data string) error {
  349. if dir == "" {
  350. return fmt.Errorf("no such directory for %s", file)
  351. }
  352. if err := os.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
  353. return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
  354. }
  355. return nil
  356. }
  357. // Get the read-only L3 cache information
  358. func getL3CacheInfo() (*L3CacheInfo, error) {
  359. l3CacheInfo := &L3CacheInfo{}
  360. rootPath, err := Root()
  361. if err != nil {
  362. return l3CacheInfo, err
  363. }
  364. path := filepath.Join(rootPath, "info", "L3")
  365. cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
  366. if err != nil {
  367. return l3CacheInfo, err
  368. }
  369. minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
  370. if err != nil {
  371. return l3CacheInfo, err
  372. }
  373. numClosids, err := getIntelRdtParamUint(path, "num_closids")
  374. if err != nil {
  375. return l3CacheInfo, err
  376. }
  377. l3CacheInfo.CbmMask = cbmMask
  378. l3CacheInfo.MinCbmBits = minCbmBits
  379. l3CacheInfo.NumClosids = numClosids
  380. return l3CacheInfo, nil
  381. }
  382. // Get the read-only memory bandwidth information
  383. func getMemBwInfo() (*MemBwInfo, error) {
  384. memBwInfo := &MemBwInfo{}
  385. rootPath, err := Root()
  386. if err != nil {
  387. return memBwInfo, err
  388. }
  389. path := filepath.Join(rootPath, "info", "MB")
  390. bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
  391. if err != nil {
  392. return memBwInfo, err
  393. }
  394. delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
  395. if err != nil {
  396. return memBwInfo, err
  397. }
  398. minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
  399. if err != nil {
  400. return memBwInfo, err
  401. }
  402. numClosids, err := getIntelRdtParamUint(path, "num_closids")
  403. if err != nil {
  404. return memBwInfo, err
  405. }
  406. memBwInfo.BandwidthGran = bandwidthGran
  407. memBwInfo.DelayLinear = delayLinear
  408. memBwInfo.MinBandwidth = minBandwidth
  409. memBwInfo.NumClosids = numClosids
  410. return memBwInfo, nil
  411. }
  412. // Get diagnostics for last filesystem operation error from file info/last_cmd_status
  413. func getLastCmdStatus() (string, error) {
  414. rootPath, err := Root()
  415. if err != nil {
  416. return "", err
  417. }
  418. path := filepath.Join(rootPath, "info")
  419. lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status")
  420. if err != nil {
  421. return "", err
  422. }
  423. return lastCmdStatus, nil
  424. }
  425. // WriteIntelRdtTasks writes the specified pid into the "tasks" file
  426. func WriteIntelRdtTasks(dir string, pid int) error {
  427. if dir == "" {
  428. return fmt.Errorf("no such directory for %s", intelRdtTasks)
  429. }
  430. // Don't attach any pid if -1 is specified as a pid
  431. if pid != -1 {
  432. if err := os.WriteFile(filepath.Join(dir, intelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
  433. return newLastCmdError(fmt.Errorf("intelrdt: unable to add pid %d: %w", pid, err))
  434. }
  435. }
  436. return nil
  437. }
  438. // Check if Intel RDT/CAT is enabled
  439. func IsCATEnabled() bool {
  440. featuresInit()
  441. return catEnabled
  442. }
  443. // Check if Intel RDT/MBA is enabled
  444. func IsMBAEnabled() bool {
  445. featuresInit()
  446. return mbaEnabled
  447. }
  448. // Check if Intel RDT/MBA Software Controller is enabled
  449. func IsMBAScEnabled() bool {
  450. featuresInit()
  451. return mbaScEnabled
  452. }
  453. // Get the path of the clos group in "resource control" filesystem that the container belongs to
  454. func (m *intelRdtManager) getIntelRdtPath() (string, error) {
  455. rootPath, err := Root()
  456. if err != nil {
  457. return "", err
  458. }
  459. clos := m.id
  460. if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" {
  461. clos = m.config.IntelRdt.ClosID
  462. }
  463. return filepath.Join(rootPath, clos), nil
  464. }
  465. // Applies Intel RDT configuration to the process with the specified pid
  466. func (m *intelRdtManager) Apply(pid int) (err error) {
  467. // If intelRdt is not specified in config, we do nothing
  468. if m.config.IntelRdt == nil {
  469. return nil
  470. }
  471. path, err := m.getIntelRdtPath()
  472. if err != nil {
  473. return err
  474. }
  475. m.mu.Lock()
  476. defer m.mu.Unlock()
  477. if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
  478. // Check that the CLOS exists, i.e. it has been pre-configured to
  479. // conform with the runtime spec
  480. if _, err := os.Stat(path); err != nil {
  481. return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
  482. }
  483. }
  484. if err := os.MkdirAll(path, 0o755); err != nil {
  485. return newLastCmdError(err)
  486. }
  487. if err := WriteIntelRdtTasks(path, pid); err != nil {
  488. return newLastCmdError(err)
  489. }
  490. m.path = path
  491. return nil
  492. }
  493. // Destroys the Intel RDT container-specific 'container_id' group
  494. func (m *intelRdtManager) Destroy() error {
  495. // Don't remove resctrl group if closid has been explicitly specified. The
  496. // group is likely externally managed, i.e. by some other entity than us.
  497. // There are probably other containers/tasks sharing the same group.
  498. if m.config.IntelRdt == nil || m.config.IntelRdt.ClosID == "" {
  499. m.mu.Lock()
  500. defer m.mu.Unlock()
  501. if err := os.RemoveAll(m.GetPath()); err != nil {
  502. return err
  503. }
  504. m.path = ""
  505. }
  506. return nil
  507. }
  508. // Returns Intel RDT path to save in a state file and to be able to
  509. // restore the object later
  510. func (m *intelRdtManager) GetPath() string {
  511. if m.path == "" {
  512. m.path, _ = m.getIntelRdtPath()
  513. }
  514. return m.path
  515. }
  516. // Returns statistics for Intel RDT
  517. func (m *intelRdtManager) GetStats() (*Stats, error) {
  518. // If intelRdt is not specified in config
  519. if m.config.IntelRdt == nil {
  520. return nil, nil
  521. }
  522. m.mu.Lock()
  523. defer m.mu.Unlock()
  524. stats := newStats()
  525. rootPath, err := Root()
  526. if err != nil {
  527. return nil, err
  528. }
  529. // The read-only L3 cache and memory bandwidth schemata in root
  530. tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
  531. if err != nil {
  532. return nil, err
  533. }
  534. schemaRootStrings := strings.Split(tmpRootStrings, "\n")
  535. // The L3 cache and memory bandwidth schemata in container's clos group
  536. containerPath := m.GetPath()
  537. tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
  538. if err != nil {
  539. return nil, err
  540. }
  541. schemaStrings := strings.Split(tmpStrings, "\n")
  542. if IsCATEnabled() {
  543. // The read-only L3 cache information
  544. l3CacheInfo, err := getL3CacheInfo()
  545. if err != nil {
  546. return nil, err
  547. }
  548. stats.L3CacheInfo = l3CacheInfo
  549. // The read-only L3 cache schema in root
  550. for _, schemaRoot := range schemaRootStrings {
  551. if strings.Contains(schemaRoot, "L3") {
  552. stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
  553. }
  554. }
  555. // The L3 cache schema in container's clos group
  556. for _, schema := range schemaStrings {
  557. if strings.Contains(schema, "L3") {
  558. stats.L3CacheSchema = strings.TrimSpace(schema)
  559. }
  560. }
  561. }
  562. if IsMBAEnabled() {
  563. // The read-only memory bandwidth information
  564. memBwInfo, err := getMemBwInfo()
  565. if err != nil {
  566. return nil, err
  567. }
  568. stats.MemBwInfo = memBwInfo
  569. // The read-only memory bandwidth information
  570. for _, schemaRoot := range schemaRootStrings {
  571. if strings.Contains(schemaRoot, "MB") {
  572. stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
  573. }
  574. }
  575. // The memory bandwidth schema in container's clos group
  576. for _, schema := range schemaStrings {
  577. if strings.Contains(schema, "MB") {
  578. stats.MemBwSchema = strings.TrimSpace(schema)
  579. }
  580. }
  581. }
  582. if IsMBMEnabled() || IsCMTEnabled() {
  583. err = getMonitoringStats(containerPath, stats)
  584. if err != nil {
  585. return nil, err
  586. }
  587. }
  588. return stats, nil
  589. }
  590. // Set Intel RDT "resource control" filesystem as configured.
  591. func (m *intelRdtManager) Set(container *configs.Config) error {
  592. // About L3 cache schema:
  593. // It has allocation bitmasks/values for L3 cache on each socket,
  594. // which contains L3 cache id and capacity bitmask (CBM).
  595. // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
  596. // For example, on a two-socket machine, the schema line could be:
  597. // L3:0=ff;1=c0
  598. // which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
  599. // is 0xc0.
  600. //
  601. // The valid L3 cache CBM is a *contiguous bits set* and number of
  602. // bits that can be set is less than the max bit. The max bits in the
  603. // CBM is varied among supported Intel CPU models. Kernel will check
  604. // if it is valid when writing. e.g., default value 0xfffff in root
  605. // indicates the max bits of CBM is 20 bits, which mapping to entire
  606. // L3 cache capacity. Some valid CBM values to set in a group:
  607. // 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
  608. //
  609. //
  610. // About memory bandwidth schema:
  611. // It has allocation values for memory bandwidth on each socket, which
  612. // contains L3 cache id and memory bandwidth.
  613. // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
  614. // For example, on a two-socket machine, the schema line could be:
  615. // "MB:0=20;1=70"
  616. //
  617. // The minimum bandwidth percentage value for each CPU model is
  618. // predefined and can be looked up through "info/MB/min_bandwidth".
  619. // The bandwidth granularity that is allocated is also dependent on
  620. // the CPU model and can be looked up at "info/MB/bandwidth_gran".
  621. // The available bandwidth control steps are: min_bw + N * bw_gran.
  622. // Intermediate values are rounded to the next control step available
  623. // on the hardware.
  624. //
  625. // If MBA Software Controller is enabled through mount option
  626. // "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
  627. // We could specify memory bandwidth in "MBps" (Mega Bytes per second)
  628. // unit instead of "percentages". The kernel underneath would use a
  629. // software feedback mechanism or a "Software Controller" which reads
  630. // the actual bandwidth using MBM counters and adjust the memory
  631. // bandwidth percentages to ensure:
  632. // "actual memory bandwidth < user specified memory bandwidth".
  633. //
  634. // For example, on a two-socket machine, the schema line could be
  635. // "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
  636. // socket 0 and 7000 MBps memory bandwidth limit on socket 1.
  637. if container.IntelRdt != nil {
  638. path := m.GetPath()
  639. l3CacheSchema := container.IntelRdt.L3CacheSchema
  640. memBwSchema := container.IntelRdt.MemBwSchema
  641. // TODO: verify that l3CacheSchema and/or memBwSchema match the
  642. // existing schemata if ClosID has been specified. This is a more
  643. // involved than reading the file and doing plain string comparison as
  644. // the value written in does not necessarily match what gets read out
  645. // (leading zeros, cache id ordering etc).
  646. // Write a single joint schema string to schemata file
  647. if l3CacheSchema != "" && memBwSchema != "" {
  648. if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
  649. return err
  650. }
  651. }
  652. // Write only L3 cache schema string to schemata file
  653. if l3CacheSchema != "" && memBwSchema == "" {
  654. if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
  655. return err
  656. }
  657. }
  658. // Write only memory bandwidth schema string to schemata file
  659. if l3CacheSchema == "" && memBwSchema != "" {
  660. if err := writeFile(path, "schemata", memBwSchema); err != nil {
  661. return err
  662. }
  663. }
  664. }
  665. return nil
  666. }
  667. func newLastCmdError(err error) error {
  668. status, err1 := getLastCmdStatus()
  669. if err1 == nil {
  670. return fmt.Errorf("%w, last_cmd_status: %s", err, status)
  671. }
  672. return err
  673. }