cgroup.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package pod
  15. import (
  16. "fmt"
  17. "path/filepath"
  18. "strconv"
  19. "strings"
  20. "github.com/opencontainers/runtime-spec/specs-go"
  21. "golang.org/x/sys/unix"
  22. "yunion.io/x/log"
  23. "yunion.io/x/pkg/errors"
  24. "yunion.io/x/onecloud/pkg/util/fileutils2"
  25. "yunion.io/x/onecloud/pkg/util/procutils"
  26. )
// TCgroupController is the name of a cgroup controller (for example "memory").
// On cgroup v1 it is used as the controller directory name when building file
// paths; on cgroup v2 it is the controller that must be enabled for the
// container's cgroup.
type TCgroupController string
const (
	// CGROUP_PATH_SYSFS is the root directory under which all cgroup paths in
	// this file are built.
	CGROUP_PATH_SYSFS = "/sys/fs/cgroup"
	// CgroupControllerMemory identifies the memory controller.
	CgroupControllerMemory TCgroupController = "memory"
)
// CgroupUtil applies resource settings to the cgroup of a single container,
// identified by ctrId, below a parent cgroup path chosen at construction time.
// It is implemented once for cgroup v1 and once for cgroup v2.
type CgroupUtil interface {
	// SetMemoryLimitBytes sets the container's memory limit to bytes.
	SetMemoryLimitBytes(ctrId string, bytes int64) error
	// SetCPUCfs sets the container's CPU bandwidth limit from a quota and a
	// period.
	SetCPUCfs(ctrId string, quota int64, period int64) error
	// SetDevicesAllow applies device allow rules such as "c 226:128 rwm".
	// The cgroup v2 implementation currently only logs a warning.
	SetDevicesAllow(ctrId string, allows []string) error
	// SetPidsMax writes max to the container's pids.max.
	SetPidsMax(ctrId string, max int) error
	// SetCpusetCloneChildren turns on cgroup.clone_children for the cpuset
	// controller on cgroup v1; it is a no-op on cgroup v2.
	SetCpusetCloneChildren(ctrId string) error
	// SetCgroupKeyValue writes value to the cgroup file named key that
	// belongs to controller ctrler.
	SetCgroupKeyValue(ctrId string, ctrler TCgroupController, key, value string) error
}
// podCgroupV1Util implements CgroupUtil for the cgroup v1 layout, where files
// live at /sys/fs/cgroup/<controller>/<parentPath>/<ctrId>/<file>.
type podCgroupV1Util struct {
	// parentPath is the cgroup path, relative to each controller directory,
	// under which the per-container cgroups are located.
	parentPath string
}
  43. func NewPodCgroupV1Util(parentPath string) CgroupUtil {
  44. return &podCgroupV1Util{
  45. parentPath: parentPath,
  46. }
  47. }
  48. func (p podCgroupV1Util) getContainerControllerPath(controller string, ctrId string) string {
  49. return filepath.Join(CGROUP_PATH_SYSFS, controller, p.parentPath, ctrId)
  50. }
  51. func (p podCgroupV1Util) getContainerCGFilePath(controller string, ctrId string, filename string) string {
  52. return filepath.Join(p.getContainerControllerPath(controller, ctrId), filename)
  53. }
  54. func (p podCgroupV1Util) write(fp string, content string) error {
  55. cmd := fmt.Sprintf("echo %q > %s", content, fp)
  56. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  57. if err != nil {
  58. return errors.Wrapf(err, "%s: %s", cmd, out)
  59. }
  60. return nil
  61. }
  62. func (p podCgroupV1Util) SetMemoryLimitBytes(ctrId string, bytes int64) error {
  63. memFp := p.getContainerCGFilePath("memory", ctrId, "memory.limit_in_bytes")
  64. return p.write(memFp, fmt.Sprintf("%d", bytes))
  65. }
  66. func (p podCgroupV1Util) SetCgroupKeyValue(ctrId string, ctrler TCgroupController, key, value string) error {
  67. memFp := p.getContainerCGFilePath(string(ctrler), ctrId, key)
  68. return p.write(memFp, value)
  69. }
  70. func (p podCgroupV1Util) SetCPUCfs(ctrId string, quota int64, period int64) error {
  71. quotaFp := p.getContainerCGFilePath("cpu,cpuacct", ctrId, "cpu.cfs_quota_us")
  72. periodFp := p.getContainerCGFilePath("cpu,cpuacct", ctrId, "cpu.cfs_period_us")
  73. if err := p.write(quotaFp, fmt.Sprintf("%d", quota)); err != nil {
  74. return errors.Wrapf(err, "write quota: %d", quota)
  75. }
  76. if err := p.write(periodFp, fmt.Sprintf("%d", period)); err != nil {
  77. return errors.Wrapf(err, "write period: %d", period)
  78. }
  79. return nil
  80. }
  81. func (p podCgroupV1Util) SetDevicesAllow(ctrId string, allows []string) error {
  82. devicesFp := p.getContainerCGFilePath("devices", ctrId, "devices.allow")
  83. for _, allowStr := range allows {
  84. if err := p.write(devicesFp, allowStr); err != nil {
  85. return errors.Wrapf(err, "write: %s", allowStr)
  86. }
  87. }
  88. return nil
  89. }
  90. func (p podCgroupV1Util) SetPidsMax(ctrId string, max int) error {
  91. pidFp := p.getContainerCGFilePath("pids", ctrId, "pids.max")
  92. return p.write(pidFp, fmt.Sprintf("%d", max))
  93. }
  94. func (p podCgroupV1Util) SetCpusetCloneChildren(ctrId string) error {
  95. ccFp := p.getContainerCGFilePath("cpuset", ctrId, "cgroup.clone_children")
  96. return p.write(ccFp, "1")
  97. }
// podCgroupV2Util implements CgroupUtil for the cgroup v2 unified layout,
// where files live at /sys/fs/cgroup/<parentPath>/<ctrId>/<file>.
type podCgroupV2Util struct {
	// parentPath is the cgroup path, relative to the cgroup root, under which
	// the per-container cgroups are located.
	parentPath string
}
  101. func NewPodCgroupV2Util(parentPath string) CgroupUtil {
  102. return &podCgroupV2Util{
  103. parentPath: parentPath,
  104. }
  105. }
  106. // DetectCgroupVersion 检测当前系统的 cgroup 版本
  107. // 返回 true 表示 cgroup v2,false 表示 cgroup v1
  108. func DetectCgroupVersion() (bool, error) {
  109. cgroupPath := CGROUP_PATH_SYSFS
  110. if !fileutils2.Exists(cgroupPath) {
  111. // 如果 /sys/fs/cgroup 不存在,尝试 /cgroup
  112. cgroupPath = "/cgroup"
  113. if !fileutils2.Exists(cgroupPath) {
  114. return false, errors.Errorf("cgroup path not found")
  115. }
  116. }
  117. // 使用 stat 命令检测文件系统类型
  118. // cgroup v2 的文件系统类型是 "cgroup2fs"
  119. output, err := procutils.NewCommand("stat", "-fc", "%T", cgroupPath).Output()
  120. if err != nil {
  121. return false, errors.Wrapf(err, "stat cgroup path %s", cgroupPath)
  122. }
  123. cgroupfs := strings.TrimSpace(string(output))
  124. return cgroupfs == "cgroup2fs", nil
  125. }
  126. // NewPodCgroupUtil 根据系统自动检测 cgroup 版本并返回相应的实现
  127. func NewPodCgroupUtil(parentPath string) (CgroupUtil, error) {
  128. isV2, err := DetectCgroupVersion()
  129. if err != nil {
  130. // 如果检测失败,默认使用 v1(向后兼容)
  131. return NewPodCgroupV1Util(parentPath), nil
  132. }
  133. if isV2 {
  134. return NewPodCgroupV2Util(parentPath), nil
  135. }
  136. return NewPodCgroupV1Util(parentPath), nil
  137. }
  138. func (p podCgroupV2Util) getContainerPath(ctrId string) string {
  139. return filepath.Join(CGROUP_PATH_SYSFS, p.parentPath, ctrId)
  140. }
  141. func (p podCgroupV2Util) getContainerCGFilePath(ctrId string, filename string) string {
  142. return filepath.Join(p.getContainerPath(ctrId), filename)
  143. }
  144. func (p podCgroupV2Util) write(fp string, content string) error {
  145. cmd := fmt.Sprintf("echo %q > %s", content, fp)
  146. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  147. if err != nil {
  148. return errors.Wrapf(err, "%s: %s", cmd, out)
  149. }
  150. return nil
  151. }
  152. func (p podCgroupV2Util) getParentPath() string {
  153. return filepath.Join(CGROUP_PATH_SYSFS, p.parentPath)
  154. }
  155. func (p podCgroupV2Util) ensureController(ctrId string, controller string) error {
  156. containerPath := p.getContainerPath(ctrId)
  157. // 确保 cgroup 目录存在
  158. cmd := fmt.Sprintf("mkdir -p %s", containerPath)
  159. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  160. if err != nil {
  161. return errors.Wrapf(err, "create cgroup directory: %s", out)
  162. }
  163. // 首先检查当前 cgroup 的 controllers,看是否已经启用了该控制器
  164. controllersFp := filepath.Join(containerPath, "cgroup.controllers")
  165. cmd = fmt.Sprintf("cat %s 2>/dev/null || echo ''", controllersFp)
  166. out, err = procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  167. if err != nil {
  168. return errors.Wrapf(err, "read cgroup.controllers: %s", out)
  169. }
  170. currentControllers := strings.TrimSpace(string(out))
  171. // 如果当前 cgroup 已经启用了该控制器,直接返回
  172. if containsController(currentControllers, controller) {
  173. return nil
  174. }
  175. // 根据文档,需要在父 cgroup 的 subtree_control 中启用控制器
  176. // 这样当前 cgroup 才能使用该控制器
  177. parentPath := p.getParentPath()
  178. // 确保父 cgroup 目录存在
  179. cmd = fmt.Sprintf("mkdir -p %s", parentPath)
  180. out, err = procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  181. if err != nil {
  182. return errors.Wrapf(err, "create parent cgroup directory: %s", out)
  183. }
  184. parentSubtreeControlFp := filepath.Join(parentPath, "cgroup.subtree_control")
  185. // 读取父 cgroup 的 subtree_control
  186. cmd = fmt.Sprintf("cat %s 2>/dev/null || echo ''", parentSubtreeControlFp)
  187. out, err = procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  188. if err != nil {
  189. return errors.Wrapf(err, "read parent subtree_control: %s", out)
  190. }
  191. parentSubtreeControl := strings.TrimSpace(string(out))
  192. // 如果父 cgroup 的 subtree_control 中没有该控制器,添加它
  193. if !containsController(parentSubtreeControl, controller) {
  194. cmd = fmt.Sprintf("echo +%s > %s", controller, parentSubtreeControlFp)
  195. out, err = procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  196. if err != nil {
  197. return errors.Wrapf(err, "enable controller %s in parent cgroup: %s", controller, out)
  198. }
  199. }
  200. // 再次检查当前 cgroup 的 controllers,确认控制器已启用
  201. cmd = fmt.Sprintf("cat %s 2>/dev/null || echo ''", controllersFp)
  202. out, err = procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  203. if err != nil {
  204. return errors.Wrapf(err, "read cgroup.controllers after enable: %s", out)
  205. }
  206. finalControllers := strings.TrimSpace(string(out))
  207. if !containsController(finalControllers, controller) {
  208. return errors.Errorf("controller %s not available in cgroup %s after enabling in parent. Available controllers: %s", controller, containerPath, finalControllers)
  209. }
  210. return nil
  211. }
  212. func containsController(controllers string, controller string) bool {
  213. // controllers 格式类似 "cpu memory pids" (空格分隔)
  214. controllers = strings.TrimSpace(controllers)
  215. if len(controllers) == 0 {
  216. return false
  217. }
  218. for _, c := range strings.Fields(controllers) {
  219. if strings.TrimSpace(c) == controller {
  220. return true
  221. }
  222. }
  223. return false
  224. }
  225. func (p podCgroupV2Util) SetMemoryLimitBytes(ctrId string, bytes int64) error {
  226. if err := p.ensureController(ctrId, "memory"); err != nil {
  227. return errors.Wrap(err, "ensure memory controller")
  228. }
  229. // cgroup v2 使用 memory.max
  230. memFp := p.getContainerCGFilePath(ctrId, "memory.max")
  231. return p.write(memFp, fmt.Sprintf("%d", bytes))
  232. }
  233. func (p podCgroupV2Util) SetCPUCfs(ctrId string, quota int64, period int64) error {
  234. if err := p.ensureController(ctrId, "cpu"); err != nil {
  235. return errors.Wrap(err, "ensure cpu controller")
  236. }
  237. // cgroup v2 使用 cpu.max,格式为 "quota period" 或 "max" 表示无限制
  238. // 如果 quota 为 -1,表示无限制
  239. var cpuMaxValue string
  240. if quota == -1 {
  241. cpuMaxValue = "max"
  242. } else {
  243. cpuMaxValue = fmt.Sprintf("%d %d", quota, period)
  244. }
  245. cpuFp := p.getContainerCGFilePath(ctrId, "cpu.max")
  246. return p.write(cpuFp, cpuMaxValue)
  247. }
  248. // normalizePermissions 规范化权限字符串,将任意顺序的权限(如 "rmw")转换为标准格式 "rwm"
  249. // DevicePermissions.IsValid() 要求权限字符串必须是 "rwm" 的标准顺序
  250. func normalizePermissions(permStr string) string {
  251. var hasR, hasW, hasM bool
  252. for _, c := range permStr {
  253. switch c {
  254. case 'r':
  255. hasR = true
  256. case 'w':
  257. hasW = true
  258. case 'm':
  259. hasM = true
  260. }
  261. }
  262. var normalized strings.Builder
  263. if hasR {
  264. normalized.WriteRune('r')
  265. }
  266. if hasW {
  267. normalized.WriteRune('w')
  268. }
  269. if hasM {
  270. normalized.WriteRune('m')
  271. }
  272. return normalized.String()
  273. }
  274. // GetDeviceAllowRuleFromPath 从设备路径获取设备号并生成设备规则字符串
  275. // 返回格式: "c 226:128 rwm" 或 "b 8:0 rwm"
  276. // devicePath: 设备路径,如 "/dev/dri/renderD128"
  277. // permissions: 权限字符串,如 "rwm" 或 "rmw"(会被规范化)
  278. func GetDeviceAllowRuleFromPath(devicePath string, permissions string) (string, error) {
  279. // 获取设备信息(使用 unix.Stat 避免类型转换问题)
  280. var stat unix.Stat_t
  281. if err := unix.Stat(devicePath, &stat); err != nil {
  282. return "", errors.Wrapf(err, "stat device: %s", devicePath)
  283. }
  284. // 获取设备号
  285. major := unix.Major(uint64(stat.Rdev))
  286. minor := unix.Minor(uint64(stat.Rdev))
  287. // 判断设备类型(通过 mode 判断)
  288. var devType string
  289. mode := stat.Mode
  290. if mode&unix.S_IFCHR != 0 {
  291. devType = "c" // 字符设备
  292. } else if mode&unix.S_IFBLK != 0 {
  293. devType = "b" // 块设备
  294. } else {
  295. return "", errors.Errorf("not a device file: %s (mode: %o)", devicePath, mode)
  296. }
  297. // 规范化权限字符串
  298. normalizedPerms := normalizePermissions(permissions)
  299. if normalizedPerms == "" {
  300. normalizedPerms = "rwm" // 默认权限
  301. }
  302. // 生成设备规则字符串
  303. ruleStr := fmt.Sprintf("%s %d:%d %s", devType, major, minor, normalizedPerms)
  304. return ruleStr, nil
  305. }
  306. // parseDeviceRule 解析设备规则字符串,格式如 "c 13:* rwm" 或 "b 8:0 rwm"
  307. // 格式: <type> <major>:<minor> <permissions>
  308. // type: 'c' (char), 'b' (block), 'a' (all)
  309. // major/minor: 数字或 '*' 表示通配符
  310. // permissions: 'r' (read), 'w' (write), 'm' (mknod) 的组合
  311. func parseDeviceRule(ruleStr string) (*specs.LinuxDeviceCgroup, error) {
  312. parts := strings.Fields(ruleStr)
  313. if len(parts) != 3 {
  314. return nil, errors.Errorf("invalid device rule format: %s, expected format: <type> <major>:<minor> <permissions>", ruleStr)
  315. }
  316. // 解析设备类型
  317. var devType string
  318. switch parts[0] {
  319. case "c":
  320. devType = "c"
  321. case "b":
  322. devType = "b"
  323. case "a":
  324. devType = "a"
  325. default:
  326. return nil, errors.Errorf("invalid device type: %s, must be 'c', 'b', or 'a'", parts[0])
  327. }
  328. // 解析 major:minor
  329. majorMinor := strings.Split(parts[1], ":")
  330. if len(majorMinor) != 2 {
  331. return nil, errors.Errorf("invalid major:minor format: %s", parts[1])
  332. }
  333. var major, minor int64 = -1, -1
  334. if majorMinor[0] != "*" {
  335. var err error
  336. major, err = strconv.ParseInt(majorMinor[0], 10, 64)
  337. if err != nil {
  338. return nil, errors.Wrapf(err, "invalid major number: %s", majorMinor[0])
  339. }
  340. }
  341. if majorMinor[1] != "*" {
  342. var err error
  343. minor, err = strconv.ParseInt(majorMinor[1], 10, 64)
  344. if err != nil {
  345. return nil, errors.Wrapf(err, "invalid minor number: %s", majorMinor[1])
  346. }
  347. }
  348. // 解析权限:先规范化权限字符串为标准格式 "rwm"
  349. normalizedPerms := normalizePermissions(parts[2])
  350. if normalizedPerms == "" {
  351. normalizedPerms = "rwm" // 默认权限
  352. }
  353. return &specs.LinuxDeviceCgroup{
  354. Type: devType,
  355. Major: &major,
  356. Minor: &minor,
  357. Access: normalizedPerms,
  358. Allow: true, // devices.allow 表示允许
  359. }, nil
  360. }
  361. // ConvertDeviceRulesToSpecsDevices 将设备规则字符串和容器设备配置转换为 specs.LinuxDeviceCgroup 列表
  362. // 用于通过 containerd API 更新 container spec 中的 devices
  363. func ConvertDeviceRulesToSpecsDevices(allows []string) ([]*specs.LinuxDeviceCgroup, error) {
  364. // 解析设备规则
  365. deviceRules := make([]*specs.LinuxDeviceCgroup, 0, len(allows))
  366. for _, allowStr := range allows {
  367. rule, err := parseDeviceRule(allowStr)
  368. if err != nil {
  369. return nil, errors.Wrapf(err, "parse device rule: %s", allowStr)
  370. }
  371. deviceRules = append(deviceRules, rule)
  372. }
  373. return deviceRules, nil
  374. }
  375. func (p podCgroupV2Util) SetDevicesAllow(ctrId string, allows []string) error {
  376. if len(allows) == 0 {
  377. return nil
  378. }
  379. log.Warningf("=========skip set devices allow for container %s", ctrId)
  380. return nil
  381. /*log.Infof("[SetDevicesAllow] Setting device allow rules for container %s: %v", ctrId, allows)
  382. // 解析设备规则
  383. deviceRules := make([]*configs.DeviceRule, 0, len(allows))
  384. for _, allowStr := range allows {
  385. rule, err := parseDeviceRule(allowStr)
  386. if err != nil {
  387. return errors.Wrapf(err, "parse device rule: %s", allowStr)
  388. }
  389. deviceRules = append(deviceRules, rule)
  390. log.Infof("[SetDevicesAllow] Parsed rule: type=%v, major=%d, minor=%d, permissions=%s, allow=%v",
  391. rule.Type, rule.Major, rule.Minor, rule.Permissions, rule.Allow)
  392. }
  393. // 转换为 specs.LinuxDeviceCgroup
  394. specDevices := make([]specs.LinuxDeviceCgroup, 0, len(deviceRules))
  395. for _, rule := range deviceRules {
  396. specDevices = append(specDevices, convertDeviceRuleToSpec(rule))
  397. }
  398. // 使用 containerd 的 DeviceFilter 函数生成 eBPF 设备过滤程序
  399. insts, license, err := devicefilter.DeviceFilter(specDevices)
  400. if err != nil {
  401. return errors.Wrap(err, "generate eBPF device filter program using containerd DeviceFilter")
  402. }
  403. // 获取 cgroup 路径
  404. containerPath := p.getContainerPath(ctrId)
  405. log.Infof("[SetDevicesAllow] Using cgroup path: %s, specDevices: %s", containerPath, jsonutils.Marshal(specDevices).PrettyString())
  406. // 打开 cgroup 目录(参考 containerd 实现)
  407. dirFD, err := unix.Open(containerPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0o600)
  408. if err != nil {
  409. return errors.Wrapf(err, "cannot get dir FD for %s", containerPath)
  410. }
  411. defer unix.Close(dirFD)
  412. // 加载并附加 eBPF 程序(参考 containerd 的 setDevices 逻辑)
  413. if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
  414. if !canSkipEBPFError(specDevices) {
  415. return errors.Wrap(err, "load and attach eBPF device filter")
  416. }
  417. log.Warningf("[SetDevicesAllow] Failed to attach eBPF device filter, but error can be skipped: %v", err)
  418. }
  419. log.Infof("[SetDevicesAllow] Successfully set device allow rules for container %s", ctrId)*/
  420. return nil
  421. }
  422. func (p podCgroupV2Util) SetPidsMax(ctrId string, max int) error {
  423. if err := p.ensureController(ctrId, "pids"); err != nil {
  424. return errors.Wrap(err, "ensure pids controller")
  425. }
  426. // cgroup v2 和 v1 都使用 pids.max
  427. pidFp := p.getContainerCGFilePath(ctrId, "pids.max")
  428. return p.write(pidFp, fmt.Sprintf("%d", max))
  429. }
// SetCpusetCloneChildren is a no-op on cgroup v2 and always returns nil.
// It exists only so that podCgroupV2Util satisfies the CgroupUtil interface.
func (p podCgroupV2Util) SetCpusetCloneChildren(ctrId string) error {
	// Per the original author's note: cgroup v2 has no cgroup.clone_children
	// file, and child cgroups inherit the parent's cpuset configuration
	// automatically, so there is nothing to write here.
	return nil
}
  436. func (p podCgroupV2Util) SetCgroupKeyValue(ctrId string, ctrler TCgroupController, key, value string) error {
  437. // 对于通用键值设置,需要确保相应的控制器已启用
  438. controller := string(ctrler)
  439. if controller != "" {
  440. if err := p.ensureController(ctrId, controller); err != nil {
  441. return errors.Wrapf(err, "ensure %s controller", controller)
  442. }
  443. }
  444. // cgroup v2 中,文件路径直接在统一路径下
  445. fp := p.getContainerCGFilePath(ctrId, key)
  446. return p.write(fp, value)
  447. }
  448. // checkEbpfSupport 检查系统是否支持 eBPF
  449. func (p podCgroupV2Util) checkEbpfSupport() error {
  450. // 检查内核版本(需要 >= 4.15)
  451. // 这里只做基本检查,详细的版本检查可能需要解析 /proc/version
  452. // 实际的内核版本检查在 eBPF 库加载时会进行
  453. // 检查 /sys/fs/bpf 是否存在(如果使用 pinning)
  454. if !fileutils2.Exists("/sys/fs/bpf") {
  455. log.Debugf("[checkEbpfSupport] /sys/fs/bpf does not exist (may be normal if not using pinning)")
  456. }
  457. // 检查 memlock 限制
  458. cmd := "ulimit -l"
  459. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  460. if err == nil {
  461. memlock := strings.TrimSpace(string(out))
  462. if memlock != "unlimited" {
  463. log.Debugf("[checkEbpfSupport] memlock limit: %s (recommended: unlimited)", memlock)
  464. }
  465. }
  466. return nil
  467. }
  468. // verifyEbpfAttached 验证 eBPF 程序是否成功附加到 cgroup
  469. func (p podCgroupV2Util) verifyEbpfAttached(cgroupPath string) error {
  470. // 方法1: 尝试使用 bpftool 检查(如果可用)
  471. // bpftool cgroup tree /sys/fs/cgroup/<path>
  472. cmd := fmt.Sprintf("bpftool cgroup tree %s 2>/dev/null | grep -q device || echo 'not found'", cgroupPath)
  473. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  474. if err == nil {
  475. output := strings.TrimSpace(string(out))
  476. if output != "not found" {
  477. log.Debugf("[verifyEbpfAttached] Verified eBPF program attached via bpftool")
  478. return nil
  479. }
  480. log.Debugf("[verifyEbpfAttached] bpftool did not find device filter (may be normal)")
  481. } else {
  482. log.Debugf("[verifyEbpfAttached] bpftool not available or failed: %v", err)
  483. }
  484. // 方法2: 检查是否有进程在该 cgroup 中
  485. // 如果有进程,可以尝试实际测试设备访问
  486. cgroupProcsPath := filepath.Join(cgroupPath, "cgroup.procs")
  487. if fileutils2.Exists(cgroupProcsPath) {
  488. cmd := fmt.Sprintf("cat %s 2>/dev/null | wc -l", cgroupProcsPath)
  489. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", cmd).Output()
  490. if err == nil {
  491. procCount := strings.TrimSpace(string(out))
  492. log.Debugf("[verifyEbpfAttached] Processes in cgroup: %s", procCount)
  493. }
  494. }
  495. // 注意:由于 eBPF 程序附加是异步的,并且验证可能需要特殊权限,
  496. // 这里只做基本的检查,不返回错误
  497. return nil
  498. }