fs.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. package fs
  2. import (
  3. "errors"
  4. "fmt"
  5. "os"
  6. "sync"
  7. "golang.org/x/sys/unix"
  8. "github.com/opencontainers/runc/libcontainer/cgroups"
  9. "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
  10. "github.com/opencontainers/runc/libcontainer/configs"
  11. )
  12. var subsystems = []subsystem{
  13. &CpusetGroup{},
  14. &DevicesGroup{},
  15. &MemoryGroup{},
  16. &CpuGroup{},
  17. &CpuacctGroup{},
  18. &PidsGroup{},
  19. &BlkioGroup{},
  20. &HugetlbGroup{},
  21. &NetClsGroup{},
  22. &NetPrioGroup{},
  23. &PerfEventGroup{},
  24. &FreezerGroup{},
  25. &RdmaGroup{},
  26. &NameGroup{GroupName: "name=systemd", Join: true},
  27. }
  28. var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
  29. func init() {
  30. // If using cgroups-hybrid mode then add a "" controller indicating
  31. // it should join the cgroups v2.
  32. if cgroups.IsCgroup2HybridMode() {
  33. subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
  34. }
  35. }
  36. type subsystem interface {
  37. // Name returns the name of the subsystem.
  38. Name() string
  39. // GetStats fills in the stats for the subsystem.
  40. GetStats(path string, stats *cgroups.Stats) error
  41. // Apply creates and joins a cgroup, adding pid into it. Some
  42. // subsystems use resources to pre-configure the cgroup parents
  43. // before creating or joining it.
  44. Apply(path string, r *configs.Resources, pid int) error
  45. // Set sets the cgroup resources.
  46. Set(path string, r *configs.Resources) error
  47. }
  48. type manager struct {
  49. mu sync.Mutex
  50. cgroups *configs.Cgroup
  51. paths map[string]string
  52. }
  53. func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
  54. // Some v1 controllers (cpu, cpuset, and devices) expect
  55. // cgroups.Resources to not be nil in Apply.
  56. if cg.Resources == nil {
  57. return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
  58. }
  59. if cg.Resources.Unified != nil {
  60. return nil, cgroups.ErrV1NoUnified
  61. }
  62. if paths == nil {
  63. var err error
  64. paths, err = initPaths(cg)
  65. if err != nil {
  66. return nil, err
  67. }
  68. }
  69. return &manager{
  70. cgroups: cg,
  71. paths: paths,
  72. }, nil
  73. }
  74. // isIgnorableError returns whether err is a permission error (in the loose
  75. // sense of the word). This includes EROFS (which for an unprivileged user is
  76. // basically a permission error) and EACCES (for similar reasons) as well as
  77. // the normal EPERM.
  78. func isIgnorableError(rootless bool, err error) bool {
  79. // We do not ignore errors if we are root.
  80. if !rootless {
  81. return false
  82. }
  83. // Is it an ordinary EPERM?
  84. if errors.Is(err, os.ErrPermission) {
  85. return true
  86. }
  87. // Handle some specific syscall errors.
  88. var errno unix.Errno
  89. if errors.As(err, &errno) {
  90. return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
  91. }
  92. return false
  93. }
  94. func (m *manager) Apply(pid int) (err error) {
  95. m.mu.Lock()
  96. defer m.mu.Unlock()
  97. c := m.cgroups
  98. for _, sys := range subsystems {
  99. name := sys.Name()
  100. p, ok := m.paths[name]
  101. if !ok {
  102. continue
  103. }
  104. if err := sys.Apply(p, c.Resources, pid); err != nil {
  105. // In the case of rootless (including euid=0 in userns), where an
  106. // explicit cgroup path hasn't been set, we don't bail on error in
  107. // case of permission problems here, but do delete the path from
  108. // the m.paths map, since it is either non-existent and could not
  109. // be created, or the pid could not be added to it.
  110. //
  111. // Cases where limits for the subsystem have been set are handled
  112. // later by Set, which fails with a friendly error (see
  113. // if path == "" in Set).
  114. if isIgnorableError(c.Rootless, err) && c.Path == "" {
  115. delete(m.paths, name)
  116. continue
  117. }
  118. return err
  119. }
  120. }
  121. return nil
  122. }
  123. func (m *manager) Destroy() error {
  124. m.mu.Lock()
  125. defer m.mu.Unlock()
  126. return cgroups.RemovePaths(m.paths)
  127. }
  128. func (m *manager) Path(subsys string) string {
  129. m.mu.Lock()
  130. defer m.mu.Unlock()
  131. return m.paths[subsys]
  132. }
  133. func (m *manager) GetStats() (*cgroups.Stats, error) {
  134. m.mu.Lock()
  135. defer m.mu.Unlock()
  136. stats := cgroups.NewStats()
  137. for _, sys := range subsystems {
  138. path := m.paths[sys.Name()]
  139. if path == "" {
  140. continue
  141. }
  142. if err := sys.GetStats(path, stats); err != nil {
  143. return nil, err
  144. }
  145. }
  146. return stats, nil
  147. }
  148. func (m *manager) Set(r *configs.Resources) error {
  149. if r == nil {
  150. return nil
  151. }
  152. if r.Unified != nil {
  153. return cgroups.ErrV1NoUnified
  154. }
  155. m.mu.Lock()
  156. defer m.mu.Unlock()
  157. for _, sys := range subsystems {
  158. path := m.paths[sys.Name()]
  159. if err := sys.Set(path, r); err != nil {
  160. // When rootless is true, errors from the device subsystem
  161. // are ignored, as it is really not expected to work.
  162. if m.cgroups.Rootless && sys.Name() == "devices" {
  163. continue
  164. }
  165. // However, errors from other subsystems are not ignored.
  166. // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
  167. if path == "" {
  168. // We never created a path for this cgroup, so we cannot set
  169. // limits for it (though we have already tried at this point).
  170. return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
  171. }
  172. return err
  173. }
  174. }
  175. return nil
  176. }
  177. // Freeze toggles the container's freezer cgroup depending on the state
  178. // provided
  179. func (m *manager) Freeze(state configs.FreezerState) error {
  180. path := m.Path("freezer")
  181. if path == "" {
  182. return errors.New("cannot toggle freezer: cgroups not configured for container")
  183. }
  184. prevState := m.cgroups.Resources.Freezer
  185. m.cgroups.Resources.Freezer = state
  186. freezer := &FreezerGroup{}
  187. if err := freezer.Set(path, m.cgroups.Resources); err != nil {
  188. m.cgroups.Resources.Freezer = prevState
  189. return err
  190. }
  191. return nil
  192. }
  193. func (m *manager) GetPids() ([]int, error) {
  194. return cgroups.GetPids(m.Path("devices"))
  195. }
  196. func (m *manager) GetAllPids() ([]int, error) {
  197. return cgroups.GetAllPids(m.Path("devices"))
  198. }
  199. func (m *manager) GetPaths() map[string]string {
  200. m.mu.Lock()
  201. defer m.mu.Unlock()
  202. return m.paths
  203. }
  204. func (m *manager) GetCgroups() (*configs.Cgroup, error) {
  205. return m.cgroups, nil
  206. }
  207. func (m *manager) GetFreezerState() (configs.FreezerState, error) {
  208. dir := m.Path("freezer")
  209. // If the container doesn't have the freezer cgroup, say it's undefined.
  210. if dir == "" {
  211. return configs.Undefined, nil
  212. }
  213. freezer := &FreezerGroup{}
  214. return freezer.GetState(dir)
  215. }
  216. func (m *manager) Exists() bool {
  217. return cgroups.PathExists(m.Path("devices"))
  218. }
  219. func OOMKillCount(path string) (uint64, error) {
  220. return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
  221. }
  222. func (m *manager) OOMKillCount() (uint64, error) {
  223. c, err := OOMKillCount(m.Path("memory"))
  224. // Ignore ENOENT when rootless as it couldn't create cgroup.
  225. if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
  226. err = nil
  227. }
  228. return c, err
  229. }