factory_linux.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. package libcontainer
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "runtime/debug"
  10. "strconv"
  11. securejoin "github.com/cyphar/filepath-securejoin"
  12. "github.com/moby/sys/mountinfo"
  13. "golang.org/x/sys/unix"
  14. "github.com/opencontainers/runc/libcontainer/cgroups/manager"
  15. "github.com/opencontainers/runc/libcontainer/configs"
  16. "github.com/opencontainers/runc/libcontainer/configs/validate"
  17. "github.com/opencontainers/runc/libcontainer/intelrdt"
  18. "github.com/opencontainers/runc/libcontainer/utils"
  19. "github.com/sirupsen/logrus"
  20. )
  21. const (
  22. stateFilename = "state.json"
  23. execFifoFilename = "exec.fifo"
  24. )
  25. var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
  26. // InitArgs returns an options func to configure a LinuxFactory with the
  27. // provided init binary path and arguments.
  28. func InitArgs(args ...string) func(*LinuxFactory) error {
  29. return func(l *LinuxFactory) (err error) {
  30. if len(args) > 0 {
  31. // Resolve relative paths to ensure that its available
  32. // after directory changes.
  33. if args[0], err = filepath.Abs(args[0]); err != nil {
  34. // The only error returned from filepath.Abs is
  35. // the one from os.Getwd, i.e. a system error.
  36. return err
  37. }
  38. }
  39. l.InitArgs = args
  40. return nil
  41. }
  42. }
  43. // IntelRdtfs is an options func to configure a LinuxFactory to return
  44. // containers that use the Intel RDT "resource control" filesystem to
  45. // create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
  46. func IntelRdtFs(l *LinuxFactory) error {
  47. if !intelrdt.IsCATEnabled() && !intelrdt.IsMBAEnabled() {
  48. l.NewIntelRdtManager = nil
  49. } else {
  50. l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
  51. return intelrdt.NewManager(config, id, path)
  52. }
  53. }
  54. return nil
  55. }
  56. // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
  57. func TmpfsRoot(l *LinuxFactory) error {
  58. mounted, err := mountinfo.Mounted(l.Root)
  59. if err != nil {
  60. return err
  61. }
  62. if !mounted {
  63. if err := mount("tmpfs", l.Root, "", "tmpfs", 0, ""); err != nil {
  64. return err
  65. }
  66. }
  67. return nil
  68. }
  69. // CriuPath returns an option func to configure a LinuxFactory with the
  70. // provided criupath
  71. func CriuPath(criupath string) func(*LinuxFactory) error {
  72. return func(l *LinuxFactory) error {
  73. l.CriuPath = criupath
  74. return nil
  75. }
  76. }
  77. // New returns a linux based container factory based in the root directory and
  78. // configures the factory with the provided option funcs.
  79. func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
  80. if root != "" {
  81. if err := os.MkdirAll(root, 0o700); err != nil {
  82. return nil, err
  83. }
  84. }
  85. l := &LinuxFactory{
  86. Root: root,
  87. InitPath: "/proc/self/exe",
  88. InitArgs: []string{os.Args[0], "init"},
  89. Validator: validate.New(),
  90. CriuPath: "criu",
  91. }
  92. for _, opt := range options {
  93. if opt == nil {
  94. continue
  95. }
  96. if err := opt(l); err != nil {
  97. return nil, err
  98. }
  99. }
  100. return l, nil
  101. }
  102. // LinuxFactory implements the default factory interface for linux based systems.
  103. type LinuxFactory struct {
  104. // Root directory for the factory to store state.
  105. Root string
  106. // InitPath is the path for calling the init responsibilities for spawning
  107. // a container.
  108. InitPath string
  109. // InitArgs are arguments for calling the init responsibilities for spawning
  110. // a container.
  111. InitArgs []string
  112. // CriuPath is the path to the criu binary used for checkpoint and restore of
  113. // containers.
  114. CriuPath string
  115. // New{u,g}idmapPath is the path to the binaries used for mapping with
  116. // rootless containers.
  117. NewuidmapPath string
  118. NewgidmapPath string
  119. // Validator provides validation to container configurations.
  120. Validator validate.Validator
  121. // NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
  122. NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
  123. }
  124. func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
  125. if l.Root == "" {
  126. return nil, errors.New("root not set")
  127. }
  128. if err := l.validateID(id); err != nil {
  129. return nil, err
  130. }
  131. if err := l.Validator.Validate(config); err != nil {
  132. return nil, err
  133. }
  134. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  135. if err != nil {
  136. return nil, err
  137. }
  138. if _, err := os.Stat(containerRoot); err == nil {
  139. return nil, ErrExist
  140. } else if !os.IsNotExist(err) {
  141. return nil, err
  142. }
  143. cm, err := manager.New(config.Cgroups)
  144. if err != nil {
  145. return nil, err
  146. }
  147. // Check that cgroup does not exist or empty (no processes).
  148. // Note for cgroup v1 this check is not thorough, as there are multiple
  149. // separate hierarchies, while both Exists() and GetAllPids() only use
  150. // one for "devices" controller (assuming others are the same, which is
  151. // probably true in almost all scenarios). Checking all the hierarchies
  152. // would be too expensive.
  153. if cm.Exists() {
  154. pids, err := cm.GetAllPids()
  155. // Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV.
  156. if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) {
  157. return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err)
  158. }
  159. if len(pids) != 0 {
  160. // TODO: return an error.
  161. logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids))
  162. logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132")
  163. }
  164. }
  165. // Check that cgroup is not frozen. Do not use Exists() here
  166. // since in cgroup v1 it only checks "devices" controller.
  167. st, err := cm.GetFreezerState()
  168. if err != nil {
  169. return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err)
  170. }
  171. if st == configs.Frozen {
  172. return nil, errors.New("container's cgroup unexpectedly frozen")
  173. }
  174. if err := os.MkdirAll(containerRoot, 0o711); err != nil {
  175. return nil, err
  176. }
  177. if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
  178. return nil, err
  179. }
  180. c := &linuxContainer{
  181. id: id,
  182. root: containerRoot,
  183. config: config,
  184. initPath: l.InitPath,
  185. initArgs: l.InitArgs,
  186. criuPath: l.CriuPath,
  187. newuidmapPath: l.NewuidmapPath,
  188. newgidmapPath: l.NewgidmapPath,
  189. cgroupManager: cm,
  190. }
  191. if l.NewIntelRdtManager != nil {
  192. c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
  193. }
  194. c.state = &stoppedState{c: c}
  195. return c, nil
  196. }
  197. func (l *LinuxFactory) Load(id string) (Container, error) {
  198. if l.Root == "" {
  199. return nil, errors.New("root not set")
  200. }
  201. // when load, we need to check id is valid or not.
  202. if err := l.validateID(id); err != nil {
  203. return nil, err
  204. }
  205. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  206. if err != nil {
  207. return nil, err
  208. }
  209. state, err := l.loadState(containerRoot)
  210. if err != nil {
  211. return nil, err
  212. }
  213. r := &nonChildProcess{
  214. processPid: state.InitProcessPid,
  215. processStartTime: state.InitProcessStartTime,
  216. fds: state.ExternalDescriptors,
  217. }
  218. cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths)
  219. if err != nil {
  220. return nil, err
  221. }
  222. c := &linuxContainer{
  223. initProcess: r,
  224. initProcessStartTime: state.InitProcessStartTime,
  225. id: id,
  226. config: &state.Config,
  227. initPath: l.InitPath,
  228. initArgs: l.InitArgs,
  229. criuPath: l.CriuPath,
  230. newuidmapPath: l.NewuidmapPath,
  231. newgidmapPath: l.NewgidmapPath,
  232. cgroupManager: cm,
  233. root: containerRoot,
  234. created: state.Created,
  235. }
  236. if l.NewIntelRdtManager != nil {
  237. c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
  238. }
  239. c.state = &loadedState{c: c}
  240. if err := c.refreshState(); err != nil {
  241. return nil, err
  242. }
  243. return c, nil
  244. }
  245. func (l *LinuxFactory) Type() string {
  246. return "libcontainer"
  247. }
  248. // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
  249. // This is a low level implementation detail of the reexec and should not be consumed externally
  250. func (l *LinuxFactory) StartInitialization() (err error) {
  251. // Get the INITPIPE.
  252. envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
  253. pipefd, err := strconv.Atoi(envInitPipe)
  254. if err != nil {
  255. err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
  256. logrus.Error(err)
  257. return err
  258. }
  259. pipe := os.NewFile(uintptr(pipefd), "pipe")
  260. defer pipe.Close()
  261. defer func() {
  262. // We have an error during the initialization of the container's init,
  263. // send it back to the parent process in the form of an initError.
  264. if werr := writeSync(pipe, procError); werr != nil {
  265. fmt.Fprintln(os.Stderr, err)
  266. return
  267. }
  268. if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil {
  269. fmt.Fprintln(os.Stderr, err)
  270. return
  271. }
  272. }()
  273. // Only init processes have FIFOFD.
  274. fifofd := -1
  275. envInitType := os.Getenv("_LIBCONTAINER_INITTYPE")
  276. it := initType(envInitType)
  277. if it == initStandard {
  278. envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD")
  279. if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
  280. return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err)
  281. }
  282. }
  283. var consoleSocket *os.File
  284. if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" {
  285. console, err := strconv.Atoi(envConsole)
  286. if err != nil {
  287. return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err)
  288. }
  289. consoleSocket = os.NewFile(uintptr(console), "console-socket")
  290. defer consoleSocket.Close()
  291. }
  292. logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE")
  293. logPipeFd, err := strconv.Atoi(logPipeFdStr)
  294. if err != nil {
  295. return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
  296. }
  297. // Get mount files (O_PATH).
  298. mountFds, err := parseMountFds()
  299. if err != nil {
  300. return err
  301. }
  302. // clear the current process's environment to clean any libcontainer
  303. // specific env vars.
  304. os.Clearenv()
  305. defer func() {
  306. if e := recover(); e != nil {
  307. if e, ok := e.(error); ok {
  308. err = fmt.Errorf("panic from initialization: %w, %s", e, debug.Stack())
  309. } else {
  310. //nolint:errorlint // here e is not of error type
  311. err = fmt.Errorf("panic from initialization: %v, %s", e, debug.Stack())
  312. }
  313. }
  314. }()
  315. i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
  316. if err != nil {
  317. return err
  318. }
  319. // If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
  320. return i.Init()
  321. }
  322. func (l *LinuxFactory) loadState(root string) (*State, error) {
  323. stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
  324. if err != nil {
  325. return nil, err
  326. }
  327. f, err := os.Open(stateFilePath)
  328. if err != nil {
  329. if os.IsNotExist(err) {
  330. return nil, ErrNotExist
  331. }
  332. return nil, err
  333. }
  334. defer f.Close()
  335. var state *State
  336. if err := json.NewDecoder(f).Decode(&state); err != nil {
  337. return nil, err
  338. }
  339. return state, nil
  340. }
  341. func (l *LinuxFactory) validateID(id string) error {
  342. if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
  343. return ErrInvalidID
  344. }
  345. return nil
  346. }
  347. // NewuidmapPath returns an option func to configure a LinuxFactory with the
  348. // provided ..
  349. func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
  350. return func(l *LinuxFactory) error {
  351. l.NewuidmapPath = newuidmapPath
  352. return nil
  353. }
  354. }
  355. // NewgidmapPath returns an option func to configure a LinuxFactory with the
  356. // provided ..
  357. func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
  358. return func(l *LinuxFactory) error {
  359. l.NewgidmapPath = newgidmapPath
  360. return nil
  361. }
  362. }
  363. func parseMountFds() ([]int, error) {
  364. fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
  365. if fdsJson == "" {
  366. // Always return the nil slice if no fd is present.
  367. return nil, nil
  368. }
  369. var mountFds []int
  370. if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil {
  371. return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
  372. }
  373. return mountFds, nil
  374. }