process_linux.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. package libcontainer
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net"
  8. "os"
  9. "os/exec"
  10. "path/filepath"
  11. "strconv"
  12. "time"
  13. "github.com/opencontainers/runc/libcontainer/cgroups"
  14. "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
  15. "github.com/opencontainers/runc/libcontainer/configs"
  16. "github.com/opencontainers/runc/libcontainer/intelrdt"
  17. "github.com/opencontainers/runc/libcontainer/logs"
  18. "github.com/opencontainers/runc/libcontainer/system"
  19. "github.com/opencontainers/runc/libcontainer/utils"
  20. "github.com/opencontainers/runtime-spec/specs-go"
  21. "github.com/sirupsen/logrus"
  22. "golang.org/x/sys/unix"
  23. )
  // parentProcess is the set of operations this file needs from a process it
  // has launched. Both setnsProcess and initProcess provide these methods.
  type parentProcess interface {
  	// pid returns the pid for the running process.
  	pid() int

  	// start starts the process execution.
  	start() error

  	// terminate sends a SIGKILL to the process and waits for it to exit.
  	terminate() error

  	// wait waits on the process and returns its process state.
  	wait() (*os.ProcessState, error)

  	// startTime returns the process start time.
  	startTime() (uint64, error)

  	// signal delivers the given signal to the process.
  	signal(os.Signal) error

  	// externalDescriptors returns the descriptor names previously recorded
  	// with setExternalDescriptors.
  	externalDescriptors() []string

  	// setExternalDescriptors records the descriptor names for the process.
  	setExternalDescriptors(fds []string)

  	// forwardChildLogs starts forwarding the child's log pipe and returns
  	// the channel handed back by the log forwarder.
  	forwardChildLogs() chan error
  }
  // filePair groups the two ends of a pipe/socket shared with a child process.
  // The process types in this file close the child end right after the command
  // has been started and keep using the parent end.
  type filePair struct {
  	parent *os.File
  	child  *os.File
  }
  44. type setnsProcess struct {
  45. cmd *exec.Cmd
  46. messageSockPair filePair
  47. logFilePair filePair
  48. cgroupPaths map[string]string
  49. rootlessCgroups bool
  50. manager cgroups.Manager
  51. intelRdtPath string
  52. config *initConfig
  53. fds []string
  54. process *Process
  55. bootstrapData io.Reader
  56. initProcessPid int
  57. }
  58. func (p *setnsProcess) startTime() (uint64, error) {
  59. stat, err := system.Stat(p.pid())
  60. return stat.StartTime, err
  61. }
  62. func (p *setnsProcess) signal(sig os.Signal) error {
  63. s, ok := sig.(unix.Signal)
  64. if !ok {
  65. return errors.New("os: unsupported signal type")
  66. }
  67. return unix.Kill(p.pid(), s)
  68. }
  69. func (p *setnsProcess) start() (retErr error) {
  70. defer p.messageSockPair.parent.Close()
  71. // get the "before" value of oom kill count
  72. oom, _ := p.manager.OOMKillCount()
  73. err := p.cmd.Start()
  74. // close the write-side of the pipes (controlled by child)
  75. p.messageSockPair.child.Close()
  76. p.logFilePair.child.Close()
  77. if err != nil {
  78. return fmt.Errorf("error starting setns process: %w", err)
  79. }
  80. waitInit := initWaiter(p.messageSockPair.parent)
  81. defer func() {
  82. if retErr != nil {
  83. if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
  84. // Someone in this cgroup was killed, this _might_ be us.
  85. retErr = fmt.Errorf("%w (possibly OOM-killed)", retErr)
  86. }
  87. werr := <-waitInit
  88. if werr != nil {
  89. logrus.WithError(werr).Warn()
  90. }
  91. err := ignoreTerminateErrors(p.terminate())
  92. if err != nil {
  93. logrus.WithError(err).Warn("unable to terminate setnsProcess")
  94. }
  95. }
  96. }()
  97. if p.bootstrapData != nil {
  98. if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
  99. return fmt.Errorf("error copying bootstrap data to pipe: %w", err)
  100. }
  101. }
  102. err = <-waitInit
  103. if err != nil {
  104. return err
  105. }
  106. if err := p.execSetns(); err != nil {
  107. return fmt.Errorf("error executing setns process: %w", err)
  108. }
  109. for _, path := range p.cgroupPaths {
  110. if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups {
  111. // On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
  112. // https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
  113. // Try to join the cgroup of InitProcessPid.
  114. if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
  115. initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
  116. initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
  117. if initCgErr == nil {
  118. if initCgPath, ok := initCg[""]; ok {
  119. initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
  120. logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
  121. p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
  122. // NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
  123. err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
  124. }
  125. }
  126. }
  127. if err != nil {
  128. return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
  129. }
  130. }
  131. }
  132. if p.intelRdtPath != "" {
  133. // if Intel RDT "resource control" filesystem path exists
  134. _, err := os.Stat(p.intelRdtPath)
  135. if err == nil {
  136. if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
  137. return fmt.Errorf("error adding pid %d to Intel RDT: %w", p.pid(), err)
  138. }
  139. }
  140. }
  141. // set rlimits, this has to be done here because we lose permissions
  142. // to raise the limits once we enter a user-namespace
  143. if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
  144. return fmt.Errorf("error setting rlimits for process: %w", err)
  145. }
  146. if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
  147. return fmt.Errorf("error writing config to pipe: %w", err)
  148. }
  149. ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
  150. switch sync.Type {
  151. case procReady:
  152. // This shouldn't happen.
  153. panic("unexpected procReady in setns")
  154. case procHooks:
  155. // This shouldn't happen.
  156. panic("unexpected procHooks in setns")
  157. case procSeccomp:
  158. if p.config.Config.Seccomp.ListenerPath == "" {
  159. return errors.New("listenerPath is not set")
  160. }
  161. seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
  162. if err != nil {
  163. return err
  164. }
  165. defer unix.Close(seccompFd)
  166. bundle, annotations := utils.Annotations(p.config.Config.Labels)
  167. containerProcessState := &specs.ContainerProcessState{
  168. Version: specs.Version,
  169. Fds: []string{specs.SeccompFdName},
  170. Pid: p.cmd.Process.Pid,
  171. Metadata: p.config.Config.Seccomp.ListenerMetadata,
  172. State: specs.State{
  173. Version: specs.Version,
  174. ID: p.config.ContainerId,
  175. Status: specs.StateRunning,
  176. Pid: p.initProcessPid,
  177. Bundle: bundle,
  178. Annotations: annotations,
  179. },
  180. }
  181. if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
  182. containerProcessState, seccompFd); err != nil {
  183. return err
  184. }
  185. // Sync with child.
  186. if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
  187. return err
  188. }
  189. return nil
  190. default:
  191. return errors.New("invalid JSON payload from child")
  192. }
  193. })
  194. if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
  195. return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
  196. }
  197. // Must be done after Shutdown so the child will exit and we can wait for it.
  198. if ierr != nil {
  199. _, _ = p.wait()
  200. return ierr
  201. }
  202. return nil
  203. }
  204. // execSetns runs the process that executes C code to perform the setns calls
  205. // because setns support requires the C process to fork off a child and perform the setns
  206. // before the go runtime boots, we wait on the process to die and receive the child's pid
  207. // over the provided pipe.
  208. func (p *setnsProcess) execSetns() error {
  209. status, err := p.cmd.Process.Wait()
  210. if err != nil {
  211. _ = p.cmd.Wait()
  212. return fmt.Errorf("error waiting on setns process to finish: %w", err)
  213. }
  214. if !status.Success() {
  215. _ = p.cmd.Wait()
  216. return &exec.ExitError{ProcessState: status}
  217. }
  218. var pid *pid
  219. if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
  220. _ = p.cmd.Wait()
  221. return fmt.Errorf("error reading pid from init pipe: %w", err)
  222. }
  223. // Clean up the zombie parent process
  224. // On Unix systems FindProcess always succeeds.
  225. firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
  226. // Ignore the error in case the child has already been reaped for any reason
  227. _, _ = firstChildProcess.Wait()
  228. process, err := os.FindProcess(pid.Pid)
  229. if err != nil {
  230. return err
  231. }
  232. p.cmd.Process = process
  233. p.process.ops = p
  234. return nil
  235. }
  236. // terminate sends a SIGKILL to the forked process for the setns routine then waits to
  237. // avoid the process becoming a zombie.
  238. func (p *setnsProcess) terminate() error {
  239. if p.cmd.Process == nil {
  240. return nil
  241. }
  242. err := p.cmd.Process.Kill()
  243. if _, werr := p.wait(); err == nil {
  244. err = werr
  245. }
  246. return err
  247. }
  248. func (p *setnsProcess) wait() (*os.ProcessState, error) {
  249. err := p.cmd.Wait()
  250. // Return actual ProcessState even on Wait error
  251. return p.cmd.ProcessState, err
  252. }
  253. func (p *setnsProcess) pid() int {
  254. return p.cmd.Process.Pid
  255. }
  256. func (p *setnsProcess) externalDescriptors() []string {
  257. return p.fds
  258. }
  259. func (p *setnsProcess) setExternalDescriptors(newFds []string) {
  260. p.fds = newFds
  261. }
  262. func (p *setnsProcess) forwardChildLogs() chan error {
  263. return logs.ForwardLogs(p.logFilePair.parent)
  264. }
  265. type initProcess struct {
  266. cmd *exec.Cmd
  267. messageSockPair filePair
  268. logFilePair filePair
  269. config *initConfig
  270. manager cgroups.Manager
  271. intelRdtManager intelrdt.Manager
  272. container *linuxContainer
  273. fds []string
  274. process *Process
  275. bootstrapData io.Reader
  276. sharePidns bool
  277. }
  278. func (p *initProcess) pid() int {
  279. return p.cmd.Process.Pid
  280. }
  281. func (p *initProcess) externalDescriptors() []string {
  282. return p.fds
  283. }
  284. // getChildPid receives the final child's pid over the provided pipe.
  285. func (p *initProcess) getChildPid() (int, error) {
  286. var pid pid
  287. if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
  288. _ = p.cmd.Wait()
  289. return -1, err
  290. }
  291. // Clean up the zombie parent process
  292. // On Unix systems FindProcess always succeeds.
  293. firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
  294. // Ignore the error in case the child has already been reaped for any reason
  295. _, _ = firstChildProcess.Wait()
  296. return pid.Pid, nil
  297. }
  298. func (p *initProcess) waitForChildExit(childPid int) error {
  299. status, err := p.cmd.Process.Wait()
  300. if err != nil {
  301. _ = p.cmd.Wait()
  302. return err
  303. }
  304. if !status.Success() {
  305. _ = p.cmd.Wait()
  306. return &exec.ExitError{ProcessState: status}
  307. }
  308. process, err := os.FindProcess(childPid)
  309. if err != nil {
  310. return err
  311. }
  312. p.cmd.Process = process
  313. p.process.ops = p
  314. return nil
  315. }
  316. func (p *initProcess) start() (retErr error) {
  317. defer p.messageSockPair.parent.Close() //nolint: errcheck
  318. err := p.cmd.Start()
  319. p.process.ops = p
  320. // close the write-side of the pipes (controlled by child)
  321. _ = p.messageSockPair.child.Close()
  322. _ = p.logFilePair.child.Close()
  323. if err != nil {
  324. p.process.ops = nil
  325. return fmt.Errorf("unable to start init: %w", err)
  326. }
  327. waitInit := initWaiter(p.messageSockPair.parent)
  328. defer func() {
  329. if retErr != nil {
  330. // Find out if init is killed by the kernel's OOM killer.
  331. // Get the count before killing init as otherwise cgroup
  332. // might be removed by systemd.
  333. oom, err := p.manager.OOMKillCount()
  334. if err != nil {
  335. logrus.WithError(err).Warn("unable to get oom kill count")
  336. } else if oom > 0 {
  337. // Does not matter what the particular error was,
  338. // its cause is most probably OOM, so report that.
  339. const oomError = "container init was OOM-killed (memory limit too low?)"
  340. if logrus.GetLevel() >= logrus.DebugLevel {
  341. // Only show the original error if debug is set,
  342. // as it is not generally very useful.
  343. retErr = fmt.Errorf(oomError+": %w", retErr)
  344. } else {
  345. retErr = errors.New(oomError)
  346. }
  347. }
  348. werr := <-waitInit
  349. if werr != nil {
  350. logrus.WithError(werr).Warn()
  351. }
  352. // Terminate the process to ensure we can remove cgroups.
  353. if err := ignoreTerminateErrors(p.terminate()); err != nil {
  354. logrus.WithError(err).Warn("unable to terminate initProcess")
  355. }
  356. _ = p.manager.Destroy()
  357. if p.intelRdtManager != nil {
  358. _ = p.intelRdtManager.Destroy()
  359. }
  360. }
  361. }()
  362. // Do this before syncing with child so that no children can escape the
  363. // cgroup. We don't need to worry about not doing this and not being root
  364. // because we'd be using the rootless cgroup manager in that case.
  365. if err := p.manager.Apply(p.pid()); err != nil {
  366. return fmt.Errorf("unable to apply cgroup configuration: %w", err)
  367. }
  368. if p.intelRdtManager != nil {
  369. if err := p.intelRdtManager.Apply(p.pid()); err != nil {
  370. return fmt.Errorf("unable to apply Intel RDT configuration: %w", err)
  371. }
  372. }
  373. if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
  374. return fmt.Errorf("can't copy bootstrap data to pipe: %w", err)
  375. }
  376. err = <-waitInit
  377. if err != nil {
  378. return err
  379. }
  380. childPid, err := p.getChildPid()
  381. if err != nil {
  382. return fmt.Errorf("can't get final child's PID from pipe: %w", err)
  383. }
  384. // Save the standard descriptor names before the container process
  385. // can potentially move them (e.g., via dup2()). If we don't do this now,
  386. // we won't know at checkpoint time which file descriptor to look up.
  387. fds, err := getPipeFds(childPid)
  388. if err != nil {
  389. return fmt.Errorf("error getting pipe fds for pid %d: %w", childPid, err)
  390. }
  391. p.setExternalDescriptors(fds)
  392. // Wait for our first child to exit
  393. if err := p.waitForChildExit(childPid); err != nil {
  394. return fmt.Errorf("error waiting for our first child to exit: %w", err)
  395. }
  396. if err := p.createNetworkInterfaces(); err != nil {
  397. return fmt.Errorf("error creating network interfaces: %w", err)
  398. }
  399. if err := p.updateSpecState(); err != nil {
  400. return fmt.Errorf("error updating spec state: %w", err)
  401. }
  402. if err := p.sendConfig(); err != nil {
  403. return fmt.Errorf("error sending config to init process: %w", err)
  404. }
  405. var (
  406. sentRun bool
  407. sentResume bool
  408. )
  409. ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
  410. switch sync.Type {
  411. case procSeccomp:
  412. if p.config.Config.Seccomp.ListenerPath == "" {
  413. return errors.New("listenerPath is not set")
  414. }
  415. seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
  416. if err != nil {
  417. return err
  418. }
  419. defer unix.Close(seccompFd)
  420. s, err := p.container.currentOCIState()
  421. if err != nil {
  422. return err
  423. }
  424. // initProcessStartTime hasn't been set yet.
  425. s.Pid = p.cmd.Process.Pid
  426. s.Status = specs.StateCreating
  427. containerProcessState := &specs.ContainerProcessState{
  428. Version: specs.Version,
  429. Fds: []string{specs.SeccompFdName},
  430. Pid: s.Pid,
  431. Metadata: p.config.Config.Seccomp.ListenerMetadata,
  432. State: *s,
  433. }
  434. if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
  435. containerProcessState, seccompFd); err != nil {
  436. return err
  437. }
  438. // Sync with child.
  439. if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
  440. return err
  441. }
  442. case procReady:
  443. // set rlimits, this has to be done here because we lose permissions
  444. // to raise the limits once we enter a user-namespace
  445. if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
  446. return fmt.Errorf("error setting rlimits for ready process: %w", err)
  447. }
  448. // call prestart and CreateRuntime hooks
  449. if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
  450. // Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
  451. if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
  452. return fmt.Errorf("error setting cgroup config for ready process: %w", err)
  453. }
  454. if p.intelRdtManager != nil {
  455. if err := p.intelRdtManager.Set(p.config.Config); err != nil {
  456. return fmt.Errorf("error setting Intel RDT config for ready process: %w", err)
  457. }
  458. }
  459. if len(p.config.Config.Hooks) != 0 {
  460. s, err := p.container.currentOCIState()
  461. if err != nil {
  462. return err
  463. }
  464. // initProcessStartTime hasn't been set yet.
  465. s.Pid = p.cmd.Process.Pid
  466. s.Status = specs.StateCreating
  467. hooks := p.config.Config.Hooks
  468. if err := hooks[configs.Prestart].RunHooks(s); err != nil {
  469. return err
  470. }
  471. if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
  472. return err
  473. }
  474. }
  475. }
  476. // generate a timestamp indicating when the container was started
  477. p.container.created = time.Now().UTC()
  478. p.container.state = &createdState{
  479. c: p.container,
  480. }
  481. // NOTE: If the procRun state has been synced and the
  482. // runc-create process has been killed for some reason,
  483. // the runc-init[2:stage] process will be leaky. And
  484. // the runc command also fails to parse root directory
  485. // because the container doesn't have state.json.
  486. //
  487. // In order to cleanup the runc-init[2:stage] by
  488. // runc-delete/stop, we should store the status before
  489. // procRun sync.
  490. state, uerr := p.container.updateState(p)
  491. if uerr != nil {
  492. return fmt.Errorf("unable to store init state: %w", err)
  493. }
  494. p.container.initProcessStartTime = state.InitProcessStartTime
  495. // Sync with child.
  496. if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
  497. return err
  498. }
  499. sentRun = true
  500. case procHooks:
  501. // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
  502. if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
  503. return fmt.Errorf("error setting cgroup config for procHooks process: %w", err)
  504. }
  505. if p.intelRdtManager != nil {
  506. if err := p.intelRdtManager.Set(p.config.Config); err != nil {
  507. return fmt.Errorf("error setting Intel RDT config for procHooks process: %w", err)
  508. }
  509. }
  510. if len(p.config.Config.Hooks) != 0 {
  511. s, err := p.container.currentOCIState()
  512. if err != nil {
  513. return err
  514. }
  515. // initProcessStartTime hasn't been set yet.
  516. s.Pid = p.cmd.Process.Pid
  517. s.Status = specs.StateCreating
  518. hooks := p.config.Config.Hooks
  519. if err := hooks[configs.Prestart].RunHooks(s); err != nil {
  520. return err
  521. }
  522. if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
  523. return err
  524. }
  525. }
  526. // Sync with child.
  527. if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
  528. return err
  529. }
  530. sentResume = true
  531. default:
  532. return errors.New("invalid JSON payload from child")
  533. }
  534. return nil
  535. })
  536. if !sentRun {
  537. return fmt.Errorf("error during container init: %w", ierr)
  538. }
  539. if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
  540. return errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process")
  541. }
  542. if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
  543. return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
  544. }
  545. // Must be done after Shutdown so the child will exit and we can wait for it.
  546. if ierr != nil {
  547. _, _ = p.wait()
  548. return ierr
  549. }
  550. return nil
  551. }
  552. func (p *initProcess) wait() (*os.ProcessState, error) {
  553. err := p.cmd.Wait()
  554. // we should kill all processes in cgroup when init is died if we use host PID namespace
  555. if p.sharePidns {
  556. _ = signalAllProcesses(p.manager, unix.SIGKILL)
  557. }
  558. return p.cmd.ProcessState, err
  559. }
  560. func (p *initProcess) terminate() error {
  561. if p.cmd.Process == nil {
  562. return nil
  563. }
  564. err := p.cmd.Process.Kill()
  565. if _, werr := p.wait(); err == nil {
  566. err = werr
  567. }
  568. return err
  569. }
  570. func (p *initProcess) startTime() (uint64, error) {
  571. stat, err := system.Stat(p.pid())
  572. return stat.StartTime, err
  573. }
  574. func (p *initProcess) updateSpecState() error {
  575. s, err := p.container.currentOCIState()
  576. if err != nil {
  577. return err
  578. }
  579. p.config.SpecState = s
  580. return nil
  581. }
  582. func (p *initProcess) sendConfig() error {
  583. // send the config to the container's init process, we don't use JSON Encode
  584. // here because there might be a problem in JSON decoder in some cases, see:
  585. // https://github.com/docker/docker/issues/14203#issuecomment-174177790
  586. return utils.WriteJSON(p.messageSockPair.parent, p.config)
  587. }
  588. func (p *initProcess) createNetworkInterfaces() error {
  589. for _, config := range p.config.Config.Networks {
  590. strategy, err := getStrategy(config.Type)
  591. if err != nil {
  592. return err
  593. }
  594. n := &network{
  595. Network: *config,
  596. }
  597. if err := strategy.create(n, p.pid()); err != nil {
  598. return err
  599. }
  600. p.config.Networks = append(p.config.Networks, n)
  601. }
  602. return nil
  603. }
  604. func (p *initProcess) signal(sig os.Signal) error {
  605. s, ok := sig.(unix.Signal)
  606. if !ok {
  607. return errors.New("os: unsupported signal type")
  608. }
  609. return unix.Kill(p.pid(), s)
  610. }
  611. func (p *initProcess) setExternalDescriptors(newFds []string) {
  612. p.fds = newFds
  613. }
  614. func (p *initProcess) forwardChildLogs() chan error {
  615. return logs.ForwardLogs(p.logFilePair.parent)
  616. }
  617. func recvSeccompFd(childPid, childFd uintptr) (int, error) {
  618. pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
  619. if errno != 0 {
  620. return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
  621. }
  622. defer unix.Close(int(pidfd))
  623. seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
  624. if errno != 0 {
  625. return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
  626. }
  627. return int(seccompFd), nil
  628. }
  629. func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
  630. conn, err := net.Dial("unix", listenerPath)
  631. if err != nil {
  632. return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
  633. }
  634. socket, err := conn.(*net.UnixConn).File()
  635. if err != nil {
  636. return fmt.Errorf("cannot get seccomp socket: %w", err)
  637. }
  638. defer socket.Close()
  639. b, err := json.Marshal(state)
  640. if err != nil {
  641. return fmt.Errorf("cannot marshall seccomp state: %w", err)
  642. }
  643. err = utils.SendFds(socket, b, fd)
  644. if err != nil {
  645. return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
  646. }
  647. return nil
  648. }
  // getPipeFds resolves the symlink targets of descriptors 0, 1 and 2 under
  // /proc/<pid>/fd and returns them as a three-element slice indexed by
  // descriptor number.
  //
  // A permission error on one descriptor leaves that element empty and the scan
  // continues. Any other readlink error stops the scan and is returned together
  // with the entries gathered so far.
  func getPipeFds(pid int) ([]string, error) {
  	const stdioCount = 3
  	fdDir := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
  	targets := make([]string, stdioCount)
  	for fd := range targets {
  		// XXX: This breaks if the path is not a valid symlink (which can
  		// happen in certain particularly unlucky mount namespace setups).
  		link, err := os.Readlink(filepath.Join(fdDir, strconv.Itoa(fd)))
  		switch {
  		case err == nil:
  			targets[fd] = link
  		case os.IsPermission(err):
  			// Rootless containers and other non-dumpable processes: there is
  			// not much we can do if the fd cannot be read, so leave it empty.
  		default:
  			return targets, err
  		}
  	}
  	return targets, nil
  }
  670. // InitializeIO creates pipes for use with the process's stdio and returns the
  671. // opposite side for each. Do not use this if you want to have a pseudoterminal
  672. // set up for you by libcontainer (TODO: fix that too).
  673. // TODO: This is mostly unnecessary, and should be handled by clients.
  674. func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
  675. var fds []uintptr
  676. i = &IO{}
  677. // cleanup in case of an error
  678. defer func() {
  679. if err != nil {
  680. for _, fd := range fds {
  681. _ = unix.Close(int(fd))
  682. }
  683. }
  684. }()
  685. // STDIN
  686. r, w, err := os.Pipe()
  687. if err != nil {
  688. return nil, err
  689. }
  690. fds = append(fds, r.Fd(), w.Fd())
  691. p.Stdin, i.Stdin = r, w
  692. // STDOUT
  693. if r, w, err = os.Pipe(); err != nil {
  694. return nil, err
  695. }
  696. fds = append(fds, r.Fd(), w.Fd())
  697. p.Stdout, i.Stdout = w, r
  698. // STDERR
  699. if r, w, err = os.Pipe(); err != nil {
  700. return nil, err
  701. }
  702. fds = append(fds, r.Fd(), w.Fd())
  703. p.Stderr, i.Stderr = w, r
  704. // change ownership of the pipes in case we are in a user namespace
  705. for _, fd := range fds {
  706. if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
  707. return nil, &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
  708. }
  709. }
  710. return i, nil
  711. }
  // initWaiter returns a channel to wait on for making sure runc init has
  // finished the initial setup.
  //
  // A goroutine reads a single byte from r. A zero byte produces a nil value on
  // the channel; a read error, an empty read or a non-zero byte produces an
  // error prefixed with "waiting for init preliminary setup". The channel is
  // closed after that one value has been sent.
  func initWaiter(r io.Reader) chan error {
  	result := make(chan error, 1)
  	go func() {
  		defer close(result)
  		var ack [1]byte
  		n, err := r.Read(ack[:])
  		switch {
  		case err != nil:
  			// Reported below with the common prefix.
  		case n < 1:
  			err = errors.New("short read")
  		case ack[0] != 0:
  			err = fmt.Errorf("unexpected %d != 0", ack[0])
  		default:
  			result <- nil
  			return
  		}
  		result <- fmt.Errorf("waiting for init preliminary setup: %w", err)
  	}()
  	return result
  }