seccomp_linux.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. //go:build cgo && seccomp
  2. // +build cgo,seccomp
  3. package seccomp
  4. import (
  5. "errors"
  6. "fmt"
  7. libseccomp "github.com/seccomp/libseccomp-golang"
  8. "github.com/sirupsen/logrus"
  9. "golang.org/x/sys/unix"
  10. "github.com/opencontainers/runc/libcontainer/configs"
  11. "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
  12. )
  13. var (
  14. actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
  15. actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
  16. )
  17. const (
  18. // Linux system calls can have at most 6 arguments
  19. syscallMaxArguments int = 6
  20. )
  21. // InitSeccomp installs the seccomp filters to be used in the container as
  22. // specified in config.
  23. // Returns the seccomp file descriptor if any of the filters include a
  24. // SCMP_ACT_NOTIFY action, otherwise returns -1.
  25. func InitSeccomp(config *configs.Seccomp) (int, error) {
  26. if config == nil {
  27. return -1, errors.New("cannot initialize Seccomp - nil config passed")
  28. }
  29. defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
  30. if err != nil {
  31. return -1, errors.New("error initializing seccomp - invalid default action")
  32. }
  33. // Ignore the error since pre-2.4 libseccomp is treated as API level 0.
  34. apiLevel, _ := libseccomp.GetAPI()
  35. for _, call := range config.Syscalls {
  36. if call.Action == configs.Notify {
  37. if apiLevel < 6 {
  38. return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel)
  39. }
  40. // We can't allow the write syscall to notify to the seccomp agent.
  41. // After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain
  42. // number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we
  43. // never can write the seccomp fd to the parent and therefore the seccomp agent never receives
  44. // the seccomp fd and runc is hang during initialization.
  45. //
  46. // Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY.
  47. // Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and
  48. // send the seccomp fd to the agent (it is another process and not subject to the seccomp
  49. // filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp
  50. // agent allows those syscalls to proceed, initialization works just fine and the agent can
  51. // handle future read()/close() syscalls as it wanted.
  52. if call.Name == "write" {
  53. return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall")
  54. }
  55. }
  56. }
  57. // See comment on why write is not allowed. The same reason applies, as this can mean handling write too.
  58. if defaultAction == libseccomp.ActNotify {
  59. return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action")
  60. }
  61. filter, err := libseccomp.NewFilter(defaultAction)
  62. if err != nil {
  63. return -1, fmt.Errorf("error creating filter: %w", err)
  64. }
  65. // Add extra architectures
  66. for _, arch := range config.Architectures {
  67. scmpArch, err := libseccomp.GetArchFromString(arch)
  68. if err != nil {
  69. return -1, fmt.Errorf("error validating Seccomp architecture: %w", err)
  70. }
  71. if err := filter.AddArch(scmpArch); err != nil {
  72. return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err)
  73. }
  74. }
  75. // Unset no new privs bit
  76. if err := filter.SetNoNewPrivsBit(false); err != nil {
  77. return -1, fmt.Errorf("error setting no new privileges: %w", err)
  78. }
  79. // Add a rule for each syscall
  80. for _, call := range config.Syscalls {
  81. if call == nil {
  82. return -1, errors.New("encountered nil syscall while initializing Seccomp")
  83. }
  84. if err := matchCall(filter, call, defaultAction); err != nil {
  85. return -1, err
  86. }
  87. }
  88. seccompFd, err := patchbpf.PatchAndLoad(config, filter)
  89. if err != nil {
  90. return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err)
  91. }
  92. return seccompFd, nil
  93. }
  94. // Convert Libcontainer Action to Libseccomp ScmpAction
  95. func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
  96. switch act {
  97. case configs.Kill, configs.KillThread:
  98. return libseccomp.ActKillThread, nil
  99. case configs.Errno:
  100. if errnoRet != nil {
  101. return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
  102. }
  103. return actErrno, nil
  104. case configs.Trap:
  105. return libseccomp.ActTrap, nil
  106. case configs.Allow:
  107. return libseccomp.ActAllow, nil
  108. case configs.Trace:
  109. if errnoRet != nil {
  110. return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
  111. }
  112. return actTrace, nil
  113. case configs.Log:
  114. return libseccomp.ActLog, nil
  115. case configs.Notify:
  116. return libseccomp.ActNotify, nil
  117. case configs.KillProcess:
  118. return libseccomp.ActKillProcess, nil
  119. default:
  120. return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
  121. }
  122. }
  123. // Convert Libcontainer Operator to Libseccomp ScmpCompareOp
  124. func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
  125. switch op {
  126. case configs.EqualTo:
  127. return libseccomp.CompareEqual, nil
  128. case configs.NotEqualTo:
  129. return libseccomp.CompareNotEqual, nil
  130. case configs.GreaterThan:
  131. return libseccomp.CompareGreater, nil
  132. case configs.GreaterThanOrEqualTo:
  133. return libseccomp.CompareGreaterEqual, nil
  134. case configs.LessThan:
  135. return libseccomp.CompareLess, nil
  136. case configs.LessThanOrEqualTo:
  137. return libseccomp.CompareLessOrEqual, nil
  138. case configs.MaskEqualTo:
  139. return libseccomp.CompareMaskedEqual, nil
  140. default:
  141. return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule")
  142. }
  143. }
  144. // Convert Libcontainer Arg to Libseccomp ScmpCondition
  145. func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
  146. cond := libseccomp.ScmpCondition{}
  147. if arg == nil {
  148. return cond, errors.New("cannot convert nil to syscall condition")
  149. }
  150. op, err := getOperator(arg.Op)
  151. if err != nil {
  152. return cond, err
  153. }
  154. return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
  155. }
  156. // Add a rule to match a single syscall
  157. func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error {
  158. if call == nil || filter == nil {
  159. return errors.New("cannot use nil as syscall to block")
  160. }
  161. if len(call.Name) == 0 {
  162. return errors.New("empty string is not a valid syscall")
  163. }
  164. // Convert the call's action to the libseccomp equivalent
  165. callAct, err := getAction(call.Action, call.ErrnoRet)
  166. if err != nil {
  167. return fmt.Errorf("action in seccomp profile is invalid: %w", err)
  168. }
  169. if callAct == defAct {
  170. // This rule is redundant, silently skip it
  171. // to avoid error from AddRule.
  172. return nil
  173. }
  174. // If we can't resolve the syscall, assume it is not supported
  175. // by this kernel. Warn about it, don't error out.
  176. callNum, err := libseccomp.GetSyscallFromName(call.Name)
  177. if err != nil {
  178. logrus.Debugf("unknown seccomp syscall %q ignored", call.Name)
  179. return nil
  180. }
  181. // Unconditional match - just add the rule
  182. if len(call.Args) == 0 {
  183. if err := filter.AddRule(callNum, callAct); err != nil {
  184. return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err)
  185. }
  186. } else {
  187. // If two or more arguments have the same condition,
  188. // Revert to old behavior, adding each condition as a separate rule
  189. argCounts := make([]uint, syscallMaxArguments)
  190. conditions := []libseccomp.ScmpCondition{}
  191. for _, cond := range call.Args {
  192. newCond, err := getCondition(cond)
  193. if err != nil {
  194. return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err)
  195. }
  196. argCounts[cond.Index] += 1
  197. conditions = append(conditions, newCond)
  198. }
  199. hasMultipleArgs := false
  200. for _, count := range argCounts {
  201. if count > 1 {
  202. hasMultipleArgs = true
  203. break
  204. }
  205. }
  206. if hasMultipleArgs {
  207. // Revert to old behavior
  208. // Add each condition attached to a separate rule
  209. for _, cond := range conditions {
  210. condArr := []libseccomp.ScmpCondition{cond}
  211. if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
  212. return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
  213. }
  214. }
  215. } else {
  216. // No conditions share same argument
  217. // Use new, proper behavior
  218. if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
  219. return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
  220. }
  221. }
  222. }
  223. return nil
  224. }
  225. // Version returns major, minor, and micro.
  226. func Version() (uint, uint, uint) {
  227. return libseccomp.GetLibraryVersion()
  228. }
  229. // Enabled is true if seccomp support is compiled in.
  230. const Enabled = true