| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268 |
- //go:build cgo && seccomp
- // +build cgo,seccomp
- package seccomp
- import (
- "errors"
- "fmt"
- libseccomp "github.com/seccomp/libseccomp-golang"
- "github.com/sirupsen/logrus"
- "golang.org/x/sys/unix"
- "github.com/opencontainers/runc/libcontainer/configs"
- "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
- )
- var (
- actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
- actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
- )
// syscallMaxArguments is the upper bound on the number of arguments a Linux
// system call can take (at most 6). It sizes the per-argument bookkeeping
// done while building conditional rules.
const syscallMaxArguments int = 6
- // InitSeccomp installs the seccomp filters to be used in the container as
- // specified in config.
- // Returns the seccomp file descriptor if any of the filters include a
- // SCMP_ACT_NOTIFY action, otherwise returns -1.
- func InitSeccomp(config *configs.Seccomp) (int, error) {
- if config == nil {
- return -1, errors.New("cannot initialize Seccomp - nil config passed")
- }
- defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
- if err != nil {
- return -1, errors.New("error initializing seccomp - invalid default action")
- }
- // Ignore the error since pre-2.4 libseccomp is treated as API level 0.
- apiLevel, _ := libseccomp.GetAPI()
- for _, call := range config.Syscalls {
- if call.Action == configs.Notify {
- if apiLevel < 6 {
- return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel)
- }
- // We can't allow the write syscall to notify to the seccomp agent.
- // After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain
- // number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we
- // never can write the seccomp fd to the parent and therefore the seccomp agent never receives
- // the seccomp fd and runc is hang during initialization.
- //
- // Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY.
- // Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and
- // send the seccomp fd to the agent (it is another process and not subject to the seccomp
- // filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp
- // agent allows those syscalls to proceed, initialization works just fine and the agent can
- // handle future read()/close() syscalls as it wanted.
- if call.Name == "write" {
- return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall")
- }
- }
- }
- // See comment on why write is not allowed. The same reason applies, as this can mean handling write too.
- if defaultAction == libseccomp.ActNotify {
- return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action")
- }
- filter, err := libseccomp.NewFilter(defaultAction)
- if err != nil {
- return -1, fmt.Errorf("error creating filter: %w", err)
- }
- // Add extra architectures
- for _, arch := range config.Architectures {
- scmpArch, err := libseccomp.GetArchFromString(arch)
- if err != nil {
- return -1, fmt.Errorf("error validating Seccomp architecture: %w", err)
- }
- if err := filter.AddArch(scmpArch); err != nil {
- return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err)
- }
- }
- // Unset no new privs bit
- if err := filter.SetNoNewPrivsBit(false); err != nil {
- return -1, fmt.Errorf("error setting no new privileges: %w", err)
- }
- // Add a rule for each syscall
- for _, call := range config.Syscalls {
- if call == nil {
- return -1, errors.New("encountered nil syscall while initializing Seccomp")
- }
- if err := matchCall(filter, call, defaultAction); err != nil {
- return -1, err
- }
- }
- seccompFd, err := patchbpf.PatchAndLoad(config, filter)
- if err != nil {
- return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err)
- }
- return seccompFd, nil
- }
- // Convert Libcontainer Action to Libseccomp ScmpAction
- func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
- switch act {
- case configs.Kill, configs.KillThread:
- return libseccomp.ActKillThread, nil
- case configs.Errno:
- if errnoRet != nil {
- return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
- }
- return actErrno, nil
- case configs.Trap:
- return libseccomp.ActTrap, nil
- case configs.Allow:
- return libseccomp.ActAllow, nil
- case configs.Trace:
- if errnoRet != nil {
- return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
- }
- return actTrace, nil
- case configs.Log:
- return libseccomp.ActLog, nil
- case configs.Notify:
- return libseccomp.ActNotify, nil
- case configs.KillProcess:
- return libseccomp.ActKillProcess, nil
- default:
- return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
- }
- }
- // Convert Libcontainer Operator to Libseccomp ScmpCompareOp
- func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
- switch op {
- case configs.EqualTo:
- return libseccomp.CompareEqual, nil
- case configs.NotEqualTo:
- return libseccomp.CompareNotEqual, nil
- case configs.GreaterThan:
- return libseccomp.CompareGreater, nil
- case configs.GreaterThanOrEqualTo:
- return libseccomp.CompareGreaterEqual, nil
- case configs.LessThan:
- return libseccomp.CompareLess, nil
- case configs.LessThanOrEqualTo:
- return libseccomp.CompareLessOrEqual, nil
- case configs.MaskEqualTo:
- return libseccomp.CompareMaskedEqual, nil
- default:
- return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule")
- }
- }
- // Convert Libcontainer Arg to Libseccomp ScmpCondition
- func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
- cond := libseccomp.ScmpCondition{}
- if arg == nil {
- return cond, errors.New("cannot convert nil to syscall condition")
- }
- op, err := getOperator(arg.Op)
- if err != nil {
- return cond, err
- }
- return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
- }
- // Add a rule to match a single syscall
- func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error {
- if call == nil || filter == nil {
- return errors.New("cannot use nil as syscall to block")
- }
- if len(call.Name) == 0 {
- return errors.New("empty string is not a valid syscall")
- }
- // Convert the call's action to the libseccomp equivalent
- callAct, err := getAction(call.Action, call.ErrnoRet)
- if err != nil {
- return fmt.Errorf("action in seccomp profile is invalid: %w", err)
- }
- if callAct == defAct {
- // This rule is redundant, silently skip it
- // to avoid error from AddRule.
- return nil
- }
- // If we can't resolve the syscall, assume it is not supported
- // by this kernel. Warn about it, don't error out.
- callNum, err := libseccomp.GetSyscallFromName(call.Name)
- if err != nil {
- logrus.Debugf("unknown seccomp syscall %q ignored", call.Name)
- return nil
- }
- // Unconditional match - just add the rule
- if len(call.Args) == 0 {
- if err := filter.AddRule(callNum, callAct); err != nil {
- return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err)
- }
- } else {
- // If two or more arguments have the same condition,
- // Revert to old behavior, adding each condition as a separate rule
- argCounts := make([]uint, syscallMaxArguments)
- conditions := []libseccomp.ScmpCondition{}
- for _, cond := range call.Args {
- newCond, err := getCondition(cond)
- if err != nil {
- return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err)
- }
- argCounts[cond.Index] += 1
- conditions = append(conditions, newCond)
- }
- hasMultipleArgs := false
- for _, count := range argCounts {
- if count > 1 {
- hasMultipleArgs = true
- break
- }
- }
- if hasMultipleArgs {
- // Revert to old behavior
- // Add each condition attached to a separate rule
- for _, cond := range conditions {
- condArr := []libseccomp.ScmpCondition{cond}
- if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
- return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
- }
- }
- } else {
- // No conditions share same argument
- // Use new, proper behavior
- if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
- return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
- }
- }
- }
- return nil
- }
- // Version returns major, minor, and micro.
- func Version() (uint, uint, uint) {
- return libseccomp.GetLibraryVersion()
- }
// Enabled reports whether seccomp support is compiled in. It is always true
// in this file.
const Enabled = true
|