From 03fff5827f7b966301bc25bcf15523427651c8a3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Jan 2025 20:52:06 -0800 Subject: [PATCH] Use seccomp_unotify to catch syscalls that violate filters The main benefit of seccomp_unotify is the ability to resume the trapped syscall. We have seen a case when a blocked syscall was in the msan internals that prevented the Go runtime from reaching the SIGSYS handler and producing a panic report. Another benefit of this approach is the ability to run a target application and find out all syscalls that have to be handled. PiperOrigin-RevId: 713139953 --- pkg/abi/linux/seccomp.go | 1 + pkg/seccomp/BUILD | 1 + pkg/seccomp/seccomp.go | 14 ++++++- pkg/seccomp/seccomp_unsafe.go | 77 ++++++++++++++++++++++++++++++++++- runsc/boot/filter/BUILD | 1 + runsc/boot/filter/filter.go | 13 ++++-- 6 files changed, 101 insertions(+), 6 deletions(-) diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go index a8cc45fdb1..d5fe815c29 100644 --- a/pkg/abi/linux/seccomp.go +++ b/pkg/abi/linux/seccomp.go @@ -31,6 +31,7 @@ const ( SECCOMP_FILTER_FLAG_TSYNC = 1 SECCOMP_FILTER_FLAG_NEW_LISTENER = 1 << 3 + SECCOMP_FILTER_FLAG_TSYNC_ESRCH = 1 << 4 SECCOMP_USER_NOTIF_FLAG_CONTINUE = 1 diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 667428cc00..80d6aec233 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -20,6 +20,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/bpf", + "//pkg/hostsyscall", "//pkg/log", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index a26efcd6b7..4325f870e1 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -82,8 +82,14 @@ func Install(rules SyscallRules, denyRules SyscallRules, options ProgramOptions) } // Perform the actual installation. 
- if err := SetFilter(instrs); err != nil { - return fmt.Errorf("failed to set filter: %v", err) + if options.LogNotifications { + if err := SetFilterAndLogNotifications(instrs); err != nil { + return fmt.Errorf("failed to set filter: %v", err) + } + } else { + if err := SetFilter(instrs); err != nil { + return fmt.Errorf("failed to set filter: %v", err) + } } log.Infof("Seccomp filters installed.") @@ -321,6 +327,10 @@ type ProgramOptions struct { // called >10% of the time out of all syscalls made). // It is ordered from most frequent to least frequent. HotSyscalls []uintptr + + // LogNotifications enables logging of user notifications at the + // warning level. Syscalls that trigger notifications are not blocked. + LogNotifications bool } // DefaultProgramOptions returns the default program options. diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index 629ac5cca1..f6de2375df 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -16,14 +16,87 @@ package seccomp import ( "fmt" + "os" "runtime" "unsafe" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/hostsyscall" + "gvisor.dev/gvisor/pkg/log" ) +// SetFilterAndLogNotifications installs the given BPF program and logs user +// notifications triggered by the seccomp filter. +// +// This function is intended for debugging seccomp filter violations and should +// not be used in production environments. +// +// Note: It spawns a background goroutine to monitor a seccomp file descriptor +// and log any received notifications. +func SetFilterAndLogNotifications(instrs []bpf.Instruction) error { + // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See + // seccomp(2) for details. + // + // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole + // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must + // remain on the same thread. 
no_new_privs will be propagated to other + // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in + // kernel/seccomp.c:seccomp_sync_threads(). + runtime.LockOSThread() + defer runtime.UnlockOSThread() + if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { + return errno + } + + sockProg := linux.SockFprog{ + Len: uint16(len(instrs)), + Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), + } + flags := linux.SECCOMP_FILTER_FLAG_TSYNC | + linux.SECCOMP_FILTER_FLAG_NEW_LISTENER | + linux.SECCOMP_FILTER_FLAG_TSYNC_ESRCH + fd, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, uint32(flags), unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + f := os.NewFile(fd, "seccomp_notify") + go func() { + // LockOSThread should help minimize interactions with the scheduler. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + var ( + req linux.SeccompNotif + resp linux.SeccompNotifResp + ) + for { + req = linux.SeccompNotif{} + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(f.Fd()), + uintptr(linux.SECCOMP_IOCTL_NOTIF_RECV), + uintptr(unsafe.Pointer(&req))) + if errno != 0 { + if errno == unix.EINTR { + continue + } + panic(fmt.Sprintf("SECCOMP_IOCTL_NOTIF_RECV failed with %d", errno)) + } + log.Warningf("Seccomp violation: %#v", req) + resp = linux.SeccompNotifResp{ + ID: req.ID, + Flags: linux.SECCOMP_USER_NOTIF_FLAG_CONTINUE, + } + errno = hostsyscall.RawSyscallErrno(unix.SYS_IOCTL, uintptr(f.Fd()), + uintptr(linux.SECCOMP_IOCTL_NOTIF_SEND), + uintptr(unsafe.Pointer(&resp))) + if errno != 0 { + panic(fmt.Sprintf("SECCOMP_IOCTL_NOTIF_SEND failed with %d", errno)) + } + } + }() + return nil +} + // SetFilter installs the given BPF program. func SetFilter(instrs []bpf.Instruction) error { // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. 
See @@ -44,7 +117,8 @@ func SetFilter(instrs []bpf.Instruction) error { Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), } - tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + flags := linux.SECCOMP_FILTER_FLAG_TSYNC + tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, uint32(flags), unsafe.Pointer(&sockProg)) if errno != 0 { return errno } @@ -55,6 +129,7 @@ func SetFilter(instrs []bpf.Instruction) error { if tid != 0 { return fmt.Errorf("couldn't synchronize filter to TID %d", tid) } + return nil } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 81642d1a6d..d96767743f 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/seccomp/precompiledseccomp", "//pkg/sync", "//runsc/boot/filter/config", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 8c5cdd35c0..0df512fe5b 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -29,7 +29,10 @@ import ( // If you suspect the Sentry is getting killed due to a seccomp violation, // change this to `true` to get a panic stack trace when there is a // violation. -const debugFilter = false +const ( + debugFilterPanic = false // Panic on seccomp violation with stack trace. + debugFilterWarn = false // Log seccomp violation, but continue program execution. +) // Options is a re-export of the config Options type under this package. 
type Options = config.Options @@ -41,7 +44,7 @@ func Install(opt Options) error { } key := opt.ConfigKey() precompiled, usePrecompiled := GetPrecompiled(key) - if usePrecompiled && !debugFilter { + if usePrecompiled && !debugFilterPanic && !debugFilterWarn { vars := opt.Vars() log.Debugf("Loaded precompiled seccomp instructions for options %v, using variables: %v", key, vars) insns, err := precompiled.RenderInstructions(vars) @@ -51,9 +54,13 @@ func Install(opt Options) error { return seccomp.SetFilter(insns) } seccompOpts := config.SeccompOptions(opt) - if debugFilter { + if debugFilterPanic { log.Infof("Seccomp filter debugging is enabled; seccomp failures will result in a panic stack trace.") seccompOpts.DefaultAction = linux.SECCOMP_RET_TRAP + } else if debugFilterWarn { + log.Infof("Seccomp filter debugging is enabled; seccomp failures will be logged") + seccompOpts.DefaultAction = linux.SECCOMP_RET_USER_NOTIF + seccompOpts.LogNotifications = true } else { log.Infof("No precompiled program found for config options %v, building seccomp program from scratch. This may slow down container startup.", key) if log.IsLogging(log.Debug) {