Skip to content

Commit

Permalink
Validate whether process belongs to the container's NetNS
Browse files Browse the repository at this point in the history
Ignores any sockets created in a non-container NetNS, including nested ones.

Signed-off-by: Naoki MATSUMOTO <[email protected]>
  • Loading branch information
naoki9911 committed Apr 6, 2024
1 parent 8c74716 commit cc3ae88
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 11 deletions.
47 changes: 38 additions & 9 deletions pkg/bypass4netns/bypass4netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,19 @@ func (h *notifHandler) registerSocket(pid int, sockfd int, syscallName string) (
sock.state = NotBypassable
logger.Debugf("failed to get socket args err=%q", err)
} else {
if sockDomain != syscall.AF_INET && sockDomain != syscall.AF_INET6 {
// compare process's netns is same or not with container init process's netns
isSameNetNS, err := util.SameNetNS(h.containerInitPid, pid)
if err != nil {
logger.Errorf("failed to check NetNS: err=%q", err)
sock.state = NotBypassable
}

// check the process is executed with container's netns.
// processes with nested netns are not handled by bypass4netns
if !isSameNetNS {
logger.Infof("process seems to be executed in other netns. socket is NotBypassable and ignored")
sock.state = NotBypassable
} else if sockDomain != syscall.AF_INET && sockDomain != syscall.AF_INET6 {
// non IP sockets are not handled.
sock.state = NotBypassable
logger.Debugf("socket domain=0x%x", sockDomain)
Expand Down Expand Up @@ -629,6 +641,8 @@ type Handler struct {

// key is child port
forwardingPorts map[int]ForwardPortMapping

ContainerInitPid int
}

// NewHandler creates new seccomp notif handler
Expand All @@ -640,6 +654,7 @@ func NewHandler(socketPath, comSocketPath, tracerAgentLogPath string) *Handler {
ignoredSubnets: []net.IPNet{},
forwardingPorts: map[int]ForwardPortMapping{},
readyFd: -1,
ContainerInitPid: -1,
}

return &handler
Expand Down Expand Up @@ -711,6 +726,10 @@ type notifHandler struct {

// cache pidfd to reduce latency. key is pid.
pidInfos map[int]pidInfo

// container init process's pid
// used to check whether netns is container or not.
containerInitPid int
}

type containerInterface struct {
Expand All @@ -732,14 +751,15 @@ type pidInfo struct {
tgid int
}

func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler {
func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState, containerInitPid int) *notifHandler {
notifHandler := notifHandler{
fd: libseccomp.ScmpFd(fd),
state: state,
forwardingPorts: map[int]ForwardPortMapping{},
processes: map[int]*processStatus{},
memfds: map[int]int{},
pidInfos: map[int]pidInfo{},
fd: libseccomp.ScmpFd(fd),
state: state,
forwardingPorts: map[int]ForwardPortMapping{},
processes: map[int]*processStatus{},
memfds: map[int]int{},
pidInfos: map[int]pidInfo{},
containerInitPid: containerInitPid,
}
notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets)
notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate
Expand Down Expand Up @@ -793,8 +813,17 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon
continue
}

// state.Pid can be a process in a nested netns when it is executed with 'ip netns exec'.
// So we cannot distinguish the container netns from a nested netns by simply comparing state.Pid with the hooked process's pid.
// Instead of state.Pid, the init process's pid should be used.
// bypass4netns treats the first process it receives as the init process.
if h.ContainerInitPid < 0 {
h.ContainerInitPid = state.Pid
logrus.Infof("ContainerInitPid is %d", h.ContainerInitPid)
}

logrus.Infof("Received new seccomp fd: %v", newFd)
notifHandler := h.newNotifHandler(newFd, state)
notifHandler := h.newNotifHandler(newFd, state, h.ContainerInitPid)
notifHandler.c2cConnections = c2cConfig
notifHandler.multinode = multinodeConfig
if notifHandler.multinode.Enable {
Expand Down
12 changes: 10 additions & 2 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@ func ShrinkID(id string) string {
}

func SameUserNS(pidX, pidY int) (bool, error) {
nsX := fmt.Sprintf("/proc/%d/ns/user", pidX)
nsY := fmt.Sprintf("/proc/%d/ns/user", pidY)
return sameNS(pidX, pidY, "user")
}

// SameNetNS checks pidX and pidY against each other for the "net" namespace
// by delegating to sameNS with the namespace name "net", which builds the
// /proc/<pid>/ns/net path for each pid.
// It returns sameNS's result and error unchanged.
// NOTE(review): presumably the result is true when both processes share a
// network namespace; the comparison itself is in sameNS — confirm there.
func SameNetNS(pidX, pidY int) (bool, error) {
return sameNS(pidX, pidY, "net")
}

func sameNS(pidX, pidY int, nsName string) (bool, error) {
nsX := fmt.Sprintf("/proc/%d/ns/%s", pidX, nsName)
nsY := fmt.Sprintf("/proc/%d/ns/%s", pidY, nsName)
nsXResolved, err := os.Readlink(nsX)
if err != nil {
return false, err
Expand Down
4 changes: 4 additions & 0 deletions test/DockerfileNestedNetNS
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Image used by the nested netns test: Ubuntu 22.04 plus the networking tools
# installed below (netcat, iproute2, tcpdump, iperf3).
FROM ubuntu:22.04

# Refresh the package index, upgrade and install in one layer so that a cached
# "update" layer can never be combined with a newer "install" step.
# apt-get is used because the 'apt' front end does not promise a stable CLI
# for scripts. The package lists are removed afterwards to keep the layer small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get upgrade -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y netcat iproute2 tcpdump iperf3 \
    && rm -rf /var/lib/apt/lists/*
44 changes: 44 additions & 0 deletions test/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,47 @@ echo "===== multinode test (single node) ===="
systemctl --user stop etcd.service
systemctl --user reset-failed
)
echo "===== nested netns test ===="
(
  # Runs iperf3 twice inside a bypass4netns-enabled container that publishes
  # host port 5202 -> container port 5201:
  #   1. in the container's own netns  -> the host-side client must connect;
  #   2. in a nested netns ("nested")  -> the host-side client must fail.
  CONTAINER_NAME="test-nested"

  # Best-effort cleanup of leftovers from an earlier run; errors are ignored.
  set +e
  nerdctl rm -f "$CONTAINER_NAME"
  systemctl --user stop run-iperf3
  systemctl --user stop run-bypass4netnsd
  systemctl --user reset-failed
  set -ex

  IMAGE_NAME="b4ns:nested"
  nerdctl build -f ./DockerfileNestedNetNS -t "$IMAGE_NAME" .
  systemd-run --user --unit run-bypass4netnsd bypass4netnsd
  sleep 1
  nerdctl run --privileged --annotation nerdctl/bypass4netns=true -d -p 5202:5201 --name "$CONTAINER_NAME" "$IMAGE_NAME" sleep infinity

  # with container's netns
  systemd-run --user --unit run-iperf3 nerdctl exec "$CONTAINER_NAME" iperf3 -s
  sleep 1
  iperf3 -c localhost -t 1 -p 5202 --connect-timeout 1000 # it must success to connect.
  systemctl --user stop run-iperf3
  systemctl --user reset-failed

  # with nested netns
  # NOTE(review): the extra sysfs mount at /sys2 is presumably required for
  # 'ip netns' to work inside the container — confirm.
  nerdctl exec "$CONTAINER_NAME" mkdir /sys2
  nerdctl exec "$CONTAINER_NAME" mount -t sysfs --make-private /sys2
  nerdctl exec "$CONTAINER_NAME" ip netns add nested
  systemd-run --user --unit run-iperf3 nerdctl exec "$CONTAINER_NAME" ip netns exec nested iperf3 -s
  sleep 1

  # The client must fail here. Testing the command directly in 'if' keeps
  # errexit enabled without aborting the subshell on the expected failure.
  if iperf3 -c localhost -t 1 -p 5202 --connect-timeout 1000; then
    echo "iperf3 must not success to connect."
    exit 1
  fi

  systemctl --user stop run-iperf3
  # Use the same variable as the rest of the section instead of a hardcoded name.
  nerdctl rm -f "$CONTAINER_NAME"
  systemctl --user reset-failed
)

0 comments on commit cc3ae88

Please sign in to comment.