Skip to content

Commit

Permalink
prepare-root: Use composefs as root filesystem
Browse files Browse the repository at this point in the history
This changes ostree-prepare-root to use the .ostree.cfs image as a
composefs filesystem, instead of the checkout.

Currently composefs is *always* used (if enabled at build time). Long
term we want to make this optional, but without lessening trust in the
system, i.e. without allowing an attacker to disable its use.

The final layout when this is active is:

 /        ro composefs mount
 /sysroot "real" root
 /etc     rw bind mount to $deploydir/etc
 /var     rw bind mount to $vardir

In order for this to work, we need to change the way prepare-root
works. Currently it works with CWD of $deploydir which is a bind mount
(so it can be later moved). However, we can't mount the composefs at
$deploydir, because then it will cover the etc dir from the underlying
deploydir and then we can't bind mount it.

Instead we change both codepaths to work with the destination
/sysroot.tmp, leaving the "real" $deploydir as CWD. I.e. /sysroot.tmp
holds the extra bind mount in the !use_composefs case, or the
composefs mount in the use_composefs case. This is really not that
different from before, as we already had to temporarily use
/sysroot.tmp at the end anyway.

A further note: I didn't test the overlayfs testcase, but the code
comment there mentions that you can't mount overlayfs on top of a
readonly mount. That seems incompatible with composefs.
  • Loading branch information
alexlarsson committed Apr 27, 2023
1 parent 60056f3 commit 091ccd2
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 40 deletions.
13 changes: 10 additions & 3 deletions Makefile-switchroot.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ ostree_prepare_root_SOURCES = \
src/switchroot/ostree-mount-util.h \
src/switchroot/ostree-prepare-root.c \
$(NULL)
ostree_prepare_root_CFLAGS =
ostree_prepare_root_CPPFLAGS = $(AM_CPPFLAGS)
ostree_prepare_root_LDADD =

if BUILDOPT_USE_STATIC_COMPILER
# ostree-prepare-root can be used as init in a system without a populated /lib.
Expand All @@ -43,10 +45,10 @@ if BUILDOPT_USE_STATIC_COMPILER
ostree_boot_SCRIPTS += ostree-prepare-root

ostree-prepare-root : $(ostree_prepare_root_SOURCES)
$(STATIC_COMPILER) -o $@ -static $(top_srcdir)/src/switchroot/ostree-prepare-root.c $(ostree_prepare_root_CPPFLAGS) $(AM_CFLAGS) $(DEFAULT_INCLUDES) -DOSTREE_PREPARE_ROOT_STATIC=1
$(STATIC_COMPILER) -o $@ -static $(top_srcdir)/src/switchroot/ostree-prepare-root.c $(ostree_prepare_root_CPPFLAGS) $(AM_CFLAGS) $(DEFAULT_INCLUDES) $(OT_DEP_COMPOSEFS_CFLAGS) $(OT_DEP_COMPOSEFS_LIBS) -DOSTREE_PREPARE_ROOT_STATIC=1
else
ostree_boot_PROGRAMS += ostree-prepare-root
ostree_prepare_root_CFLAGS = $(AM_CFLAGS) -Isrc/switchroot
ostree_prepare_root_CFLAGS += $(AM_CFLAGS) -Isrc/switchroot
endif

ostree_remount_SOURCES = \
Expand All @@ -56,9 +58,14 @@ ostree_remount_SOURCES = \
ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/libglnx
ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libglnx.la

if USE_COMPOSEFS
ostree_prepare_root_CFLAGS += $(OT_DEP_COMPOSEFS_CFLAGS)
ostree_prepare_root_LDADD += $(OT_DEP_COMPOSEFS_LIBS)
endif

if BUILDOPT_SYSTEMD
ostree_prepare_root_CPPFLAGS += -DHAVE_SYSTEMD=1
ostree_prepare_root_LDADD = $(AM_LDFLAGS) $(LIBSYSTEMD_LIBS)
ostree_prepare_root_LDADD += $(AM_LDFLAGS) $(LIBSYSTEMD_LIBS)
endif

# This is the "new mode" of using a generator for /var; see
Expand Down
93 changes: 56 additions & 37 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
#define OSTREE_PREPARE_ROOT_DEPLOYMENT_MSG SD_ID128_MAKE(71,70,33,6a,73,ba,46,01,ba,d3,1a,f8,88,aa,0d,f7)
#endif

#ifdef HAVE_COMPOSEFS
#include <libcomposefs/lcfs-mount.h>
#endif

#include "ostree-mount-util.h"

static inline bool
Expand Down Expand Up @@ -219,6 +223,7 @@ main(int argc, char *argv[])
/* Query the repository configuration - this is an operating system builder
* choice. More info: https://github.com/ostreedev/ostree/pull/1767
*/
const bool use_composefs = HAVE_COMPOSEFS; /* TODO: Read this from some config, maybe kernel cmdline for trust? */
const bool sysroot_readonly = sysroot_is_configured_ro (root_arg);
const bool sysroot_currently_writable = !path_is_on_readonly_fs (root_arg);
#ifdef USE_LIBSYSTEMD
Expand All @@ -239,16 +244,37 @@ main(int argc, char *argv[])
if (mount (NULL, "/", NULL, MS_REC | MS_PRIVATE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to make \"/\" private mount");

/* Make deploy_path a bind mount, so we can move it later */
if (mount (deploy_path, deploy_path, NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to make initial bind mount %s", deploy_path);
if (mkdir ("/sysroot.tmp", 0755) < 0)
err (EXIT_FAILURE, "couldn't create temporary sysroot /sysroot.tmp");

/* chdir to our new root. We need to do this after bind-mounting it over
* itself otherwise our cwd is still on the non-bind-mounted filesystem
* below. */
/* Run in the deploy_path dir so we can use relative paths below */
if (chdir (deploy_path) < 0)
err (EXIT_FAILURE, "failed to chdir to deploy_path");

/* We construct the new sysroot in /sysroot.tmp, which is either the composefs
mount or a bind mount of the deploy-dir */
if (use_composefs)
{
#ifdef HAVE_COMPOSEFS
const char *objdirs[] = { "/sysroot/ostree/repo/objects" };
struct lcfs_mount_options_s cfs_options = {
objdirs, 1,
};

if (lcfs_mount_image(".ostree.cfs", "/sysroot.tmp", &cfs_options) < 0)
err (EXIT_FAILURE, "Failed to mount composefs");
#else
err (EXIT_FAILURE, "Composefs not supported");
#endif
}
else
{
/* Make /sysroot.tmp a bind mount, so we can move it later */
if (mount (deploy_path, "/sysroot.tmp", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to make initial bind mount %s", deploy_path);
}


/* This will result in a system with /sysroot read-only. Thus, two additional
* writable bind-mounts (for /etc and /var) are required later on. */
if (sysroot_readonly)
Expand All @@ -274,50 +300,50 @@ main(int argc, char *argv[])
{
if (snprintf (srcpath, sizeof(srcpath), "%s/boot", root_mountpoint) < 0)
err (EXIT_FAILURE, "failed to assemble /boot path");
if (mount (srcpath, "boot", NULL, MS_BIND | MS_SILENT, NULL) < 0)
if (mount (srcpath, "/systree.tmp/boot", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount %s to boot", srcpath);
}
}

/* Prepare /etc.
* No action required if sysroot is writable. Otherwise, a bind-mount for
* the deployment needs to be created and remounted as read/write. */
if (sysroot_readonly)
if (sysroot_readonly || use_composefs)
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", "etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at %s", srcpath);
if (mount ("etc", "etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at %s", srcpath);
if (mount ("etc", "/sysroot.tmp/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount ("/sysroot.tmp/etc", "/sysroot.tmp/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
}

/* Prepare /usr.
* It may be either just a read-only bind-mount, or a persistent overlayfs. */
if (lstat (".usr-ovl-work", &stbuf) == 0)
{
/* Do we have a persistent overlayfs for /usr? If so, mount it now. */
const char usr_ovl_options[] = "lowerdir=usr,upperdir=.usr-ovl-upper,workdir=.usr-ovl-work";
const char usr_ovl_options[] = "lowerdir=/sysroot.tmp/usr,upperdir=.usr-ovl-upper,workdir=.usr-ovl-work";

/* Except overlayfs barfs if we try to mount it on a read-only
* filesystem. For this use case I think admins are going to be
* okay if we remount the rootfs here, rather than waiting until
* later boot and `systemd-remount-fs.service`.
*/
if (path_is_on_readonly_fs ("."))
if (path_is_on_readonly_fs ("/sysroot.tmp"))
{
if (mount (".", ".", NULL, MS_REMOUNT | MS_SILENT, NULL) < 0)
if (mount ("/sysroot.tmp", "/sysroot.tmp", NULL, MS_REMOUNT | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to remount rootfs writable (for overlayfs)");
}

if (mount ("overlay", "usr", "overlay", MS_SILENT, usr_ovl_options) < 0)
if (mount ("overlay", "/sysroot.tmp/usr", "overlay", MS_SILENT, usr_ovl_options) < 0)
err (EXIT_FAILURE, "failed to mount /usr overlayfs");
}
else
else if (!use_composefs)
{
/* Otherwise, a read-only bind mount for /usr */
if (mount ("usr", "usr", NULL, MS_BIND | MS_SILENT, NULL) < 0)
/* Otherwise, a read-only bind mount for /usr. (Not needed for composefs) */
if (mount ("/sysroot.tmp/usr", "/sysroot.tmp/usr", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr");
if (mount ("usr", "usr", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL) < 0)
if (mount ("/sysroot.tmp/usr", "/sysroot.tmp/usr", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr");
}

Expand Down Expand Up @@ -350,7 +376,7 @@ main(int argc, char *argv[])
*/
if (mount_var)
{
if (mount ("../../var", "var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
if (mount ("../../var", "/sysroot.tmp/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount ../../var to var");
}

Expand All @@ -362,6 +388,9 @@ main(int argc, char *argv[])
if (!running_as_pid1)
touch_run_ostree ();

if (chdir ("/sysroot.tmp") < 0)
err (EXIT_FAILURE, "failed to chdir to /sysroot.tmp");

if (strcmp(root_mountpoint, "/") == 0)
{
/* pivot_root rotates two mount points around. In this instance . (the
Expand All @@ -376,29 +405,19 @@ main(int argc, char *argv[])
else
{
/* In this instance typically we have our ready made-up up root at
* /sysroot/ostree/deploy/.../ (deploy_path) and the real rootfs at
* /sysroot (root_mountpoint). We want to end up with our made-up root at
* /sysroot.tmp and the real rootfs at /sysroot (root_mountpoint).
* We want to end up with our made-up root at
* /sysroot/ and the real rootfs under /sysroot/sysroot as systemd will be
* responsible for moving /sysroot to /.
*
* We need to do this in 3 moves to avoid trying to move /sysroot under
* itself:
*
* 1. /sysroot/ostree/deploy/... -> /sysroot.tmp
* 2. /sysroot -> /sysroot.tmp/sysroot
* 3. /sysroot.tmp -> /sysroot
*/
if (mkdir ("/sysroot.tmp", 0755) < 0)
err (EXIT_FAILURE, "couldn't create temporary sysroot /sysroot.tmp");

if (mount (deploy_path, "/sysroot.tmp", NULL, MS_MOVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to MS_MOVE '%s' to '/sysroot.tmp'", deploy_path);

if (mount (root_mountpoint, "sysroot", NULL, MS_MOVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to MS_MOVE '%s' to 'sysroot'", root_mountpoint);

if (mount (".", root_mountpoint, NULL, MS_MOVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to MS_MOVE %s to %s", deploy_path, root_mountpoint);
err (EXIT_FAILURE, "failed to MS_MOVE /sysroot.tmp to %s", root_mountpoint);

if (chdir ("/sysroot") < 0)
err (EXIT_FAILURE, "failed to chdir to /sysroot.tmp");

if (rmdir ("/sysroot.tmp") < 0)
err (EXIT_FAILURE, "couldn't remove temporary sysroot /sysroot.tmp");
Expand Down

0 comments on commit 091ccd2

Please sign in to comment.