Skip to content

Commit

Permalink
Add concept of state overlays
Browse files Browse the repository at this point in the history
In the OSTree model, executables go in `/usr`, state in `/var` and
configuration in `/etc`. Software that lives in `/opt` however messes
this up because it often mixes code *and* state, making it harder to
manage.

More generally, it's sometimes useful to have the OSTree commit contain
code under a certain path, but still allow that path to be writable by
software and the sysadmin at runtime (`/usr/local` is another instance).

Add the concept of state overlays. A state overlay is an overlayfs
mount whose upper directory, which contains unmanaged state, is carried
forward on top of a lower directory, containing OSTree-managed files.

In the example of `/usr/local`, OSTree commits can ship content there,
all while allowing users to e.g. add scripts in `/usr/local/bin` when
booted into that commit.

Some reconciliation logic is executed whenever the base is updated so
that newer files in the base are never shadowed by a copied up version
in the upper directory. This matches RPM semantics when upgrading
packages whose files may have been modified.

For ease of integration, this is exposed as a systemd template unit which
any downstream distro/user can enable. The instance name is the mountpath
in escaped systemd path notation (e.g.
`ostree-state-overlay@usr-local.service`).

See discussions in #3113 for
more details.
  • Loading branch information
jlebon committed Dec 14, 2023
1 parent 234fa2c commit fb5050d
Show file tree
Hide file tree
Showing 7 changed files with 431 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Makefile-boot.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
src/boot/ostree-finalize-staged.service \
src/boot/ostree-finalize-staged.path \
src/boot/ostree-finalize-staged-hold.service \
src/boot/ostree-state-overlay@.service \
$(NULL)
systemdtmpfilesdir = $(prefix)/lib/tmpfiles.d
dist_systemdtmpfiles_DATA = src/boot/ostree-tmpfiles.conf
Expand Down Expand Up @@ -72,6 +73,7 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \
src/boot/ostree-remount.service \
src/boot/ostree-finalize-staged.service \
src/boot/ostree-finalize-staged-hold.service \
src/boot/ostree-state-overlay@.service \
src/boot/grub2/grub2-15_ostree \
src/boot/grub2/ostree-grub-generator \
$(NULL)
1 change: 1 addition & 0 deletions Makefile-ostree.am
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ ostree_SOURCES += \
src/ostree/ot-admin-builtin-post-copy.c \
src/ostree/ot-admin-builtin-upgrade.c \
src/ostree/ot-admin-builtin-unlock.c \
src/ostree/ot-admin-builtin-state-overlay.c \
src/ostree/ot-admin-builtins.h \
src/ostree/ot-admin-instutil-builtin-selinux-ensure-labeled.c \
src/ostree/ot-admin-instutil-builtin-set-kargs.c \
Expand Down
36 changes: 36 additions & 0 deletions src/boot/ostree-state-overlay@.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (C) 2023 Red Hat Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

[Unit]
# Template unit: the instance name is the overlay mount path in escaped
# systemd path notation (%i); %I is its unescaped form.
Description=OSTree State Overlay On /%I
Documentation=man:ostree(1)
DefaultDependencies=no
# only relevant on systems booted with an `ostree` kernel argument
ConditionKernelCommandLine=ostree
# run after /var is set up since that's where the upperdir is stored
# and after boot.mount so we can load the sysroot
After=var.mount boot.mount
# but before local-fs.target, which we consider ourselves a part of
Before=local-fs.target

[Service]
# the command mounts the overlay and exits; keep the unit active afterwards
Type=oneshot
RemainAfterExit=yes
# arguments are NAME (the escaped instance name, used as the directory name
# under /var/ostree/state-overlays) and MOUNTPATH (the absolute path to overlay)
ExecStart=/usr/bin/ostree admin state-overlay %i /%I
StandardInput=null
StandardOutput=journal
StandardError=journal+console

[Install]
WantedBy=local-fs.target
243 changes: 243 additions & 0 deletions src/ostree/ot-admin-builtin-state-overlay.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
/* Copyright (C) 2023 Red Hat, Inc.
*
* SPDX-License-Identifier: LGPL-2.0+
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <https://www.gnu.org/licenses/>.
*/

#include "config.h"

#include <fcntl.h>
#include <glib-unix.h>
#include <sched.h>
#include <stdlib.h>
#include <sys/mount.h>

#include "glnx-errors.h"
#include "glnx-fdio.h"
#include "glnx-local-alloc.h"
#include "glnx-shutil.h"
#include "glnx-xattrs.h"
#include "ot-admin-builtins.h"

/* Parent directory holding one subdirectory per state overlay (keyed by the
 * overlay NAME); each of those contains the overlayfs upper and work dirs. */
#define OSTREE_STATEOVERLAYS_DIR "/var/ostree/state-overlays"
/* Name of the overlayfs upperdir inside an overlay's directory. */
#define OSTREE_STATEOVERLAY_UPPER_DIR "upper"
/* Name of the overlayfs workdir inside an overlay's directory. */
#define OSTREE_STATEOVERLAY_WORK_DIR "work"

/* xattr overlayfs sets (with value "y") on upperdir directories that are opaque;
 * see https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html */
#define OVERLAYFS_DIR_XATTR_OPAQUE "trusted.overlay.opaque"

/* This builtin takes no options of its own, only positional arguments. */
static GOptionEntry options[] = { { NULL } };

/* Create @overlay_dir (and any missing parents) together with its "work" and
 * "upper" subdirectories, all with mode 0755.
 *
 * On success, returns TRUE and hands ownership of a directory fd for
 * @overlay_dir to the caller through @out_overlay_dfd. On failure, returns
 * FALSE with @error set and leaves @out_overlay_dfd untouched.
 */
static gboolean
ensure_overlay_dirs (const char *overlay_dir, int *out_overlay_dfd, GCancellable *cancellable,
                     GError **error)
{
  /* created in this order, relative to the overlay directory */
  static const char *const subdirs[]
      = { OSTREE_STATEOVERLAY_WORK_DIR, OSTREE_STATEOVERLAY_UPPER_DIR };

  glnx_autofd int dfd = -1;
  if (!glnx_shutil_mkdir_p_at_open (AT_FDCWD, overlay_dir, 0755, &dfd, cancellable, error))
    return FALSE;

  for (size_t i = 0; i < sizeof (subdirs) / sizeof (subdirs[0]); i++)
    {
      if (!glnx_shutil_mkdir_p_at (dfd, subdirs[i], 0755, cancellable, error))
        return FALSE;
    }

  /* everything exists; transfer the fd so the autofd cleanup doesn't close it */
  *out_overlay_dfd = glnx_steal_fd (&dfd);
  return TRUE;
}

/* Determine whether the entry @dname in directory @dfd is an opaque overlayfs
 * directory, i.e. whether it carries the OVERLAYFS_DIR_XATTR_OPAQUE xattr with
 * the exact one-byte value "y". The entry itself is queried (symlinks are not
 * followed).
 *
 * On success, returns TRUE and stores the answer in @out_is_opaque; a missing
 * xattr or any other value yields FALSE there. On failure (including a path
 * that does not fit in PATH_MAX), returns FALSE with @error set.
 */
static gboolean
is_opaque_dir (int dfd, const char *dname, gboolean *out_is_opaque, GError **error)
{
  /* XXX: this is basically like a `glnx_lgetxattrat_allow_noent()`; upstream it */

  char pathbuf[PATH_MAX];
  int pathlen = snprintf (pathbuf, sizeof (pathbuf), "/proc/self/fd/%d/%s", dfd, dname);
  if (pathlen < 0 || (size_t)pathlen >= sizeof (pathbuf))
    {
      /* never query a truncated path: it could name a different file */
      errno = ENAMETOOLONG;
      return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE);
    }

  /* The only value we care about is the single byte 'y', so read into a small
   * fixed buffer instead of probing the size and allocating. This also avoids
   * the race between a size query and the actual read. */
  guint8 buf[2];
  ssize_t nread;
  if (TEMP_FAILURE_RETRY (nread
                          = lgetxattr (pathbuf, OVERLAYFS_DIR_XATTR_OPAQUE, buf, sizeof (buf)))
      < 0)
    {
      /* ENODATA: xattr not present; ERANGE: value longer than our buffer, so
       * it cannot be "y". Either way the directory is not opaque. */
      if (errno != ENODATA && errno != ERANGE)
        return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE);
      *out_is_opaque = FALSE;
      return TRUE;
    }

  *out_is_opaque = (nread == 1 && buf[0] == 'y');
  return TRUE;
}

/* Walk the directory @upper_dfd and remove every entry that shadows an entry
 * of the same name in @lower_dfd. Entries that exist only in the upperdir are
 * kept. When both sides are directories and the upper one is not opaque, the
 * directory is kept and its contents are pruned recursively instead.
 *
 * Returns TRUE on success; on failure returns FALSE with @error set (errors
 * from a recursive call are prefixed with the subdirectory name).
 */
static gboolean
prune_upperdir_recurse (int lower_dfd, int upper_dfd, GCancellable *cancellable, GError **error)
{
g_auto (GLnxDirFdIterator) dfd_iter = { 0 };
if (!glnx_dirfd_iterator_init_at (upper_dfd, ".", FALSE, &dfd_iter, error))
return FALSE;

while (TRUE)
{
struct dirent *dent = NULL;
if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, cancellable, error))
return FALSE;
/* a NULL entry signals the end of the directory */
if (dent == NULL)
break;

/* do we have an entry of the same name in the lowerdir? */
struct stat stbuf;
if (!glnx_fstatat_allow_noent (lower_dfd, dent->d_name, &stbuf, AT_SYMLINK_NOFOLLOW, error))
return FALSE;
/* must be checked right after the call above: this relies on
 * glnx_fstatat_allow_noent() leaving errno as ENOENT when the entry is absent */
if (errno == ENOENT)
continue; /* state file (i.e. upperdir only); carry on */

/* ok, it shadows; are they both directories? */
if (dent->d_type == DT_DIR && S_ISDIR (stbuf.st_mode))
{
/* is the upper directory opaque, i.e. does it hide the lower one entirely? */
gboolean is_opaque = FALSE;
if (!is_opaque_dir (upper_dfd, dent->d_name, &is_opaque, error))
return FALSE;

if (!is_opaque)
{
/* recurse: keep the directory itself, prune the shadowing entries inside it */
glnx_autofd int lower_subdfd = -1;
if (!glnx_opendirat (lower_dfd, dent->d_name, FALSE, &lower_subdfd, error))
return FALSE;
glnx_autofd int upper_subdfd = -1;
if (!glnx_opendirat (upper_dfd, dent->d_name, FALSE, &upper_subdfd, error))
return FALSE;
if (!prune_upperdir_recurse (lower_subdfd, upper_subdfd, cancellable, error))
return glnx_prefix_error (error, "in %s", dent->d_name);

continue;
}

/* fallthrough; implicitly delete opaque directories */
}

/* any other case, we prune (this also implicitly covers whiteouts and opaque dirs) */
if (dent->d_type == DT_DIR)
{
if (!glnx_shutil_rm_rf_at (upper_dfd, dent->d_name, cancellable, error))
return FALSE;
}
/* just unlinkat(); saves one openat() call */
else if (!glnx_unlinkat (upper_dfd, dent->d_name, 0, error))
return FALSE;
}

return TRUE;
}

/* Prune the upperdir of a state overlay against the current lower content.
 *
 * Opens @mountpath (resolved relative to the current working directory if not
 * absolute) as the lower directory and the "upper" subdirectory of
 * @overlay_dfd as the upper directory, removes every upperdir entry that
 * shadows a lowerdir entry, and finally updates the timestamps of the upperdir
 * to the current time to record that pruning completed.
 *
 * @sysroot_fd and @deployment_path are accepted for the caller's convenience
 * but are currently not used by this function.
 *
 * Returns TRUE on success; on failure returns FALSE with @error set.
 */
static gboolean
prune_upperdir (int sysroot_fd, const char *deployment_path, const char *mountpath, int overlay_dfd,
                GCancellable *cancellable, GError **error)
{
  glnx_autofd int lower_dfd = -1;
  if (!glnx_opendirat (AT_FDCWD, mountpath, FALSE, &lower_dfd, error))
    return FALSE;

  glnx_autofd int upper_dfd = -1;
  if (!glnx_opendirat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, FALSE, &upper_dfd, error))
    return FALSE;

  if (!prune_upperdir_recurse (lower_dfd, upper_dfd, cancellable, error))
    return FALSE;

  /* touch upperdir to mark prune as completed; a NULL times argument sets the
   * timestamps to the current time */
  if (utimensat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, NULL, 0) < 0)
    return glnx_throw_errno_prefix (error, "utimensat(upper)");

  return TRUE;
}

/* Mount an overlayfs on top of @mountpath, using @mountpath itself as the
 * lowerdir and the "upper" and "work" directories of the state overlay called
 * @name (under OSTREE_STATEOVERLAYS_DIR) as upperdir and workdir.
 *
 * Returns TRUE on success; on failure returns FALSE with @error set from errno.
 */
static gboolean
mount_overlay (const char *mountpath, const char *name, GError **error)
{
  /* Real paths are spelled out on purpose: /proc/self/fd/... references would
   * save the allocations, but the option string ends up verbatim in the mount
   * table, where cryptic paths would not help anyone. */
  g_autofree char *upper_path
      = g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_UPPER_DIR, NULL);
  g_autofree char *work_path
      = g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_WORK_DIR, NULL);
  g_autofree char *mount_data
      = g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", mountpath, upper_path, work_path);

  if (mount ("overlay", mountpath, "overlay", MS_SILENT, mount_data) == 0)
    return TRUE;

  return glnx_throw_errno_prefix (error, "mount(%s)", mountpath);
}

/* Implementation of `ostree admin state-overlay NAME MOUNTPATH`, called by
 * ostree-state-overlay@.service.
 *
 * Ensures the state directories for overlay NAME exist, prunes the upperdir
 * if the booted deployment directory has a newer mtime than the upperdir,
 * and then mounts the overlay on MOUNTPATH.
 *
 * Returns TRUE on success; on failure returns FALSE with @error set.
 */
gboolean
ot_admin_builtin_state_overlay (int argc, char **argv, OstreeCommandInvocation *invocation,
                                GCancellable *cancellable, GError **error)
{
  g_autoptr (GOptionContext) context = g_option_context_new ("NAME MOUNTPATH");
  g_autoptr (OstreeSysroot) sysroot = NULL;

  /* Parse the command line; this also hands us the sysroot used below. */
  if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
                                          OSTREE_ADMIN_BUILTIN_FLAG_NONE, invocation, &sysroot,
                                          cancellable, error))
    return FALSE;

  if (argc < 3)
    return glnx_throw (error, "Missing NAME or MOUNTPATH");

  /* Sanity-check: this only makes sense on a booted OSTree system */
  OstreeDeployment *booted = ostree_sysroot_get_booted_deployment (sysroot);
  if (!booted)
    return glnx_throw (error, "Must be booted into an OSTree deployment");

  const char *name = argv[1];
  const char *target = argv[2];

  /* make sure <state-overlays>/NAME/{work,upper} exist */
  g_autofree char *state_dir = g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, NULL);
  glnx_autofd int state_dfd = -1;
  if (!ensure_overlay_dirs (state_dir, &state_dfd, cancellable, error))
    return FALSE;

  struct stat upper_st;
  if (!glnx_fstatat (state_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, &upper_st, 0, error))
    return FALSE;

  /* Stat the deployment directory rather than "/": the latter may have e.g.
   * an overlay slapped on from root.transient or composefs. */
  g_autofree char *deploy_path = ostree_sysroot_get_deployment_dirpath (sysroot, booted);
  struct stat lower_st;
  if (!glnx_fstatat (ostree_sysroot_get_fd (sysroot), deploy_path, &lower_st, 0, error))
    return FALSE;

  /* an upperdir older than the deployment means the lowerdir was updated
   * since the last prune; reconcile before mounting */
  const gboolean lower_is_newer = upper_st.st_mtime < lower_st.st_mtime;
  if (lower_is_newer
      && !prune_upperdir (ostree_sysroot_get_fd (sysroot), deploy_path, target, state_dfd,
                          cancellable, error))
    return glnx_prefix_error (error, "Pruning upperdir for %s", name);

  return mount_overlay (target, name, error);
}
1 change: 1 addition & 0 deletions src/ostree/ot-admin-builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ BUILTINPROTO (upgrade);
BUILTINPROTO (kargs);
BUILTINPROTO (post_copy);
BUILTINPROTO (lock_finalization);
BUILTINPROTO (state_overlay);

#undef BUILTINPROTO

Expand Down
2 changes: 2 additions & 0 deletions src/ostree/ot-builtin-admin.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ static OstreeCommand admin_subcommands[] = {
"Change the finalization locking state of the staged deployment" },
{ "boot-complete", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
ot_admin_builtin_boot_complete, "Internal command to run at boot after an update was applied" },
{ "state-overlay", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
ot_admin_builtin_state_overlay, "Internal command to assemble a state overlay" },
{ "init-fs", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_init_fs,
"Initialize a root filesystem" },
{ "instutil", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN, ot_admin_builtin_instutil,
Expand Down
Loading

0 comments on commit fb5050d

Please sign in to comment.