From ee2740da079f3993687ccf3c5b5583d544f5c601 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 26 May 2023 18:41:18 +0200 Subject: [PATCH] overlay: integrate ComposeFS This commit introduces support for ComposeFS using the EROFS filesystem to mount the file system metadata. The current implementation allows each layer to be mounted individually. Only images that are using the zstd:chunked and eStargz format can be used in this way since the metadata is stored in the image itself. In future support for arbitrary images can be added. Signed-off-by: Giuseppe Scrivano --- Makefile | 2 +- drivers/overlay/composefs_notsupported.go | 23 +++ drivers/overlay/composefs_supported.go | 91 +++++++++++ drivers/overlay/overlay.go | 182 +++++++++++++++++++--- hack/composefs_tag.sh | 19 +++ 5 files changed, 293 insertions(+), 24 deletions(-) create mode 100644 drivers/overlay/composefs_notsupported.go create mode 100644 drivers/overlay/composefs_supported.go create mode 100755 hack/composefs_tag.sh diff --git a/Makefile b/Makefile index 6cb354c2c0..8cf3098a51 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ vendor-in-container NATIVETAGS := -AUTOTAGS := $(shell ./hack/btrfs_tag.sh) $(shell ./hack/libdm_tag.sh) $(shell ./hack/libsubid_tag.sh) +AUTOTAGS := $(shell ./hack/btrfs_tag.sh) $(shell ./hack/libdm_tag.sh) $(shell ./hack/libsubid_tag.sh) $(shell ./hack/composefs_tag.sh) BUILDFLAGS := -tags "$(AUTOTAGS) $(TAGS)" $(FLAGS) GO ?= go TESTFLAGS := $(shell $(GO) test -race $(BUILDFLAGS) ./pkg/stringutils 2>&1 > /dev/null && echo -race) diff --git a/drivers/overlay/composefs_notsupported.go b/drivers/overlay/composefs_notsupported.go new file mode 100644 index 0000000000..fd2c9a3f96 --- /dev/null +++ b/drivers/overlay/composefs_notsupported.go @@ -0,0 +1,23 @@ +//go:build !linux || !composefs || !cgo +// +build !linux +// +build !composefs +// +build !cgo + + +package overlay + +import ( + "fmt" +) + +func composeFsSupported() bool { + return false +} + +func generateComposeFsBlob(toc []byte, destFile string) error { + return fmt.Errorf("composefs is not supported") +} + +func mountErofsBlob(blobFile, mountPoint string) error { + return fmt.Errorf("composefs is not supported") +} diff --git a/drivers/overlay/composefs_supported.go b/drivers/overlay/composefs_supported.go new file mode 100644 index 0000000000..c14f1aed5c --- /dev/null +++ b/drivers/overlay/composefs_supported.go @@ -0,0 +1,91 @@ +//go:build linux && composefs && cgo +// +build linux,composefs,cgo + +package overlay + +import ( + "fmt" + "os" + "os/exec" + "sync" + + "github.com/containers/storage/pkg/loopback" + "golang.org/x/sys/unix" +) + +var ( + ComposeFsHelperOnce sync.Once + ComposeFsHelperPath string + ComposeFsHelperErr error +) + +func getComposeFsHelper() (string, error) { + ComposeFsHelperOnce.Do(func() { + ComposeFsHelperPath, ComposeFsHelperErr = exec.LookPath("composefs-from-json") + }) + return ComposeFsHelperPath, ComposeFsHelperErr +} + +func composeFsSupported() bool { + _, err := getComposeFsHelper() + return err == nil +} + +func generateComposeFsBlob(toc []byte, destFile string) error { + outFd, err := unix.Openat(unix.AT_FDCWD, destFile, unix.O_WRONLY|unix.O_CREAT|unix.O_TRUNC|unix.O_EXCL, 0644) + if err != nil { + return fmt.Errorf("failed to open output file: %w", err) + } + + + writerJson, err := getComposeFsHelper() + if err != nil { + return fmt.Errorf("failed to find composefs-from-json: %w", err) + } + + err = func() error { + // Use a func to have a scope for the close. This must be closed before + // fsverity can be enabled. + defer unix.Close(outFd) + + fd, err := unix.MemfdCreate("json-toc", unix.MFD_ALLOW_SEALING) + if err != nil { + return fmt.Errorf("failed to create memfd: %w", err) + } + defer unix.Close(fd) + + if err := unix.Ftruncate(fd, int64(len(toc))); err != nil { + return fmt.Errorf("failed to truncate memfd: %w", err) + } + + buf := toc + for len(buf) > 0 { + n, err := unix.Write(fd, buf) + if err != nil { + return fmt.Errorf("failed to write to memfd: %w", err) + } + buf = buf[n:] + } + + cmd := exec.Command(writerJson, "--format=erofs", fmt.Sprintf("--out=/proc/self/fd/%d", outFd), fmt.Sprintf("/proc/self/fd/%d", fd)) + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to convert json to erofs") + } + return nil + }() + if err != nil { + return err + } + return nil +} + +func mountErofsBlob(blobFile, mountPoint string) error { + loop, err := loopback.AttachLoopDevice(blobFile) + if err != nil { + return err + } + defer loop.Close() + + return unix.Mount(loop.Name(), mountPoint, "erofs", unix.MS_RDONLY, "") +} diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 02db74d396..58edf08723 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -82,6 +82,8 @@ const ( lowerFile = "lower" maxDepth = 500 + zstdChunkedManifest = "zstd-chunked-manifest" + // idLength represents the number of random characters // which can be used to create the unique link identifier // for every layer. If this value is too long then the @@ -780,6 +782,10 @@ func supportsOverlay(home string, homeMagic graphdriver.FsMagic, rootUID, rootGI } func (d *Driver) useNaiveDiff() bool { + if d.useComposeFs() { + return true + } + useNaiveDiffLock.Do(func() { if d.options.mountProgram != "" { useNaiveDiffOnly = true @@ -1431,6 +1437,9 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO logLevel = logrus.DebugLevel } optsList := options.Options + + needsIDMapping := !disableShifting && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 && d.options.mountProgram == "" + if len(optsList) == 0 { optsList = strings.Split(d.options.mountOptions, ",") } else { @@ -1499,12 +1508,103 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } + idmappedMountProcessPid := -1 + if needsIDMapping { + pid, cleanupFunc, err := idmap.CreateUsernsProcess(options.UidMaps, options.GidMaps) + if err != nil { + return "", err + } + idmappedMountProcessPid = int(pid) + defer cleanupFunc() + } + + // user namespace requires this to move a directory from lower to upper. + rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps) + if err != nil { + return "", err + } + diffDir := path.Join(workDirBase, "diff") + + erofsLayers := filepath.Join(d.home, id, "erofs-layers") + if err := os.MkdirAll(erofsLayers, 0o700); err != nil { + return "", err + } + + skipIDMappingLayers := make(map[string]string) + + composeFsLayers := []string{} + + erofsMounts := []string{} + defer func() { + for _, m := range erofsMounts { + defer unix.Unmount(m, unix.MNT_DETACH) + } + }() + + maybeAddErofsMount := func(lowerID string, i int) (string, error) { + erofsBlob := d.getErofsBlob(lowerID) + _, err = os.Stat(erofsBlob) + if err != nil { + if os.IsNotExist(err) { + return "", nil + } + return "", err + } + logrus.Debugf("overlay: using erofs blob %s for lower %s", erofsBlob, lowerID) + + dest := filepath.Join(erofsLayers, fmt.Sprintf("%d", i)) + if err := os.MkdirAll(dest, 0o700); err != nil { + return "", err + } + + if err := mountErofsBlob(erofsBlob, dest); err != nil { + return "", err + } + erofsMounts = append(erofsMounts, dest) + + composeFsPath := d.getComposeFsPath(lowerID) + composeFsLayers = append(composeFsLayers, composeFsPath) + skipIDMappingLayers[composeFsPath] = composeFsPath + return dest, nil + } + dest, err := maybeAddErofsMount(id, 0) + if err != nil { + return "", err + } + if dest != "" { + diffDir = dest + } + // For each lower, resolve its path, and append it and any additional diffN // directories to the lowers list. - for _, l := range splitLowers { + for i, l := range splitLowers { if l == "" { continue } + + p, err := os.Readlink(path.Join(d.home, l)) + if err != nil { + return "", err + } + lowerID := filepath.Base(filepath.Dir(p)) + dest, err := maybeAddErofsMount(lowerID, i+1) + if err != nil { + return "", err + } + if dest != "" { + if needsIDMapping { + if err := idmap.CreateIDMappedMount(dest, dest, idmappedMountProcessPid); err != nil { + return "", fmt.Errorf("create mapped mount for %q: %w", dest, err) + } + skipIDMappingLayers[dest] = dest + // overlay takes a reference on the mount, so it is safe to unmount + // the mapped idmounts as soon as the final overlay file system is mounted. + defer unix.Unmount(dest, unix.MNT_DETACH) + } + absLowers = append(absLowers, dest) + continue + } + lower := "" newpath := path.Join(d.home, l) if st, err := os.Stat(newpath); err != nil { @@ -1548,15 +1648,16 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } + if len(composeFsLayers) > 0 { + optsList = append(optsList, "metacopy=on", "redirect_dir=on") + } + + absLowers = append(absLowers, composeFsLayers...) + if len(absLowers) == 0 { absLowers = append(absLowers, path.Join(dir, "empty")) } - // user namespace requires this to move a directory from lower to upper. - rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps) - if err != nil { - return "", err - } - diffDir := path.Join(workDirBase, "diff") + if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil { return "", err } @@ -1596,31 +1697,30 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } - if !disableShifting && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 && d.options.mountProgram == "" { + if needsIDMapping { var newAbsDir []string + idMappedMounts := make(map[string]string) + mappedRoot := filepath.Join(d.home, id, "mapped") if err := os.MkdirAll(mappedRoot, 0o700); err != nil { return "", err } - pid, cleanupFunc, err := idmap.CreateUsernsProcess(options.UidMaps, options.GidMaps) - if err != nil { - return "", err - } - defer cleanupFunc() - - idMappedMounts := make(map[string]string) - // rewrite the lower dirs to their idmapped mount. c := 0 for _, absLower := range absLowers { mappedMountSrc := getMappedMountRoot(absLower) + if _, ok := skipIDMappingLayers[absLower]; ok { + newAbsDir = append(newAbsDir, absLower) + continue + } + root, found := idMappedMounts[mappedMountSrc] if !found { root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c)) c++ - if err := idmap.CreateIDMappedMount(mappedMountSrc, root, int(pid)); err != nil { + if err := idmap.CreateIDMappedMount(mappedMountSrc, root, idmappedMountProcessPid); err != nil { return "", fmt.Errorf("create mapped mount for %q on %q: %w", mappedMountSrc, root, err) } idMappedMounts[mappedMountSrc] = root @@ -1896,6 +1996,13 @@ func (d *Driver) CleanupStagingDirectory(stagingDirectory string) error { return os.RemoveAll(stagingDirectory) } +func (d *Driver) useComposeFs() bool { + if !composeFsSupported() || unshare.IsRootless() { + return false + } + return true +} + // ApplyDiff applies the changes in the new layer using the specified function func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, err error) { var idMappings *idtools.IDMappings @@ -1928,14 +2035,23 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App logrus.Debugf("Applying differ in %s", applyDir) + differOptions := graphdriver.DifferOptions{ + Format: graphdriver.DifferOutputFormatDir, + } + if d.useComposeFs() { + differOptions.Format = graphdriver.DifferOutputFormatFlat + } + out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{ UIDMaps: idMappings.UIDs(), GIDMaps: idMappings.GIDs(), IgnoreChownErrors: d.options.ignoreChownErrors, WhiteoutFormat: d.getWhiteoutFormat(), InUserNS: unshare.IsRootless(), - }, nil) + }, &differOptions) + out.Target = applyDir + return out, err } @@ -1945,17 +2061,27 @@ func (d *Driver) ApplyDiffFromStagingDirectory(id, parent, stagingDirectory stri return fmt.Errorf("%q is not a staging directory", stagingDirectory) } - diff, err := d.getDiffPath(id) - if err != nil { - return err + dest := "" + if d.useComposeFs() { + toc := diffOutput.BigData[zstdChunkedManifest] + if err := generateComposeFsBlob(toc, d.getErofsBlob(id)); err != nil { + return err + } + dest = d.getComposeFsPath(id) + } else { + diffPath, err := d.getDiffPath(id) + if err != nil { + return err + } + dest = diffPath } - if err := os.RemoveAll(diff); err != nil && !os.IsNotExist(err) { + if err := os.RemoveAll(dest); err != nil && !os.IsNotExist(err) { return err } diffOutput.UncompressedDigest = diffOutput.TOCDigest - return os.Rename(stagingDirectory, diff) + return os.Rename(stagingDirectory, dest) } // DifferTarget gets the location where files are stored for the layer. @@ -2001,6 +2127,16 @@ func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) return directory.Size(applyDir) } +func (d *Driver) getComposeFsPath(id string) string { + dir := d.dir(id) + return path.Join(dir, ".cfs") +} + +func (d *Driver) getErofsBlob(id string) string { + dir := d.dir(id) + return path.Join(dir, "erofs-blob") +} + func (d *Driver) getDiffPath(id string) (string, error) { dir, imagestore, _ := d.dir2(id) base := dir diff --git a/hack/composefs_tag.sh b/hack/composefs_tag.sh new file mode 100755 index 0000000000..12d9970407 --- /dev/null +++ b/hack/composefs_tag.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +if test $(${GO:-go} env GOOS) != "linux" ; then + exit 0 +fi +tmpdir="$PWD/tmp.$RANDOM" +mkdir -p "$tmpdir" +trap 'rm -fr "$tmpdir"' EXIT +cc -o "$tmpdir"/libsubid_tag -l composefs -l yajl -x c - > /dev/null 2> /dev/null << EOF +#include +#include +#include + +int main() { + return 0; +} +EOF +if test $? -eq 0 ; then + echo composefs +fi