From 7a5f2bef3680b97d09efb0854533c7310f8d9422 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 21 Jun 2023 21:05:21 +0200 Subject: [PATCH 1/7] chunked: file fd leak The file descriptor was not closed before, thus leaking all the opened files. Signed-off-by: Giuseppe Scrivano --- pkg/chunked/storage_linux.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index a80b28fb58..aa415e268a 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -844,7 +844,14 @@ func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *ar }, nil } -func (d *destinationFile) Close() error { +func (d *destinationFile) Close() (Err error) { + defer func() { + err := d.file.Close() + if Err == nil { + Err = err + } + }() + manifestChecksum, err := digest.Parse(d.metadata.Digest) if err != nil { return err From 8bb5a087abfa11f4a300e925c44c910276776835 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 26 May 2023 11:57:37 +0200 Subject: [PATCH 2/7] driver: extend ApplyDiff with differOpts Signed-off-by: Giuseppe Scrivano --- drivers/driver.go | 6 +++++- drivers/overlay/overlay.go | 2 +- pkg/chunked/storage_linux.go | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/driver.go b/drivers/driver.go index f7b0d6891b..663173a29c 100644 --- a/drivers/driver.go +++ b/drivers/driver.go @@ -191,10 +191,14 @@ type DriverWithDifferOutput struct { TOCDigest digest.Digest } +// DifferOptions overrides how the differ work +type DifferOptions struct { +} + // Differ defines the interface for using a custom differ. // This API is experimental and can be changed without bumping the major version number. 
type Differ interface { - ApplyDiff(dest string, options *archive.TarOptions) (DriverWithDifferOutput, error) + ApplyDiff(dest string, options *archive.TarOptions, differOpts *DifferOptions) (DriverWithDifferOutput, error) } // DriverWithDiffer is the interface for direct diff access. diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 1ef7122c58..02db74d396 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -1934,7 +1934,7 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App IgnoreChownErrors: d.options.ignoreChownErrors, WhiteoutFormat: d.getWhiteoutFormat(), InUserNS: unshare.IsRootless(), - }) + }, nil) out.Target = applyDir return out, err } diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index aa415e268a..254ce9c73c 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -1324,7 +1324,7 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop return false, nil } -func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { +func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) { defer c.layersCache.release() defer func() { if c.zstdReader != nil { From a50bb95770a9471bd2d1d11d19048d96245d372f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 26 May 2023 12:20:56 +0200 Subject: [PATCH 3/7] chunked: support writing files in a flat dir format so that they can be stored by their digest Signed-off-by: Giuseppe Scrivano --- drivers/driver.go | 13 +++++++ pkg/chunked/cache_linux.go | 40 ++++++++++++++++--- pkg/chunked/cache_linux_test.go | 27 +++++++++++-- pkg/chunked/storage_linux.go | 69 ++++++++++++++++++++++++++++++++- 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/drivers/driver.go b/drivers/driver.go index 
663173a29c..1fb04dc3ed 100644 --- a/drivers/driver.go +++ b/drivers/driver.go @@ -191,8 +191,21 @@ type DriverWithDifferOutput struct { TOCDigest digest.Digest } +type DifferOutputFormat int + +const ( + // DifferOutputFormatDir means the output is a directory and it will + // keep the original layout. + DifferOutputFormatDir = iota + // DifferOutputFormatFlat will store the files by their checksum, in the form + // checksum[0:2]/checksum[2:] + DifferOutputFormatFlat +) + // DifferOptions overrides how the differ work type DifferOptions struct { + // Format defines the destination directory layout format + Format DifferOutputFormat } // Differ defines the interface for using a custom differ. diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index cd13212e6d..56c30e2675 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -15,6 +15,7 @@ import ( "unsafe" storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" "github.com/containers/storage/pkg/chunked/internal" "github.com/containers/storage/pkg/ioutils" jsoniter "github.com/json-iterator/go" @@ -109,7 +110,7 @@ func (c *layersCache) load() error { } bigData, err := c.store.LayerBigData(r.ID, cacheKey) - // if the cache areadly exists, read and use it + // if the cache already exists, read and use it if err == nil { defer bigData.Close() metadata, err := readMetadataFromCache(bigData) @@ -122,6 +123,23 @@ func (c *layersCache) load() error { return err } + var lcd chunkedLayerData + + clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + if clFile != nil { + cl, err := io.ReadAll(clFile) + if err != nil { + return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err) + } + json := jsoniter.ConfigCompatibleWithStandardLibrary + if err := json.Unmarshal(cl, &lcd); err != nil { + return err + } + } + // otherwise create it from the layer TOC. 
manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey) if err != nil { @@ -134,7 +152,7 @@ func (c *layersCache) load() error { return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err) } - metadata, err := writeCache(manifest, r.ID, c.store) + metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store) if err == nil { c.addLayer(r.ID, metadata) } @@ -211,13 +229,13 @@ type setBigData interface { // - digest(file.payload)) // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs) // - digest(i) for each i in chunks(file payload) -func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) { +func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) { var vdata bytes.Buffer tagLen := 0 digestLen := 0 var tagsBuffer bytes.Buffer - toc, err := prepareMetadata(manifest) + toc, err := prepareMetadata(manifest, format) if err != nil { return nil, err } @@ -396,7 +414,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) { }, nil } -func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { +func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) { toc, err := unmarshalToc(manifest) if err != nil { // ignore errors here. They might be caused by a different manifest format. 
@@ -404,6 +422,17 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { return nil, nil //nolint: nilnil } + switch format { + case graphdriver.DifferOutputFormatDir: + case graphdriver.DifferOutputFormatFlat: + toc.Entries, err = makeEntriesFlat(toc.Entries) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown format %q", format) + } + var r []*internal.FileMetadata chunkSeen := make(map[string]bool) for i := range toc.Entries { @@ -420,6 +449,7 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { chunkSeen[cd] = true } } + return r, nil } diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go index 598fd63e9e..2ce18d9363 100644 --- a/pkg/chunked/cache_linux_test.go +++ b/pkg/chunked/cache_linux_test.go @@ -4,8 +4,12 @@ import ( "bytes" "fmt" "io" + "path/filepath" "reflect" + "strings" "testing" + + graphdriver "github.com/containers/storage/drivers" ) const jsonTOC = ` @@ -55,7 +59,7 @@ const jsonTOC = ` ` func TestPrepareMetadata(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC)) + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) if err != nil { t.Errorf("got error from prepareMetadata: %v", err) } @@ -64,6 +68,21 @@ func TestPrepareMetadata(t *testing.T) { } } +func TestPrepareMetadataFlat(t *testing.T) { + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat) + if err != nil { + t.Errorf("got error from prepareMetadata: %v", err) + } + for _, e := range toc { + if len(strings.Split(e.Name, "/")) != 2 { + t.Error("prepareMetadata returns the wrong number of path elements for flat directories") + } + if len(filepath.Dir(e.Name)) != 2 { + t.Error("prepareMetadata returns the wrong path for flat directories") + } + } +} + type bigDataToBuffer struct { buf *bytes.Buffer id string @@ -83,7 +102,7 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error } func 
TestWriteCache(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC)) + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) if err != nil { t.Errorf("got error from prepareMetadata: %v", err) } @@ -91,7 +110,7 @@ func TestWriteCache(t *testing.T) { dest := bigDataToBuffer{ buf: bytes.NewBuffer(nil), } - cache, err := writeCache([]byte(jsonTOC), "foobar", &dest) + cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest) if err != nil { t.Errorf("got error from writeCache: %v", err) } @@ -156,7 +175,7 @@ func TestReadCache(t *testing.T) { dest := bigDataToBuffer{ buf: bytes.NewBuffer(nil), } - cache, err := writeCache([]byte(jsonTOC), "foobar", &dest) + cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest) if err != nil { t.Errorf("got error from writeCache: %v", err) } diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index 254ce9c73c..f130560829 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -28,6 +28,7 @@ import ( "github.com/containers/storage/pkg/system" "github.com/containers/storage/types" securejoin "github.com/cyphar/filepath-securejoin" + jsoniter "github.com/json-iterator/go" "github.com/klauspost/compress/zstd" "github.com/klauspost/pgzip" digest "github.com/opencontainers/go-digest" @@ -41,6 +42,8 @@ const ( newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY) containersOverrideXattr = "user.containers.override_stat" bigDataKey = "zstd-chunked-manifest" + chunkedData = "zstd-chunked-data" + chunkedLayerDataKey = "zstd-chunked-layer-data" fileTypeZstdChunked = iota fileTypeEstargz @@ -73,6 +76,11 @@ var xattrsToIgnore = map[string]interface{}{ "security.selinux": true, } +// chunkedLayerData is used to store additional information about the layer +type chunkedLayerData struct { + Format graphdriver.DifferOutputFormat `json:"format"` +} + func timeToTimespec(time 
*time.Time) (ts unix.Timespec) { if time == nil || time.IsZero() { // Return UTIME_OMIT special value @@ -241,7 +249,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0) if err != nil { - return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err) + return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err) } defer srcFile.Close() @@ -1324,6 +1332,38 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop return false, nil } +func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) { + var new []internal.FileMetadata + + hashes := make(map[string]string) + for i := range mergedEntries { + if mergedEntries[i].Type != TypeReg { + continue + } + if mergedEntries[i].Digest == "" { + if mergedEntries[i].Size != 0 { + return nil, fmt.Errorf("missing digest for %q", mergedEntries[i].Name) + } + continue + } + digest, err := digest.Parse(mergedEntries[i].Digest) + if err != nil { + return nil, err + } + d := digest.Encoded() + + if hashes[d] != "" { + continue + } + hashes[d] = d + + mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:]) + + new = append(new, mergedEntries[i]) + } + return new, nil +} + func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) { defer c.layersCache.release() defer func() { @@ -1332,11 +1372,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff } }() + lcd := chunkedLayerData{ + Format: differOpts.Format, + } + + json := jsoniter.ConfigCompatibleWithStandardLibrary + lcdBigData, err := json.Marshal(lcd) + if err != nil { + return graphdriver.DriverWithDifferOutput{}, err + } output := graphdriver.DriverWithDifferOutput{ Differ: c, TarSplit: c.tarSplit, BigData: map[string][]byte{ - 
bigDataKey: c.manifest, + bigDataKey: c.manifest, + chunkedLayerDataKey: lcdBigData, }, TOCDigest: c.tocDigest, } @@ -1396,6 +1446,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff } defer unix.Close(dirfd) + if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat { + mergedEntries, err = makeEntriesFlat(mergedEntries) + if err != nil { + return output, err + } + createdDirs := make(map[string]struct{}) + for _, e := range mergedEntries { + d := e.Name[0:2] + if _, found := createdDirs[d]; !found { + unix.Mkdirat(dirfd, d, 0o755) + createdDirs[d] = struct{}{} + } + } + } + // hardlinks can point to missing files. So create them after all files // are retrieved var hardLinks []hardLinkToCreate From 4cb91e2b279627469b9c8d6c141efde44baf7303 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 23 Jun 2023 13:04:04 +0200 Subject: [PATCH 4/7] overlay: move condition to a new variable Signed-off-by: Giuseppe Scrivano --- drivers/overlay/overlay.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 02db74d396..8bac5e1fb8 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -1431,6 +1431,9 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO logLevel = logrus.DebugLevel } optsList := options.Options + + needsIDMapping := !disableShifting && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 && d.options.mountProgram == "" + if len(optsList) == 0 { optsList = strings.Split(d.options.mountOptions, ",") } else { @@ -1596,7 +1599,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } - if !disableShifting && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 && d.options.mountProgram == "" { + if needsIDMapping { var newAbsDir []string mappedRoot := filepath.Join(d.home, id, "mapped") if err := os.MkdirAll(mappedRoot, 0o700); err != nil { 
From 1cc3e7af3456f34713918b43b5f8694ba6bf58aa Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 23 Jun 2023 13:11:53 +0200 Subject: [PATCH 5/7] overlay: create idmapped user namespace earlier Signed-off-by: Giuseppe Scrivano --- drivers/overlay/overlay.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 8bac5e1fb8..e924e2c34e 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -1502,6 +1502,16 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } + idmappedMountProcessPid := -1 + if needsIDMapping { + pid, cleanupFunc, err := idmap.CreateUsernsProcess(options.UidMaps, options.GidMaps) + if err != nil { + return "", err + } + idmappedMountProcessPid = int(pid) + defer cleanupFunc() + } + // For each lower, resolve its path, and append it and any additional diffN // directories to the lowers list. for _, l := range splitLowers { @@ -1601,19 +1611,13 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO if needsIDMapping { var newAbsDir []string + idMappedMounts := make(map[string]string) + mappedRoot := filepath.Join(d.home, id, "mapped") if err := os.MkdirAll(mappedRoot, 0o700); err != nil { return "", err } - pid, cleanupFunc, err := idmap.CreateUsernsProcess(options.UidMaps, options.GidMaps) - if err != nil { - return "", err - } - defer cleanupFunc() - - idMappedMounts := make(map[string]string) - // rewrite the lower dirs to their idmapped mount. 
c := 0 for _, absLower := range absLowers { @@ -1623,7 +1627,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO if !found { root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c)) c++ - if err := idmap.CreateIDMappedMount(mappedMountSrc, root, int(pid)); err != nil { + if err := idmap.CreateIDMappedMount(mappedMountSrc, root, idmappedMountProcessPid); err != nil { return "", fmt.Errorf("create mapped mount for %q on %q: %w", mappedMountSrc, root, err) } idMappedMounts[mappedMountSrc] = root From 6b10c1ad5fb6c0720ef5cd64bbf5bafa7cff9aae Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 23 Jun 2023 13:27:21 +0200 Subject: [PATCH 6/7] overlay: integrate ComposeFS This commit introduces support for ComposeFS using the EROFS filesystem to mount the file system metadata. The current implementation allows each layer to be mounted individually. Only images that are using the zstd:chunked and eStargz formats can be used in this way since the metadata is stored in the image itself. In the future, support for arbitrary images can be added.
Signed-off-by: Giuseppe Scrivano --- drivers/overlay/composefs_notsupported.go | 20 ++++ drivers/overlay/composefs_supported.go | 66 +++++++++++ drivers/overlay/overlay.go | 134 +++++++++++++++++++++- 3 files changed, 214 insertions(+), 6 deletions(-) create mode 100644 drivers/overlay/composefs_notsupported.go create mode 100644 drivers/overlay/composefs_supported.go diff --git a/drivers/overlay/composefs_notsupported.go b/drivers/overlay/composefs_notsupported.go new file mode 100644 index 0000000000..273ef4699d --- /dev/null +++ b/drivers/overlay/composefs_notsupported.go @@ -0,0 +1,20 @@ +//go:build !linux || !composefs || !cgo +// +build !linux !composefs !cgo + +package overlay + +import ( + "fmt" +) + +func composeFsSupported() bool { + return false +} + +func generateComposeFsBlob(toc []byte, destFile string) error { + return fmt.Errorf("composefs is not supported") +} + +func mountErofsBlob(blobFile, mountPoint string) error { + return fmt.Errorf("composefs is not supported") +} diff --git a/drivers/overlay/composefs_supported.go b/drivers/overlay/composefs_supported.go new file mode 100644 index 0000000000..61cc47f81b --- /dev/null +++ b/drivers/overlay/composefs_supported.go @@ -0,0 +1,66 @@ +//go:build linux && composefs && cgo +// +build linux,composefs,cgo + +package overlay + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "sync" + + "github.com/containers/storage/pkg/loopback" + "golang.org/x/sys/unix" +) + +var ( + composeFsHelperOnce sync.Once + composeFsHelperPath string + composeFsHelperErr error +) + +func getComposeFsHelper() (string, error) { + composeFsHelperOnce.Do(func() { + composeFsHelperPath, composeFsHelperErr = exec.LookPath("composefs-from-json") + }) + return composeFsHelperPath, composeFsHelperErr +} + +func composeFsSupported() bool { + _, err := getComposeFsHelper() + return err == nil +} + +func generateComposeFsBlob(toc []byte, destFile string) error { + writerJson, err := getComposeFsHelper() + if err != nil { + return 
fmt.Errorf("failed to find composefs-from-json: %w", err) + } + + fd, err := unix.Openat(unix.AT_FDCWD, destFile, unix.O_WRONLY|unix.O_CREAT|unix.O_TRUNC|unix.O_EXCL|unix.O_CLOEXEC, 0o644) + if err != nil { + return fmt.Errorf("failed to open output file: %w", err) + } + outFd := os.NewFile(uintptr(fd), "outFd") + + defer outFd.Close() + cmd := exec.Command(writerJson, "--format=erofs", "--out=/proc/self/fd/3", "/proc/self/fd/0") + cmd.ExtraFiles = []*os.File{outFd} + cmd.Stdin = bytes.NewReader(toc) + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to convert json to erofs: %w", err) + } + return nil +} + +func mountErofsBlob(blobFile, mountPoint string) error { + loop, err := loopback.AttachLoopDevice(blobFile) + if err != nil { + return err + } + defer loop.Close() + + return unix.Mount(loop.Name(), mountPoint, "erofs", unix.MS_RDONLY, "ro") +} diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index e924e2c34e..a4e6b7d3ca 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -82,6 +82,8 @@ const ( lowerFile = "lower" maxDepth = 500 + zstdChunkedManifest = "zstd-chunked-manifest" + // idLength represents the number of random characters // which can be used to create the unique link identifier // for every layer. 
If this value is too long then the @@ -780,6 +782,10 @@ func supportsOverlay(home string, homeMagic graphdriver.FsMagic, rootUID, rootGI } func (d *Driver) useNaiveDiff() bool { + if d.useComposeFs() { + return true + } + useNaiveDiffLock.Do(func() { if d.options.mountProgram != "" { useNaiveDiffOnly = true @@ -1512,12 +1518,66 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO defer cleanupFunc() } + erofsLayers := filepath.Join(workDirBase, "erofs-layers") + if err := os.MkdirAll(erofsLayers, 0o700); err != nil { + return "", err + } + + skipIDMappingLayers := make(map[string]string) + + composeFsLayers := []string{} + + erofsMounts := []string{} + defer func() { + for _, m := range erofsMounts { + defer unix.Unmount(m, unix.MNT_DETACH) + } + }() + + maybeAddErofsMount := func(lowerID string, i int) (string, error) { + erofsBlob := d.getErofsBlob(lowerID) + _, err = os.Stat(erofsBlob) + if err != nil { + if os.IsNotExist(err) { + return "", nil + } + return "", err + } + logrus.Debugf("overlay: using erofs blob %s for lower %s", erofsBlob, lowerID) + + dest := filepath.Join(erofsLayers, fmt.Sprintf("%d", i)) + if err := os.MkdirAll(dest, 0o700); err != nil { + return "", err + } + + if err := mountErofsBlob(erofsBlob, dest); err != nil { + return "", err + } + erofsMounts = append(erofsMounts, dest) + composeFsPath, err := d.getDiffPath(lowerID) + if err != nil { + return "", err + } + composeFsLayers = append(composeFsLayers, composeFsPath) + skipIDMappingLayers[composeFsPath] = composeFsPath + return dest, nil + } + + diffDir := path.Join(workDirBase, "diff") + + if dest, err := maybeAddErofsMount(id, 0); err != nil { + return "", err + } else if dest != "" { + diffDir = dest + } + // For each lower, resolve its path, and append it and any additional diffN // directories to the lowers list. 
- for _, l := range splitLowers { + for i, l := range splitLowers { if l == "" { continue } + lower := "" newpath := path.Join(d.home, l) if st, err := os.Stat(newpath); err != nil { @@ -1551,6 +1611,30 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } lower = newpath } + + linkContent, err := os.Readlink(lower) + if err != nil { + return "", err + } + lowerID := filepath.Base(filepath.Dir(linkContent)) + erofsMount, err := maybeAddErofsMount(lowerID, i+1) + if err != nil { + return "", err + } + if erofsMount != "" { + if needsIDMapping { + if err := idmap.CreateIDMappedMount(erofsMount, erofsMount, idmappedMountProcessPid); err != nil { + return "", fmt.Errorf("create mapped mount for %q: %w", erofsMount, err) + } + skipIDMappingLayers[erofsMount] = erofsMount + // overlay takes a reference on the mount, so it is safe to unmount + // the mapped idmounts as soon as the final overlay file system is mounted. + defer unix.Unmount(erofsMount, unix.MNT_DETACH) + } + absLowers = append(absLowers, erofsMount) + continue + } + absLowers = append(absLowers, lower) diffN = 1 _, err = os.Stat(dumbJoin(lower, "..", nameWithSuffix("diff", diffN))) @@ -1561,15 +1645,22 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } } + if len(composeFsLayers) > 0 { + optsList = append(optsList, "metacopy=on", "redirect_dir=on") + } + + absLowers = append(absLowers, composeFsLayers...) + if len(absLowers) == 0 { absLowers = append(absLowers, path.Join(dir, "empty")) } + // user namespace requires this to move a directory from lower to upper. 
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps) if err != nil { return "", err } - diffDir := path.Join(workDirBase, "diff") + if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil { return "", err } @@ -1623,6 +1714,11 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO for _, absLower := range absLowers { mappedMountSrc := getMappedMountRoot(absLower) + if _, ok := skipIDMappingLayers[absLower]; ok { + newAbsDir = append(newAbsDir, absLower) + continue + } + root, found := idMappedMounts[mappedMountSrc] if !found { root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c)) @@ -1903,6 +1999,13 @@ func (d *Driver) CleanupStagingDirectory(stagingDirectory string) error { return os.RemoveAll(stagingDirectory) } +func (d *Driver) useComposeFs() bool { + if !composeFsSupported() || unshare.IsRootless() { + return false + } + return true +} + // ApplyDiff applies the changes in the new layer using the specified function func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, err error) { var idMappings *idtools.IDMappings @@ -1935,14 +2038,22 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App logrus.Debugf("Applying differ in %s", applyDir) + differOptions := graphdriver.DifferOptions{ + Format: graphdriver.DifferOutputFormatDir, + } + if d.useComposeFs() { + differOptions.Format = graphdriver.DifferOutputFormatFlat + } out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{ UIDMaps: idMappings.UIDs(), GIDMaps: idMappings.GIDs(), IgnoreChownErrors: d.options.ignoreChownErrors, WhiteoutFormat: d.getWhiteoutFormat(), InUserNS: unshare.IsRootless(), - }, nil) + }, &differOptions) + out.Target = applyDir + return out, err } @@ -1952,17 +2063,23 @@ func (d *Driver) ApplyDiffFromStagingDirectory(id, parent, stagingDirectory stri return fmt.Errorf("%q is 
not a staging directory", stagingDirectory) } - diff, err := d.getDiffPath(id) + if d.useComposeFs() { + toc := diffOutput.BigData[zstdChunkedManifest] + if err := generateComposeFsBlob(toc, d.getErofsBlob(id)); err != nil { + return err + } + } + diffPath, err := d.getDiffPath(id) if err != nil { return err } - if err := os.RemoveAll(diff); err != nil && !os.IsNotExist(err) { + if err := os.RemoveAll(diffPath); err != nil && !os.IsNotExist(err) { return err } diffOutput.UncompressedDigest = diffOutput.TOCDigest - return os.Rename(stagingDirectory, diff) + return os.Rename(stagingDirectory, diffPath) } // DifferTarget gets the location where files are stored for the layer. @@ -2008,6 +2125,11 @@ func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) return directory.Size(applyDir) } +func (d *Driver) getErofsBlob(id string) string { + dir := d.dir(id) + return path.Join(dir, "erofs-blob") +} + func (d *Driver) getDiffPath(id string) (string, error) { dir, imagestore, _ := d.dir2(id) base := dir From 1c76934a2c6917262c525a8aeaed12f1903cab8f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 29 May 2023 14:16:00 +0200 Subject: [PATCH 7/7] overlay, composefs: add fs-verity to files At the moment, this is a best-effort implementation to enable fs-verity for the composefs blob as well as for the data files.
Signed-off-by: Giuseppe Scrivano --- drivers/overlay/composefs_notsupported.go | 4 ++ drivers/overlay/composefs_supported.go | 78 +++++++++++++++++++++-- drivers/overlay/overlay.go | 5 ++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/drivers/overlay/composefs_notsupported.go b/drivers/overlay/composefs_notsupported.go index 273ef4699d..75eae0affb 100644 --- a/drivers/overlay/composefs_notsupported.go +++ b/drivers/overlay/composefs_notsupported.go @@ -18,3 +18,7 @@ func generateComposeFsBlob(toc []byte, destFile string) error { func mountErofsBlob(blobFile, mountPoint string) error { return fmt.Errorf("composefs is not supported") } + +func enableVerityRecursive(path string) error { + return fmt.Errorf("composefs is not supported") +} diff --git a/drivers/overlay/composefs_supported.go b/drivers/overlay/composefs_supported.go index 61cc47f81b..b9efee8a3e 100644 --- a/drivers/overlay/composefs_supported.go +++ b/drivers/overlay/composefs_supported.go @@ -5,12 +5,18 @@ package overlay import ( "bytes" + "errors" "fmt" + "io/fs" "os" "os/exec" + "path/filepath" "sync" + "syscall" + "unsafe" "github.com/containers/storage/pkg/loopback" + "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -32,6 +38,43 @@ func composeFsSupported() bool { return err == nil } +func enableVerity(description string, fd int) error { + enableArg := unix.FsverityEnableArg{ + Version: 1, + Hash_algorithm: unix.FS_VERITY_HASH_ALG_SHA256, + Block_size: 4096, + } + + _, _, e1 := syscall.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.FS_IOC_ENABLE_VERITY), uintptr(unsafe.Pointer(&enableArg))) + if e1 != 0 && !errors.Is(e1, unix.EEXIST) { + return fmt.Errorf("failed to enable verity for %q: %w", description, e1) + } + return nil +} + +func enableVerityRecursive(path string) error { + walkFn := func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.Type().IsRegular() { + return nil + } + + f, err := os.Open(path) + if err != nil { + 
return err + } + defer f.Close() + + if err := enableVerity(path, int(f.Fd())); err != nil { + return err + } + return nil + } + return filepath.WalkDir(path, walkFn) +} + func generateComposeFsBlob(toc []byte, destFile string) error { writerJson, err := getComposeFsHelper() if err != nil { @@ -44,14 +87,35 @@ func generateComposeFsBlob(toc []byte, destFile string) error { } outFd := os.NewFile(uintptr(fd), "outFd") - defer outFd.Close() - cmd := exec.Command(writerJson, "--format=erofs", "--out=/proc/self/fd/3", "/proc/self/fd/0") - cmd.ExtraFiles = []*os.File{outFd} - cmd.Stdin = bytes.NewReader(toc) - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("failed to convert json to erofs: %w", err) + fd, err = unix.Open(fmt.Sprintf("/proc/self/fd/%d", outFd.Fd()), unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + outFd.Close() + return fmt.Errorf("failed to dup output file: %w", err) + } + newFd := os.NewFile(uintptr(fd), "newFd") + defer newFd.Close() + + err = func() error { + // a scope to close outFd before setting fsverity on the read-only fd. 
+ defer outFd.Close() + + cmd := exec.Command(writerJson, "--format=erofs", "--out=/proc/self/fd/3", "/proc/self/fd/0") + cmd.ExtraFiles = []*os.File{outFd} + cmd.Stderr = os.Stderr + cmd.Stdin = bytes.NewReader(toc) + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to convert json to erofs: %w", err) + } + return nil + }() + if err != nil { + return err } + + if err := enableVerity("manifest file", int(newFd.Fd())); err != nil && !errors.Is(err, unix.ENOTSUP) && !errors.Is(err, unix.ENOTTY) { + logrus.Warningf("%s", err) + } + return nil } diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index a4e6b7d3ca..3170f09645 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -2064,6 +2064,11 @@ func (d *Driver) ApplyDiffFromStagingDirectory(id, parent, stagingDirectory stri } if d.useComposeFs() { + // FIXME: move this logic into the differ so we don't have to open + // the file twice. + if err := enableVerityRecursive(stagingDirectory); err != nil && !errors.Is(err, unix.ENOTSUP) && !errors.Is(err, unix.ENOTTY) { + logrus.Warningf("%s", err) + } toc := diffOutput.BigData[zstdChunkedManifest] if err := generateComposeFsBlob(toc, d.getErofsBlob(id)); err != nil { return err