diff --git a/go.mod b/go.mod index 2c4bf2448e..c272ac8164 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/tchap/go-patricia/v2 v2.3.1 github.com/ulikunitz/xz v0.5.12 github.com/vbatts/tar-split v0.11.5 + golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/sys v0.22.0 gotest.tools v2.2.0+incompatible ) diff --git a/go.sum b/go.sum index dca15c6959..7b541730f1 100644 --- a/go.sum +++ b/go.sum @@ -119,6 +119,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go index 05b4ba11d6..78d1f9c4b3 100644 --- a/pkg/chunked/compression_linux.go +++ b/pkg/chunked/compression_linux.go @@ -5,13 +5,16 @@ import ( "errors" "fmt" "io" + "maps" "strconv" + "time" "github.com/containers/storage/pkg/chunked/internal" "github.com/klauspost/compress/zstd" "github.com/klauspost/pgzip" digest "github.com/opencontainers/go-digest" "github.com/vbatts/tar-split/archive/tar" + expMaps "golang.org/x/exp/maps" ) var typesToTar = map[string]byte{ @@ -221,6 +224,12 @@ func readZstdChunkedManifest(blobStream 
ImageSourceSeekable, tocDigest digest.Di if err != nil { return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err) } + // We use the TOC for creating on-disk files, but the tar-split for creating metadata + // when exporting the layer contents. Ensure the two match, otherwise local inspection of a container + // might be misleading about the exported contents. + if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil { + return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err) + } } else if tarSplitChunk.Offset > 0 { // We must ignore the tar-split when the digest is not present in the TOC, because we can’t authenticate it. // @@ -234,6 +243,121 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err } +// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries. +func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error { + pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries + for i := range toc.Entries { + e := &toc.Entries[i] + if e.Type != internal.TypeChunk { + if _, ok := pendingFiles[e.Name]; ok { + return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name) + } + pendingFiles[e.Name] = e + } + } + + if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error { + e, ok := pendingFiles[hdr.Name] + if !ok { + return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name) + } + delete(pendingFiles, hdr.Name) + expected, err := internal.NewFileMetadata(hdr) + if err != nil { + return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err) + } + if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil { + return fmt.Errorf("TOC and tar-split metadata doesn’t match: %w", err) + } + + return nil + }); err != nil { + return err + } + if 
len(pendingFiles) != 0 { + remaining := expMaps.Keys(pendingFiles) + if len(remaining) > 5 { + remaining = remaining[:5] // Just to limit the size of the output. + } + return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining) + } + return nil +} + +// ensureTimePointersMatch ensures that a and b are equal +func ensureTimePointersMatch(a, b *time.Time) error { + switch { + case a == nil && b == nil: + return nil + case a == nil: + return fmt.Errorf("nil != %v", *b) + case b == nil: + return fmt.Errorf("%v != nil", *a) + default: + if a.Equal(*b) { + return nil + } + return fmt.Errorf("%v != %v", *a, *b) + } +} + +// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data +// in the tar stream or matching contents) +func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error { + // Keep this in sync with internal.FileMetadata! + + if a.Type != b.Type { + return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type) + } + if a.Name != b.Name { + return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name) + } + if a.Linkname != b.Linkname { + return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname) + } + if a.Mode != b.Mode { + return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode) + } + if a.Size != b.Size { + return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size) + } + if a.UID != b.UID { + return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID) + } + if a.GID != b.GID { + return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID) + } + + if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil { + return fmt.Errorf("mismatch of ModTime: %w", err) + } + if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil { + return fmt.Errorf("mismatch of AccessTime: %w", err) + } + if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil { + return fmt.Errorf("mismatch of 
ChangeTime: %w", err) + } + if a.Devmajor != b.Devmajor { + return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor) + } + if a.Devminor != b.Devminor { + return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor) + } + if !maps.Equal(a.Xattrs, b.Xattrs) { + return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs) + } + + // Digest is not compared + // Offset is not compared + // EndOffset is not compared + + // ChunkSize is not compared + // ChunkOffset is not compared + // ChunkDigest is not compared + // ChunkType is not compared + return nil +} + func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) { d, err := digest.Parse(expectedCompressedChecksum) if err != nil { diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go index 8420d3ac73..95c07999f2 100644 --- a/pkg/chunked/internal/compression.go +++ b/pkg/chunked/internal/compression.go @@ -43,6 +43,8 @@ type TOC struct { // is used instead of that in the tar stream. The contents of the tar stream // are not used in this scenario. type FileMetadata struct { + // If you add any fields, update ensureFileMetadataMatches as well! + // The metadata below largely duplicates that in the tar headers. 
Type string `json:"type"` Name string `json:"name"` diff --git a/pkg/chunked/tar_split_linux.go b/pkg/chunked/tar_split_linux.go new file mode 100644 index 0000000000..8353016acf --- /dev/null +++ b/pkg/chunked/tar_split_linux.go @@ -0,0 +1,68 @@ +package chunked + +import ( + "bytes" + "fmt" + "io" + + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/storage" +) + +// iterateTarSplit calls handler for each tar header in tarSplit +func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error { + // This, strictly speaking hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream + // forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least + // for every non-empty file, which constraints it basically to the output we expect. + // + // Specifically, we assume: + // - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions + // - (There is a FileType entry for every tar header, we ignore it) + // - Trailing padding of a file, if any, is included in the next SegmentType entry + // - At the end, there may be SegmentType entries just for the terminating zero blocks. + + unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit)) + for { + tsEntry, err := unpacker.Next() + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading tar-split entries: %w", err) + } + switch tsEntry.Type { + case storage.SegmentType: + payload := tsEntry.Payload + // This is horrible, but we don’t know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse + // files, but for sparse files that is set to the logical size.) + // + // First, assume that all padding is zero bytes. 
+ // A tar header starts with a file name, which might in principle be empty, but + // at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that + // the tar name should never be empty (it should be ".", or maybe "./"). + // + // This will cause us to skip all zero bytes in the trailing blocks, but that’s fine. + i := 0 + for i < len(payload) && payload[i] == 0 { + i++ + } + payload = payload[i:] + tr := tar.NewReader(bytes.NewReader(payload)) + hdr, err := tr.Next() + if err != nil { + if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that. + break + } + return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err) + } + if err := handler(hdr); err != nil { + return err + } + + case storage.FileType: + // Nothing + default: + return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type) + } + } +} diff --git a/pkg/chunked/tar_split_linux_test.go b/pkg/chunked/tar_split_linux_test.go new file mode 100644 index 0000000000..20153a36b2 --- /dev/null +++ b/pkg/chunked/tar_split_linux_test.go @@ -0,0 +1,104 @@ +package chunked + +import ( + "bytes" + "fmt" + "io" + "testing" + "time" + + "github.com/containers/storage/pkg/chunked/internal" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func testTarheader(index int, typeFlag byte, size int64) tar.Header { + n := (index + 1) * 100 // Use predictable, but distinct, values for all headers + + res := tar.Header{ + Typeflag: typeFlag, + Name: fmt.Sprintf("name%d", n), + Size: size, + Mode: int64(n + 1), + Uid: n + 2, + Gid: n + 3, + Uname: fmt.Sprintf("user%d", n), + Gname: fmt.Sprintf("group%d", n), + ModTime: time.Unix(int64(n+4), 0), + AccessTime: time.Unix(int64(n+5), 0), + ChangeTime: time.Unix(int64(n+6), 0), + PAXRecords: 
map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)}, + Format: tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded. + } + switch res.Typeflag { + case tar.TypeLink, tar.TypeSymlink: + res.Linkname = fmt.Sprintf("link%d", n) + case tar.TypeChar, tar.TypeBlock: + res.Devmajor = int64(n + 7) + res.Devminor = int64(n + 8) + } + return res +} + +func TestIterateTarSplit(t *testing.T) { + entries := []struct { + typeFlag byte + size int64 + }{ + {tar.TypeReg, 0}, + {tar.TypeReg, 1}, + {tar.TypeReg, 511}, + {tar.TypeReg, 512}, + {tar.TypeReg, 513}, + {tar.TypeLink, 0}, + {tar.TypeSymlink, 0}, + {tar.TypeChar, 0}, + {tar.TypeBlock, 0}, + {tar.TypeDir, 0}, + {tar.TypeFifo, 0}, + } + + var tarball bytes.Buffer + var expected []tar.Header + w := tar.NewWriter(&tarball) + for i, e := range entries { + hdr := testTarheader(i, e.typeFlag, e.size) + err := w.WriteHeader(&hdr) + require.NoError(t, err) + data := make([]byte, e.size) + _, err = w.Write(data) + require.NoError(t, err) + expected = append(expected, hdr) + } + err := w.Close() + require.NoError(t, err) + + var tarSplit bytes.Buffer + tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter()) + require.NoError(t, err) + _, err = io.Copy(io.Discard, tsReader) + require.NoError(t, err) + + var actual []tar.Header + err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error { + actual = append(actual, *hdr) + return nil + }) + require.NoError(t, err) + + assert.Equal(t, len(expected), len(actual)) + for i := range expected { + // We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata, + // because we already have that implemented for that type — and because it provides a tiny bit of code coverage + // testing for ensureFileMetadataAttributesMatch. 
+ expected1, err := internal.NewFileMetadata(&expected[i]) + require.NoError(t, err, i) + actual1, err := internal.NewFileMetadata(&actual[i]) + require.NoError(t, err, i) + err = ensureFileMetadataAttributesMatch(&expected1, &actual1) + assert.NoError(t, err, i) + } +} diff --git a/vendor/golang.org/x/exp/LICENSE b/vendor/golang.org/x/exp/LICENSE new file mode 100644 index 0000000000..6a66aea5ea --- /dev/null +++ b/vendor/golang.org/x/exp/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/golang.org/x/exp/PATENTS b/vendor/golang.org/x/exp/PATENTS new file mode 100644 index 0000000000..733099041f --- /dev/null +++ b/vendor/golang.org/x/exp/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/exp/maps/maps.go b/vendor/golang.org/x/exp/maps/maps.go new file mode 100644 index 0000000000..ecc0dabb74 --- /dev/null +++ b/vendor/golang.org/x/exp/maps/maps.go @@ -0,0 +1,94 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package maps defines various functions useful with maps of any type. 
// Keys collects every key of m into a newly allocated slice.
// The order of the result is unspecified.
func Keys[M ~map[K]V, K comparable, V any](m M) []K {
	keys := make([]K, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	return keys
}

// Values collects every value of m into a newly allocated slice.
// The order of the result is unspecified.
func Values[M ~map[K]V, K comparable, V any](m M) []V {
	vals := make([]V, 0, len(m))
	for _, val := range m {
		vals = append(vals, val)
	}
	return vals
}

// Equal reports whether m1 and m2 hold exactly the same key/value pairs.
// Values are compared with ==.
func Equal[M1, M2 ~map[K]V, K, V comparable](m1 M1, m2 M2) bool {
	if len(m1) != len(m2) {
		return false
	}
	for key, left := range m1 {
		right, found := m2[key]
		if !found {
			return false
		}
		if left != right {
			return false
		}
	}
	return true
}

// EqualFunc is the counterpart of Equal that compares values with eq
// instead of ==. Keys are still compared with ==.
func EqualFunc[M1 ~map[K]V1, M2 ~map[K]V2, K comparable, V1, V2 any](m1 M1, m2 M2, eq func(V1, V2) bool) bool {
	if len(m1) != len(m2) {
		return false
	}
	for key, left := range m1 {
		right, found := m2[key]
		if !found {
			return false
		}
		if !eq(left, right) {
			return false
		}
	}
	return true
}

// Clear deletes every entry of m, leaving it empty.
func Clear[M ~map[K]V, K comparable, V any](m M) {
	for key := range m {
		delete(m, key)
	}
}

// Clone returns a shallow copy of m: keys and values are copied
// using ordinary assignment. A nil map is cloned as nil.
func Clone[M ~map[K]V, K comparable, V any](m M) M {
	// Preserve nil in case it matters.
	if m == nil {
		return nil
	}
	dup := make(M, len(m))
	for key, val := range m {
		dup[key] = val
	}
	return dup
}

// Copy adds every key/value pair of src to dst.
// A key already present in dst has its value overwritten
// by the value associated with that key in src.
func Copy[M1 ~map[K]V, M2 ~map[K]V, K comparable, V any](dst M1, src M2) {
	for key, val := range src {
		dst[key] = val
	}
}

// DeleteFunc removes from m every key/value pair for which del returns true.
func DeleteFunc[M ~map[K]V, K comparable, V any](m M, del func(K, V) bool) {
	for key, val := range m {
		if !del(key, val) {
			continue
		}
		delete(m, key)
	}
}