Skip to content

Commit

Permalink
Merge pull request #2213 from mtrmac/zstd-blob-reuse-choice
Browse files Browse the repository at this point in the history
Make blob reuse choices manifest-format-sensitive, and allow conversions when writing to format-agnostic transports
  • Loading branch information
rhatdan committed Feb 5, 2024
2 parents ffdc507 + 8aadec3 commit dd3d3eb
Show file tree
Hide file tree
Showing 14 changed files with 112 additions and 86 deletions.
2 changes: 1 addition & 1 deletion copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
if d.uploadedCompressorName != "" && d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms.
// There is already a similar hack in internal/imagedestination/impl/helpers.BlobMatchesRequiredCompression,
// There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions,
// and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless.
//
// We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate
Expand Down
11 changes: 2 additions & 9 deletions copy/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,11 @@ func determineManifestConversion(in determineManifestConversionInputs) (manifest
if in.forceManifestMIMEType != "" {
destSupportedManifestMIMETypes = []string{in.forceManifestMIMEType}
}

restrictiveCompressionRequired := in.requestedCompressionFormat != nil && !internalManifest.CompressionAlgorithmIsUniversallySupported(*in.requestedCompressionFormat)
if len(destSupportedManifestMIMETypes) == 0 {
if (!in.requiresOCIEncryption || manifest.MIMETypeSupportsEncryption(srcType)) &&
(!restrictiveCompressionRequired || internalManifest.MIMETypeSupportsCompressionAlgorithm(srcType, *in.requestedCompressionFormat)) {
return manifestConversionPlan{ // Anything goes; just use the original as is, do not try any conversions.
preferredMIMEType: srcType,
otherMIMETypeCandidates: []string{},
}, nil
}
destSupportedManifestMIMETypes = allManifestMIMETypes
}

restrictiveCompressionRequired := in.requestedCompressionFormat != nil && !internalManifest.CompressionAlgorithmIsUniversallySupported(*in.requestedCompressionFormat)
supportedByDest := set.New[string]()
for _, t := range destSupportedManifestMIMETypes {
if in.requiresOCIEncryption && !manifest.MIMETypeSupportsEncryption(t) {
Expand Down
6 changes: 3 additions & 3 deletions copy/manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,21 @@ func TestDetermineManifestConversion(t *testing.T) {
destTypes []string
expected manifestConversionPlan
}{
// Destination accepts anything — no conversion necessary
// Destination accepts anything — consider all options, prefer the source format
{
"s1→anything", manifest.DockerV2Schema1SignedMediaType, nil,
manifestConversionPlan{
preferredMIMEType: manifest.DockerV2Schema1SignedMediaType,
preferredMIMETypeNeedsConversion: false,
otherMIMETypeCandidates: []string{},
otherMIMETypeCandidates: []string{manifest.DockerV2Schema2MediaType, v1.MediaTypeImageManifest, manifest.DockerV2Schema1MediaType},
},
},
{
"s2→anything", manifest.DockerV2Schema2MediaType, nil,
manifestConversionPlan{
preferredMIMEType: manifest.DockerV2Schema2MediaType,
preferredMIMETypeNeedsConversion: false,
otherMIMETypeCandidates: []string{},
otherMIMETypeCandidates: []string{manifest.DockerV2Schema1SignedMediaType, v1.MediaTypeImageManifest, manifest.DockerV2Schema1MediaType},
},
},
// Destination accepts the unmodified original
Expand Down
47 changes: 21 additions & 26 deletions copy/single.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type imageCopier struct {
c *copier
manifestUpdates *types.ManifestUpdateOptions
src *image.SourcedImage
manifestConversionPlan manifestConversionPlan
diffIDsAreNeeded bool
cannotModifyManifestReason string // The reason the manifest cannot be modified, or an empty string if it can
canSubstituteBlobs bool
Expand Down Expand Up @@ -136,7 +137,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
c: c,
manifestUpdates: &types.ManifestUpdateOptions{InformationOnly: types.ManifestUpdateInformation{Destination: c.dest}},
src: src,
// diffIDsAreNeeded is computed later
// manifestConversionPlan and diffIDsAreNeeded are computed later
cannotModifyManifestReason: cannotModifyManifestReason,
requireCompressionFormatMatch: opts.requireCompressionFormatMatch,
}
Expand Down Expand Up @@ -164,7 +165,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar

destRequiresOciEncryption := (isEncrypted(src) && ic.c.options.OciDecryptConfig == nil) || c.options.OciEncryptLayers != nil

manifestConversionPlan, err := determineManifestConversion(determineManifestConversionInputs{
ic.manifestConversionPlan, err = determineManifestConversion(determineManifestConversionInputs{
srcMIMEType: ic.src.ManifestMIMEType,
destSupportedManifestMIMETypes: ic.c.dest.SupportedManifestMIMETypes(),
forceManifestMIMEType: c.options.ForceManifestMIMEType,
Expand All @@ -179,8 +180,8 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
// code that calls copyUpdatedConfigAndManifest, so that other parts of the copy code
// (e.g. the UpdatedImageNeedsLayerDiffIDs check just below) can make decisions based
// on the expected destination format.
if manifestConversionPlan.preferredMIMETypeNeedsConversion {
ic.manifestUpdates.ManifestMIMEType = manifestConversionPlan.preferredMIMEType
if ic.manifestConversionPlan.preferredMIMETypeNeedsConversion {
ic.manifestUpdates.ManifestMIMEType = ic.manifestConversionPlan.preferredMIMEType
}

// If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here.
Expand Down Expand Up @@ -219,11 +220,11 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
manifestBytes, manifestDigest, err := ic.copyUpdatedConfigAndManifest(ctx, targetInstance)
wipResult := copySingleImageResult{
manifest: manifestBytes,
manifestMIMEType: manifestConversionPlan.preferredMIMEType,
manifestMIMEType: ic.manifestConversionPlan.preferredMIMEType,
manifestDigest: manifestDigest,
}
if err != nil {
logrus.Debugf("Writing manifest using preferred type %s failed: %v", manifestConversionPlan.preferredMIMEType, err)
logrus.Debugf("Writing manifest using preferred type %s failed: %v", ic.manifestConversionPlan.preferredMIMEType, err)
// … if it fails, and the failure is either because the manifest is rejected by the registry, or
// because we failed to create a manifest of the specified type because the specific manifest type
// doesn't support the type of compression we're trying to use (e.g. docker v2s2 and zstd), we may
Expand All @@ -232,13 +233,13 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
var manifestLayerCompressionIncompatibilityError manifest.ManifestLayerCompressionIncompatibilityError
isManifestRejected := errors.As(err, &manifestTypeRejectedError)
isCompressionIncompatible := errors.As(err, &manifestLayerCompressionIncompatibilityError)
if (!isManifestRejected && !isCompressionIncompatible) || len(manifestConversionPlan.otherMIMETypeCandidates) == 0 {
if (!isManifestRejected && !isCompressionIncompatible) || len(ic.manifestConversionPlan.otherMIMETypeCandidates) == 0 {
// We don’t have other options.
// In principle the code below would handle this as well, but the resulting error message is fairly ugly.
// Don’t bother the user with MIME types if we have no choice.
return copySingleImageResult{}, err
}
// If the original MIME type is acceptable, determineManifestConversion always uses it as manifestConversionPlan.preferredMIMEType.
// If the original MIME type is acceptable, determineManifestConversion always uses it as ic.manifestConversionPlan.preferredMIMEType.
// So if we are here, we will definitely be trying to convert the manifest.
// With ic.cannotModifyManifestReason != "", that would just be a string of repeated failures for the same reason,
// so let’s bail out early and with a better error message.
Expand All @@ -247,8 +248,8 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
}

// errs is a list of errors when trying various manifest types. Also serves as an "upload succeeded" flag when set to nil.
errs := []string{fmt.Sprintf("%s(%v)", manifestConversionPlan.preferredMIMEType, err)}
for _, manifestMIMEType := range manifestConversionPlan.otherMIMETypeCandidates {
errs := []string{fmt.Sprintf("%s(%v)", ic.manifestConversionPlan.preferredMIMEType, err)}
for _, manifestMIMEType := range ic.manifestConversionPlan.otherMIMETypeCandidates {
logrus.Debugf("Trying to use manifest type %s…", manifestMIMEType)
ic.manifestUpdates.ManifestMIMEType = manifestMIMEType
attemptedManifest, attemptedManifestDigest, err := ic.copyUpdatedConfigAndManifest(ctx, targetInstance)
Expand Down Expand Up @@ -683,17 +684,10 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
logrus.Debugf("Checking if we can reuse blob %s: general substitution = %v, compression for MIME type %q = %v",
srcInfo.Digest, ic.canSubstituteBlobs, srcInfo.MediaType, canChangeLayerCompression)
canSubstitute := ic.canSubstituteBlobs && ic.src.CanChangeLayerCompression(srcInfo.MediaType)
// TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm
// that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing
// a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause
// a failure when we eventually try to update the manifest with the digest and MIME type of the reused blob.
// Fixing that will probably require passing more information to TryReusingBlob() than the current version of
// the ImageDestination interface lets us pass in.

var requiredCompression *compressiontypes.Algorithm
var originalCompression *compressiontypes.Algorithm
if ic.requireCompressionFormatMatch {
requiredCompression = ic.compressionFormat
originalCompression = srcInfo.CompressionAlgorithm
}

// Check if we have a chunked layer in storage that's based on that blob. These layers are stored by their TOC digest.
Expand All @@ -703,14 +697,15 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
}

reused, reusedBlob, err := ic.c.dest.TryReusingBlobWithOptions(ctx, srcInfo, private.TryReusingBlobOptions{
Cache: ic.c.blobInfoCache,
CanSubstitute: canSubstitute,
EmptyLayer: emptyLayer,
LayerIndex: &layerIndex,
SrcRef: srcRef,
RequiredCompression: requiredCompression,
OriginalCompression: originalCompression,
TOCDigest: tocDigest,
Cache: ic.c.blobInfoCache,
CanSubstitute: canSubstitute,
EmptyLayer: emptyLayer,
LayerIndex: &layerIndex,
SrcRef: srcRef,
PossibleManifestFormats: append([]string{ic.manifestConversionPlan.preferredMIMEType}, ic.manifestConversionPlan.otherMIMETypeCandidates...),
RequiredCompression: requiredCompression,
OriginalCompression: srcInfo.CompressionAlgorithm,
TOCDigest: tocDigest,
})
if err != nil {
return types.BlobInfo{}, "", fmt.Errorf("trying to reuse blob %s at destination: %w", srcInfo.Digest, err)
Expand Down
2 changes: 1 addition & 1 deletion directory/directory_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func (d *dirImageDestination) PutBlobWithOptions(ctx context.Context, stream io.
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *dirImageDestination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if info.Digest == "" {
Expand Down
29 changes: 16 additions & 13 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/containers/image/v5/internal/uploadreader"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/pkg/blobinfocache/none"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
"github.com/docker/distribution/registry/api/errcode"
v2 "github.com/docker/distribution/registry/api/v2"
Expand Down Expand Up @@ -311,6 +312,13 @@ func (d *dockerImageDestination) tryReusingExactBlob(ctx context.Context, info t
return false, private.ReusedBlob{}, nil
}

func optionalCompressionName(algo *compressiontypes.Algorithm) string {
if algo != nil {
return algo.Name()
}
return "nil"
}

// TryReusingBlobWithOptions checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
Expand All @@ -321,7 +329,7 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return false, private.ReusedBlob{}, errors.New("Can not check for a blob with unknown digest")
}

if impl.OriginalBlobMatchesRequiredCompression(options) {
if impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
// First, check whether the blob happens to already exist at the destination.
haveBlob, reusedInfo, err := d.tryReusingExactBlob(ctx, info, options.Cache)
if err != nil {
Expand All @@ -331,11 +339,8 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return true, reusedInfo, nil
}
} else {
requiredCompression := "nil"
if options.OriginalCompression != nil {
requiredCompression = options.OriginalCompression.Name()
}
logrus.Debugf("Ignoring exact blob match case due to compression mismatch ( %s vs %s )", options.RequiredCompression.Name(), requiredCompression)
logrus.Debugf("Ignoring exact blob match, compression %s does not match required %s or MIME types %#v",
optionalCompressionName(options.OriginalCompression), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
}

// Then try reusing blobs from other locations.
Expand All @@ -355,15 +360,13 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
continue
}
}
if !impl.BlobMatchesRequiredCompression(options, compressionAlgorithm) {
requiredCompression := "nil"
if compressionAlgorithm != nil {
requiredCompression = compressionAlgorithm.Name()
}
if !impl.CandidateMatchesTryReusingBlobOptions(options, compressionAlgorithm) {
if !candidate.UnknownLocation {
logrus.Debugf("Ignoring candidate blob %s as reuse candidate due to compression mismatch ( %s vs %s ) in %s", candidate.Digest.String(), options.RequiredCompression.Name(), requiredCompression, candidateRepo.Name())
logrus.Debugf("Ignoring candidate blob %s in %s, compression %s does not match required %s or MIME types %#v", candidate.Digest.String(), candidateRepo.Name(),
optionalCompressionName(compressionAlgorithm), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
} else {
logrus.Debugf("Ignoring candidate blob %s as reuse candidate due to compression mismatch ( %s vs %s ) with no location match, checking current repo", candidate.Digest.String(), options.RequiredCompression.Name(), requiredCompression)
logrus.Debugf("Ignoring candidate blob %s with no known location, compression %s does not match required %s or MIME types %#v", candidate.Digest.String(),
optionalCompressionName(compressionAlgorithm), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
}
continue
}
Expand Down
2 changes: 1 addition & 1 deletion docker/internal/tarfile/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (d *Destination) PutBlobWithOptions(ctx context.Context, stream io.Reader,
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *Destination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if err := d.archive.lock(); err != nil {
Expand Down
38 changes: 27 additions & 11 deletions internal/imagedestination/impl/helpers.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,41 @@
package impl

import (
"github.com/containers/image/v5/internal/manifest"
"github.com/containers/image/v5/internal/private"
compression "github.com/containers/image/v5/pkg/compression/types"
"golang.org/x/exp/slices"
)

// BlobMatchesRequiredCompression validates if compression is required by the caller while selecting a blob, if it is required
// CandidateMatchesTryReusingBlobOptions validates if compression is required by the caller while selecting a blob, if it is required
// then function performs a match against the compression requested by the caller and compression of existing blob
// (which can be nil to represent uncompressed or unknown)
func BlobMatchesRequiredCompression(options private.TryReusingBlobOptions, candidateCompression *compression.Algorithm) bool {
if options.RequiredCompression == nil {
return true // no requirement imposed
func CandidateMatchesTryReusingBlobOptions(options private.TryReusingBlobOptions, candidateCompression *compression.Algorithm) bool {
if options.RequiredCompression != nil {
if options.RequiredCompression.Name() == compression.ZstdChunkedAlgorithmName {
// HACK: Never match when the caller asks for zstd:chunked, because we don’t record the annotations required to use the chunked blobs.
// The caller must re-compress to build those annotations.
return false
}
if candidateCompression == nil || (options.RequiredCompression.Name() != candidateCompression.Name()) {
return false
}
}
if options.RequiredCompression.Name() == compression.ZstdChunkedAlgorithmName {
// HACK: Never match when the caller asks for zstd:chunked, because we don’t record the annotations required to use the chunked blobs.
// The caller must re-compress to build those annotations.
return false

// For candidateCompression == nil, we can’t tell the difference between “uncompressed” and “unknown”;
// and “uncompressed” is acceptable in all known formats (well, it seems to work in practice for schema1),
// so don’t impose any restrictions if candidateCompression == nil
if options.PossibleManifestFormats != nil && candidateCompression != nil {
if !slices.ContainsFunc(options.PossibleManifestFormats, func(mt string) bool {
return manifest.MIMETypeSupportsCompressionAlgorithm(mt, *candidateCompression)
}) {
return false
}
}
return candidateCompression != nil && (options.RequiredCompression.Name() == candidateCompression.Name())

return true
}

func OriginalBlobMatchesRequiredCompression(opts private.TryReusingBlobOptions) bool {
return BlobMatchesRequiredCompression(opts, opts.OriginalCompression)
func OriginalCandidateMatchesTryReusingBlobOptions(opts private.TryReusingBlobOptions) bool {
return CandidateMatchesTryReusingBlobOptions(opts, opts.OriginalCompression)
}
Loading

0 comments on commit dd3d3eb

Please sign in to comment.