From 27813fb65d6863b822cf46d701e84a2d7b292aa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 31 Jan 2024 02:17:18 +0100 Subject: [PATCH 1/5] Reword the documentation a bit to suggest the edits are idempotent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In particular, it is OK to use CompressionOperation: Decompress on already-decompressed layers. TestUpdatedMIMEType already ensures this works correctly. Signed-off-by: Miloslav Trmač --- types/types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/types/types.go b/types/types.go index 180a98c5b..7d6097346 100644 --- a/types/types.go +++ b/types/types.go @@ -135,8 +135,8 @@ type BlobInfo struct { // CompressionOperation is used in Image.UpdateLayerInfos to instruct // whether the original layer's "compressed or not" should be preserved, // possibly while changing the compression algorithm from one to another, - // or if it should be compressed or decompressed. The field defaults to - // preserve the original layer's compressedness. + // or if it should be changed to compressed or decompressed. + // The field defaults to preserve the original layer's compressedness. // TODO: To remove together with CryptoOperation in re-design to remove // field out of BlobInfo. CompressionOperation LayerCompression From 230e72fcb23c16ff12efd1301bd7dd132fa9d3d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 31 Jan 2024 02:35:44 +0100 Subject: [PATCH 2/5] Rename bpCompressionStepData.operation to uploadedOperation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it clearer that it is intended for manifest edits, and is not indicative of what we actually did. Should not change behavior. Signed-off-by: Miloslav Trmač --- copy/compression.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index 953ef9deb..8b5e99dfd 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -70,7 +70,7 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI // bpCompressionStepData contains data that the copy pipeline needs about the compression step. type bpCompressionStepData struct { - operation types.LayerCompression // Operation to use for updating the blob metadata. + uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata. uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. uploadedAnnotations map[string]string // Annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. srcCompressorName string // Compressor name to record in the blob info cache for the source blob. @@ -115,7 +115,7 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp logrus.Debugf("Using original blob without modification for encrypted blob") // PreserveOriginal due to any compression not being able to be done on an encrypted blob unless decrypted return &bpCompressionStepData{ - operation: types.PreserveOriginal, + uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: nil, srcCompressorName: internalblobinfocache.UnknownCompression, uploadedCompressorName: internalblobinfocache.UnknownCompression, @@ -143,7 +143,7 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: types.Compress, + uploadedOperation: types.Compress, uploadedAlgorithm: uploadedAlgorithm, uploadedAnnotations: annotations, srcCompressorName: detected.srcCompressorName, @@ -182,7 +182,7 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp } succeeded = true return &bpCompressionStepData{ - operation: types.PreserveOriginal, + uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: ic.compressionFormat, uploadedAnnotations: annotations, srcCompressorName: detected.srcCompressorName, @@ -208,7 +208,7 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: types.Decompress, + uploadedOperation: types.Decompress, uploadedAlgorithm: nil, srcCompressorName: detected.srcCompressorName, uploadedCompressorName: internalblobinfocache.Uncompressed, @@ -239,7 +239,7 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom algorithm = nil } return &bpCompressionStepData{ - operation: types.PreserveOriginal, + uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: algorithm, srcCompressorName: detected.srcCompressorName, uploadedCompressorName: detected.srcCompressorName, @@ -248,7 +248,7 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom // updateCompressionEdits sets *operation, *algorithm and updates *annotations, if necessary. func (d *bpCompressionStepData) updateCompressionEdits(operation *types.LayerCompression, algorithm **compressiontypes.Algorithm, annotations *map[string]string) { - *operation = d.operation + *operation = d.uploadedOperation // If we can modify the layer's blob, set the desired algorithm for it to be set in the manifest. *algorithm = d.uploadedAlgorithm if *annotations == nil { @@ -268,11 +268,11 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // in the blob info cache (which would probably be necessary for any more complex logic), // and the simplicity is attractive. if !encryptionStep.encrypting && !decryptionStep.decrypting { - // If d.operation != types.PreserveOriginal, we now have two reliable digest values: - // srcinfo.Digest describes the pre-d.operation input, verified by digestingReader - // uploadedInfo.Digest describes the post-d.operation output, computed by PutBlob + // If d.uploadedOperation != types.PreserveOriginal, we now have two reliable digest values: + // srcinfo.Digest describes the pre-d.uploadedOperation input, verified by digestingReader + // uploadedInfo.Digest describes the post-d.uploadedOperation output, computed by PutBlob // (because stream.info.Digest == "", this must have been computed afresh). - switch d.operation { + switch d.uploadedOperation { case types.PreserveOriginal: break // Do nothing, we have only one digest and we might not have even verified it. case types.Compress: @@ -280,7 +280,7 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf case types.Decompress: c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) default: - return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation) + return fmt.Errorf("Internal error: Unexpected d.uploadedOperation value %#v", d.uploadedOperation) } } if d.uploadedCompressorName != "" && d.uploadedCompressorName != internalblobinfocache.UnknownCompression { From 61312cf85b5ff144fdd339024ea98ba2a6a37b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 31 Jan 2024 20:01:20 +0100 Subject: [PATCH 3/5] Add a new bpCompressionStepData.operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... describing in more granularity what we are doing. Use it in recordValidatedDigestData, adding a case where we can record that a layer is uncompressed when we determine that while copying it without changes. Adding this separate .operation field will also allow us to set .uploadedOperation to Decompress in the bpcOpPreserveUncompressed case, without changing what recordValidatedDigestData does. Signed-off-by: Miloslav Trmač --- copy/compression.go | 65 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index 8b5e99dfd..5af468d2d 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -70,7 +70,8 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI // bpCompressionStepData contains data that the copy pipeline needs about the compression step. type bpCompressionStepData struct { - uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata. + operation bpcOperation // What we are actually doing + uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. uploadedAnnotations map[string]string // Annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. srcCompressorName string // Compressor name to record in the blob info cache for the source blob. @@ -78,6 +79,18 @@ type bpCompressionStepData struct { closers []io.Closer // Objects to close after the upload is done, if any. } +type bpcOperation int + +const ( + bpcOpInvalid bpcOperation = iota + bpcOpPreserveOpaque // We are preserving something where compression is not applicable + bpcOpPreserveCompressed // We are preserving a compressed, and decompressible, layer + bpcOpPreserveUncompressed // We are preserving an uncompressed, and compressible, layer + bpcOpCompressUncompressed // We are compressing uncompressed data + bpcOpRecompressCompressed // We are recompressing compressed data + bpcOpDecompressCompressed // We are decompressing compressed data +) + // blobPipelineCompressionStep updates *stream to compress and/or decompress it. // srcInfo is primarily used for error messages. // Returns data for other steps; the caller should eventually call updateCompressionEdits and perhaps recordValidatedBlobData, @@ -112,9 +125,10 @@ func (ic *imageCopier) blobPipelineCompressionStep(stream *sourceStream, canModi // bpcPreserveEncrypted checks if the input is encrypted, and returns a *bpCompressionStepData if so. func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectCompressionStepData) (*bpCompressionStepData, error) { if isOciEncrypted(stream.info.MediaType) { + // We can’t do anything with an encrypted blob unless decrypted. logrus.Debugf("Using original blob without modification for encrypted blob") - // PreserveOriginal due to any compression not being able to be done on an encrypted blob unless decrypted return &bpCompressionStepData{ + operation: bpcOpPreserveOpaque, uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: nil, srcCompressorName: internalblobinfocache.UnknownCompression, @@ -143,6 +157,7 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ + operation: bpcOpCompressUncompressed, uploadedOperation: types.Compress, uploadedAlgorithm: uploadedAlgorithm, uploadedAnnotations: annotations, @@ -182,6 +197,7 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp } succeeded = true return &bpCompressionStepData{ + operation: bpcOpRecompressCompressed, uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: ic.compressionFormat, uploadedAnnotations: annotations, @@ -208,6 +224,7 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ + operation: bpcOpDecompressCompressed, uploadedOperation: types.Decompress, uploadedAlgorithm: nil, srcCompressorName: detected.srcCompressorName, @@ -232,13 +249,21 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom // But don’t touch blobs in objects where we can’t change compression, // so that src.UpdatedImage() doesn’t fail; assume that for such blobs // LayerInfosForCopy() should not be making any changes in the first place. + var bpcOp bpcOperation var algorithm *compressiontypes.Algorithm - if layerCompressionChangeSupported && detected.isCompressed { + switch { + case !layerCompressionChangeSupported: + bpcOp = bpcOpPreserveOpaque + algorithm = nil + case detected.isCompressed: + bpcOp = bpcOpPreserveCompressed algorithm = &detected.format - } else { + default: + bpcOp = bpcOpPreserveUncompressed algorithm = nil } return &bpCompressionStepData{ + operation: bpcOp, uploadedOperation: types.PreserveOriginal, uploadedAlgorithm: algorithm, srcCompressorName: detected.srcCompressorName, @@ -257,7 +282,8 @@ func (d *bpCompressionStepData) updateCompressionEdits(operation *types.LayerCom maps.Copy(*annotations, d.uploadedAnnotations) } -// recordValidatedBlobData updates b.blobInfoCache with data about the created uploadedInfo adnd the original srcInfo. +// recordValidatedBlobData updates b.blobInfoCache with data about the created uploadedInfo (as returned by PutBlob) +// and the original srcInfo (which the caller guarantees has been validated). // This must ONLY be called if all data has been validated by OUR code, and is not coming from third parties. func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInfo types.BlobInfo, srcInfo types.BlobInfo, encryptionStep *bpEncryptionStepData, decryptionStep *bpDecryptionStepData) error { @@ -268,19 +294,28 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // in the blob info cache (which would probably be necessary for any more complex logic), // and the simplicity is attractive. if !encryptionStep.encrypting && !decryptionStep.decrypting { - // If d.uploadedOperation != types.PreserveOriginal, we now have two reliable digest values: - // srcinfo.Digest describes the pre-d.uploadedOperation input, verified by digestingReader - // uploadedInfo.Digest describes the post-d.uploadedOperation output, computed by PutBlob - // (because stream.info.Digest == "", this must have been computed afresh). - switch d.uploadedOperation { - case types.PreserveOriginal: - break // Do nothing, we have only one digest and we might not have even verified it. - case types.Compress: + // If d.operation != bpcOpPreserve*, we now have two reliable digest values: + // srcinfo.Digest describes the pre-d.operation input, verified by digestingReader + // uploadedInfo.Digest describes the post-d.operation output, computed by PutBlob + // (because we set stream.info.Digest == "", this must have been computed afresh). + switch d.operation { + case bpcOpPreserveOpaque: + // No useful information + case bpcOpCompressUncompressed: c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) - case types.Decompress: + case bpcOpDecompressCompressed: c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) + case bpcOpRecompressCompressed, bpcOpPreserveCompressed: + // We know one or two compressed digests. BlobInfoCache associates compression variants via the uncompressed digest, + // and we don’t know that one. + // That also means that repeated copies with the same recompression don’t identify reuse opportunities (unless + // RecordDigestUncompressedPair was called for both compressed variants for some other reason). + case bpcOpPreserveUncompressed: + c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, srcInfo.Digest) + case bpcOpInvalid: + fallthrough default: - return fmt.Errorf("Internal error: Unexpected d.uploadedOperation value %#v", d.uploadedOperation) + return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation) } } if d.uploadedCompressorName != "" && d.uploadedCompressorName != internalblobinfocache.UnknownCompression { From 772f39109bb2df31d54ac0132c675a64ce4f36dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 31 Jan 2024 20:07:45 +0100 Subject: [PATCH 4/5] Ensure uncompressed layers from c/storage are recorded as uncompressed on upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When copying an uncompressed layer without modification, arrange types.BlobInfo.CompressionOperation to be set to types.Decompress, so that we edit the manifest to use an uncompressed MIME type (and not, e.g. a zstd:chunked MIME type which might not be representable in the destination at all). This is all still gated by the > if srcInfosUpdated || layerDigestsDiffer(srcInfos, destInfos) { condition in copyLayers, so we don't do these manifest updates frivolously. Signed-off-by: Miloslav Trmač --- copy/compression.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/copy/compression.go b/copy/compression.go index 5af468d2d..22dc432b2 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -250,21 +250,25 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom // so that src.UpdatedImage() doesn’t fail; assume that for such blobs // LayerInfosForCopy() should not be making any changes in the first place. var bpcOp bpcOperation + var uploadedOp types.LayerCompression var algorithm *compressiontypes.Algorithm switch { case !layerCompressionChangeSupported: bpcOp = bpcOpPreserveOpaque + uploadedOp = types.PreserveOriginal algorithm = nil case detected.isCompressed: bpcOp = bpcOpPreserveCompressed + uploadedOp = types.PreserveOriginal algorithm = &detected.format default: bpcOp = bpcOpPreserveUncompressed + uploadedOp = types.Decompress algorithm = nil } return &bpCompressionStepData{ operation: bpcOp, - uploadedOperation: types.PreserveOriginal, + uploadedOperation: uploadedOp, uploadedAlgorithm: algorithm, srcCompressorName: detected.srcCompressorName, uploadedCompressorName: detected.srcCompressorName, From 278b32423a11d2ea25ba27e749dc2ae93d76d935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 31 Jan 2024 20:20:23 +0100 Subject: [PATCH 5/5] Ensure uncompressed layers from c/storage are recorded as uncompressed on reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- copy/single.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/copy/single.go b/copy/single.go index c23361942..9e892943b 100644 --- a/copy/single.go +++ b/copy/single.go @@ -383,7 +383,7 @@ func (ic *imageCopier) compareImageDestinationManifestEqual(ctx context.Context, compressionAlgos := set.New[string]() for _, srcInfo := range ic.src.LayerInfos() { - if c := compressionAlgorithmFromMIMEType(srcInfo); c != nil { + if _, c := compressionEditsFromMIMEType(srcInfo); c != nil { compressionAlgos.Add(c.Name()) } } @@ -636,17 +636,22 @@ type diffIDResult struct { err error } -func compressionAlgorithmFromMIMEType(srcInfo types.BlobInfo) *compressiontypes.Algorithm { +// compressionEditsFromMIMEType returns a (CompressionOperation, CompressionAlgorithm) value pair suitable +// for types.BlobInfo, based on a MIME type of srcInfo. +func compressionEditsFromMIMEType(srcInfo types.BlobInfo) (types.LayerCompression, *compressiontypes.Algorithm) { // This MIME type → compression mapping belongs in manifest-specific code in our manifest // package (but we should preferably replace/change UpdatedImage instead of productizing // this workaround). switch srcInfo.MediaType { case manifest.DockerV2Schema2LayerMediaType, imgspecv1.MediaTypeImageLayerGzip: - return &compression.Gzip + return types.PreserveOriginal, &compression.Gzip case imgspecv1.MediaTypeImageLayerZstd: - return &compression.Zstd + return types.PreserveOriginal, &compression.Zstd + case manifest.DockerV2SchemaLayerMediaTypeUncompressed, imgspecv1.MediaTypeImageLayer: + return types.Decompress, nil + default: + return types.PreserveOriginal, nil } - return nil } // copyLayer copies a layer with srcInfo (with known Digest and Annotations and possibly known Size) in src to dest, perhaps (de/re/)compressing it, @@ -660,8 +665,8 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // which uses the compression information to compute the updated MediaType values. // (Sadly UpdatedImage() is documented to not update MediaTypes from // ManifestUpdateOptions.LayerInfos[].MediaType, so we are doing it indirectly.) - if srcInfo.CompressionAlgorithm == nil { - srcInfo.CompressionAlgorithm = compressionAlgorithmFromMIMEType(srcInfo) + if srcInfo.CompressionOperation == types.PreserveOriginal && srcInfo.CompressionAlgorithm == nil { + srcInfo.CompressionOperation, srcInfo.CompressionAlgorithm = compressionEditsFromMIMEType(srcInfo) } ic.c.printCopyInfo("blob", srcInfo)