From 1d3ef2eff975ff67a2d13a18fd20fc3fd582fe68 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 3 Dec 2024 15:09:56 +0100 Subject: [PATCH 1/6] Add Full Object Checksum API Add support for full object checksums as described here: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html To enable, use `ChecksumCRC64NVME`, `ChecksumFullObjectCRC32` or `ChecksumFullObjectCRC32C` as checksum type when uploading. Mint tests updated, but can be disabled with `MINT_NO_FULL_OBJECT=anything` env var. PR will fail against community MinIO without above env var. --- api-datatypes.go | 18 ++-- api-put-object-multipart.go | 48 ++++----- api-put-object-streaming.go | 97 +++++++---------- api-put-object.go | 30 +++--- api-s3-datatypes.go | 63 ++++++++--- checksum.go | 210 ++++++++++++++++++++++++++++++++++-- functional_tests.go | 95 +++++++++++++--- utils.go | 152 +++++++++++++++++++++++++- utils_test.go | 41 +++++++ 9 files changed, 605 insertions(+), 149 deletions(-) diff --git a/api-datatypes.go b/api-datatypes.go index 97a6f80b25..8a8fd88985 100644 --- a/api-datatypes.go +++ b/api-datatypes.go @@ -143,10 +143,11 @@ type UploadInfo struct { // Verified checksum values, if any. // Values are base64 (standard) encoded. // For multipart objects this is a checksum of the checksum of each part. 
- ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // RestoreInfo contains information of the restore operation of an archived object @@ -215,10 +216,11 @@ type ObjectInfo struct { Restore *RestoreInfo // Checksum values - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string Internal *struct { K int // Data blocks diff --git a/api-put-object-multipart.go b/api-put-object-multipart.go index a70cbea9e5..03bd34f76b 100644 --- a/api-put-object-multipart.go +++ b/api-put-object-multipart.go @@ -83,10 +83,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // HTTPS connection. hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256) if len(hashSums) == 0 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -113,7 +110,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() for partNumber <= totalPartsCount { @@ -154,7 +150,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) 
} p := uploadPartParams{bucketName: bucketName, objectName: objectName, uploadID: uploadID, reader: rd, partNumber: partNumber, md5Base64: md5Base64, sha256Hex: sha256Hex, size: int64(length), sse: opts.ServerSideEncryption, streamSha256: !opts.DisableContentSha256, customHeader: customHeader} @@ -182,18 +177,21 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -203,12 +201,8 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. 
- crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -354,10 +348,11 @@ func (c *Client) uploadPart(ctx context.Context, p uploadPartParams) (ObjectPart // Once successfully uploaded, return completed part. h := resp.Header objPart := ObjectPart{ - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), } objPart.Size = p.size objPart.PartNumber = p.partNumber @@ -457,9 +452,10 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object Expiration: expTime, ExpirationRuleID: ruleID, - ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, - ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, - ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, - ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, + ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, + ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, + ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumCRC64NVME: completeMultipartUploadResult.ChecksumCRC64NVME, }, nil } diff --git a/api-put-object-streaming.go b/api-put-object-streaming.go index dac4c0efef..c2663b49bf 100644 --- a/api-put-object-streaming.go +++ b/api-put-object-streaming.go @@ -113,10 +113,7 @@ func (c *Client) 
putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN } withChecksum := c.trailingHeaderSupport if withChecksum { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts) @@ -240,6 +237,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN // Gather the responses as they occur and update any // progress bar. + allParts := make([]ObjectPart, 0, totalPartsCount) for u := 1; u <= totalPartsCount; u++ { select { case <-ctx.Done(): @@ -248,16 +246,17 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN if uploadRes.Error != nil { return UploadInfo{}, uploadRes.Error } - + allParts = append(allParts, uploadRes.Part) // Update the totalUploadedSize. totalUploadedSize += uploadRes.Size complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: uploadRes.Part.ETag, - PartNumber: uploadRes.Part.PartNumber, - ChecksumCRC32: uploadRes.Part.ChecksumCRC32, - ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, - ChecksumSHA1: uploadRes.Part.ChecksumSHA1, - ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ETag: uploadRes.Part.ETag, + PartNumber: uploadRes.Part.PartNumber, + ChecksumCRC32: uploadRes.Part.ChecksumCRC32, + ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, + ChecksumSHA1: uploadRes.Part.ChecksumSHA1, + ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ChecksumCRC64NVME: uploadRes.Part.ChecksumCRC64NVME, }) } } @@ -275,15 +274,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN AutoChecksum: opts.AutoChecksum, } if withChecksum { - // Add hash of hashes. 
- crc := opts.AutoChecksum.Hasher() - for _, part := range complMultipartUpload.Parts { - cs, err := base64.StdEncoding.DecodeString(part.Checksum(opts.AutoChecksum)) - if err == nil { - crc.Write(cs) - } - } - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} + applyAutoChecksum(&opts, allParts) } uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) @@ -312,10 +303,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Calculate the optimal parts info for a given size. @@ -420,18 +408,21 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. 
+ allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -442,12 +433,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -475,10 +461,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam opts.AutoChecksum = opts.Checksum } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Cancel all when an error occurs. 
@@ -510,7 +493,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte crc := opts.AutoChecksum.Hasher() // Total data read and written to server. should be equal to 'size' at the end of the call. @@ -570,7 +552,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } wg.Add(1) @@ -630,18 +611,21 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -652,12 +636,8 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. 
- crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -823,9 +803,10 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, ExpirationRuleID: ruleID, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } diff --git a/api-put-object.go b/api-put-object.go index 10131a5be6..0981757841 100644 --- a/api-put-object.go +++ b/api-put-object.go @@ -387,10 +387,7 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam opts.AutoChecksum = opts.Checksum } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -417,7 +414,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() @@ -443,7 +439,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) 
} // Update progress reader appropriately to the latest offset @@ -475,18 +470,21 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -497,12 +495,8 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err diff --git a/api-s3-datatypes.go b/api-s3-datatypes.go index 790606c509..0bba0d42d4 100644 --- a/api-s3-datatypes.go +++ b/api-s3-datatypes.go @@ -18,6 +18,7 @@ package minio import ( + "encoding/base64" "encoding/xml" "errors" "io" @@ -276,10 +277,44 @@ type ObjectPart struct { Size int64 // Checksum values of each part. 
- ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string +} + +// Checksum will return the checksum for the given type. +// Will return the empty string if not set. +func (c ObjectPart) Checksum(t ChecksumType) string { + switch { + case t.Is(ChecksumCRC32C): + return c.ChecksumCRC32C + case t.Is(ChecksumCRC32): + return c.ChecksumCRC32 + case t.Is(ChecksumSHA1): + return c.ChecksumSHA1 + case t.Is(ChecksumSHA256): + return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME + } + return "" +} + +func (c ObjectPart) ChecksumRaw(t ChecksumType) ([]byte, error) { + b64 := c.Checksum(t) + if b64 == "" { + return nil, errors.New("no checksum set") + } + decoded, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return nil, err + } + if len(decoded) != t.RawByteLen() { + return nil, errors.New("checksum length mismatch") + } + return decoded, nil } // ListObjectPartsResult container for ListObjectParts response. @@ -320,10 +355,11 @@ type completeMultipartUploadResult struct { ETag string // Checksum values, hash of hashes of parts. 
- ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // CompletePart sub container lists individual part numbers and their @@ -334,10 +370,11 @@ type CompletePart struct { ETag string // Checksum values - ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` - ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` - ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` - ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` + ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` + ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` + ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC64NVME string `xml:",omitempty"` } // Checksum will return the checksum for the given type. @@ -352,6 +389,8 @@ func (c CompletePart) Checksum(t ChecksumType) string { return c.ChecksumSHA1 case t.Is(ChecksumSHA256): return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME } return "" } diff --git a/checksum.go b/checksum.go index 7eb1bf25ab..8e4c27ce42 100644 --- a/checksum.go +++ b/checksum.go @@ -21,11 +21,15 @@ import ( "crypto/sha1" "crypto/sha256" "encoding/base64" + "encoding/binary" + "errors" "hash" "hash/crc32" + "hash/crc64" "io" "math/bits" "net/http" + "sort" ) // ChecksumType contains information about the checksum type. @@ -41,23 +45,41 @@ const ( ChecksumCRC32 // ChecksumCRC32C indicates a CRC32 checksum with Castagnoli table. ChecksumCRC32C + // ChecksumCRC64NVME indicates CRC64 with 0xad93d23594c93659 polynomial. + ChecksumCRC64NVME // Keep after all valid checksums checksumLast + // ChecksumFullObject is a modifier that can be used on CRC32 and CRC32C + // to indicate full object checksums. + ChecksumFullObject + // checksumMask is a mask for valid checksum types. 
checksumMask = checksumLast - 1 // ChecksumNone indicates no checksum. ChecksumNone ChecksumType = 0 - amzChecksumAlgo = "x-amz-checksum-algorithm" - amzChecksumCRC32 = "x-amz-checksum-crc32" - amzChecksumCRC32C = "x-amz-checksum-crc32c" - amzChecksumSHA1 = "x-amz-checksum-sha1" - amzChecksumSHA256 = "x-amz-checksum-sha256" + // ChecksumFullObjectCRC32 indicates full object CRC32 + ChecksumFullObjectCRC32 = ChecksumCRC32 | ChecksumFullObject + + // ChecksumFullObjectCRC32C indicates full object CRC32C + ChecksumFullObjectCRC32C = ChecksumCRC32C | ChecksumFullObject + + amzChecksumAlgo = "x-amz-checksum-algorithm" + amzChecksumCRC32 = "x-amz-checksum-crc32" + amzChecksumCRC32C = "x-amz-checksum-crc32c" + amzChecksumSHA1 = "x-amz-checksum-sha1" + amzChecksumSHA256 = "x-amz-checksum-sha256" + amzChecksumCRC64NVME = "x-amz-checksum-crc64nvme" ) +// Base returns the base type, without modifiers. +func (c ChecksumType) Base() ChecksumType { + return c & checksumMask +} + // Is returns if c is all of t. func (c ChecksumType) Is(t ChecksumType) bool { return c&t == t @@ -75,10 +97,39 @@ func (c ChecksumType) Key() string { return amzChecksumSHA1 case ChecksumSHA256: return amzChecksumSHA256 + case ChecksumCRC64NVME: + return amzChecksumCRC64NVME } return "" } +// CanComposite reports whether the checksum type can be used for composite multipart uploads on AWS. +func (c ChecksumType) CanComposite() bool { + switch c & checksumMask { + case ChecksumSHA256, ChecksumSHA1, ChecksumCRC32, ChecksumCRC32C: + return true + } + return false +} + +// CanMergeCRC reports whether per-part checksums of this type can be merged into a full object checksum. +func (c ChecksumType) CanMergeCRC() bool { + switch c & checksumMask { + case ChecksumCRC32, ChecksumCRC32C, ChecksumCRC64NVME: + return true + } + return false +} + +// FullObjectRequested reports whether the checksum type indicates that a full object checksum was requested.
+func (c ChecksumType) FullObjectRequested() bool { + switch c & (ChecksumFullObject | checksumMask) { + case ChecksumFullObjectCRC32C, ChecksumFullObjectCRC32, ChecksumCRC64NVME: + return true + } + return false +} + // KeyCapitalized returns the capitalized key as used in HTTP headers. func (c ChecksumType) KeyCapitalized() string { return http.CanonicalHeaderKey(c.Key()) @@ -93,10 +144,17 @@ func (c ChecksumType) RawByteLen() int { return sha1.Size case ChecksumSHA256: return sha256.Size + case ChecksumCRC64NVME: + return crc64.Size } return 0 } +const crc64NVMEPolynomial = 0xad93d23594c93659 + +// crc64 uses reversed polynomials. +var crc64Table = crc64.MakeTable(bits.Reverse64(crc64NVMEPolynomial)) + // Hasher returns a hasher corresponding to the checksum type. // Returns nil if no checksum. func (c ChecksumType) Hasher() hash.Hash { @@ -109,13 +167,15 @@ func (c ChecksumType) Hasher() hash.Hash { return sha1.New() case ChecksumSHA256: return sha256.New() + case ChecksumCRC64NVME: + return crc64.New(crc64Table) } return nil } // IsSet returns whether the type is valid and known. func (c ChecksumType) IsSet() bool { - return bits.OnesCount32(uint32(c)) == 1 + return bits.OnesCount32(uint32(c&checksumMask)) == 1 } // SetDefault will set the checksum if not already set. @@ -125,6 +185,16 @@ func (c *ChecksumType) SetDefault(t ChecksumType) { } } +// EncodeToString returns the base64 (standard) encoded hash of the content provided in b, or the empty string if the type is not set. +func (c ChecksumType) EncodeToString(b []byte) string { + if !c.IsSet() { + return "" + } + h := c.Hasher() + h.Write(b) + return base64.StdEncoding.EncodeToString(h.Sum(nil)) +} + // String returns the type as a string. // CRC32, CRC32C, SHA1, and SHA256 for valid values. // Empty string for unset and "" if not valid.
@@ -140,6 +210,8 @@ func (c ChecksumType) String() string { return "SHA256" case ChecksumNone: return "" + case ChecksumCRC64NVME: + return "CRC64NVME" } return "" } @@ -221,3 +293,129 @@ func (c Checksum) Raw() []byte { } return c.r } + +// CompositeChecksum returns the composite checksum of all provided parts. +func (c ChecksumType) CompositeChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanComposite() { + return nil, errors.New("cannot do composite checksum") + } + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + c = c.Base() + crcBytes := make([]byte, 0, len(p)*c.RawByteLen()) + for _, part := range p { + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + crcBytes = append(crcBytes, pCrc...) + } + h := c.Hasher() + h.Write(crcBytes) + return &Checksum{Type: c, r: h.Sum(nil)}, nil +} + +// FullObjectChecksum will return the full object checksum from provided parts. +func (c ChecksumType) FullObjectChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanMergeCRC() { + return nil, errors.New("cannot merge this checksum type") + } + c = c.Base() + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + + switch len(p) { + case 0: + return nil, errors.New("no parts given") + case 1: + check, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + return &Checksum{ + Type: c, + r: check, + }, nil + } + var merged uint32 + var merged64 uint64 + first, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + sz := p[0].Size + switch c { + case ChecksumCRC32, ChecksumCRC32C: + merged = binary.BigEndian.Uint32(first) + case ChecksumCRC64NVME: + merged64 = binary.BigEndian.Uint64(first) + } + + poly32 := uint32(crc32.IEEE) + if c.Is(ChecksumCRC32C) { + poly32 = crc32.Castagnoli + } + for _, part := range p[1:] { + if part.Size == 0 { + continue + } + sz += part.Size + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + switch c 
{ + case ChecksumCRC32, ChecksumCRC32C: + merged = crc32Combine(poly32, merged, binary.BigEndian.Uint32(pCrc), part.Size) + case ChecksumCRC64NVME: + merged64 = crc64Combine(bits.Reverse64(crc64NVMEPolynomial), merged64, binary.BigEndian.Uint64(pCrc), part.Size) + } + } + var tmp [8]byte + switch c { + case ChecksumCRC32, ChecksumCRC32C: + binary.BigEndian.PutUint32(tmp[:], merged) + return &Checksum{ + Type: c, + r: tmp[:4], + }, nil + case ChecksumCRC64NVME: + binary.BigEndian.PutUint64(tmp[:], merged64) + return &Checksum{ + Type: c, + r: tmp[:8], + }, nil + default: + return nil, errors.New("unknown checksum type") + } +} + +func addAutoChecksumHeaders(opts *PutObjectOptions) { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + if opts.AutoChecksum.FullObjectRequested() { + opts.UserMetadata["X-Amz-Checksum-Type"] = "FULL_OBJECT" + } +} + +func applyAutoChecksum(opts *PutObjectOptions, allParts []ObjectPart) { + if !opts.AutoChecksum.IsSet() { + return + } + if opts.AutoChecksum.CanComposite() && !opts.AutoChecksum.Is(ChecksumFullObject) { + // Add composite hash of hashes. 
+ crc, err := opts.AutoChecksum.CompositeChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): crc.Encoded()} + } + } else if opts.AutoChecksum.CanMergeCRC() { + crc, err := opts.AutoChecksum.FullObjectChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): crc.Encoded(), "X-Amz-Checksum-Type": "FULL_OBJECT"} + } + } +} diff --git a/functional_tests.go b/functional_tests.go index 43383d1348..823f4455d9 100644 --- a/functional_tests.go +++ b/functional_tests.go @@ -2006,9 +2006,13 @@ func testPutObjectWithChecksums() { {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, {cs: minio.ChecksumSHA256}, + {cs: minio.ChecksumCRC64NVME}, } for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } bufSize := dataFileMap["datafile-10-kB"] // Save the data @@ -2065,6 +2069,7 @@ func testPutObjectWithChecksums() { cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) // Read the data back gopts := minio.GetObjectOptions{Checksum: true} @@ -2084,6 +2089,7 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(st.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) if st.Size != int64(bufSize) { logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) @@ -2127,12 +2133,12 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, "") cmpChecksum(st.ChecksumCRC32, "") cmpChecksum(st.ChecksumCRC32C, "") + 
cmpChecksum(st.ChecksumCRC64NVME, "") delete(args, "range") delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. @@ -2173,13 +2179,16 @@ func testPutObjectWithTrailingChecksums() { tests := []struct { cs minio.ChecksumType }{ + {cs: minio.ChecksumCRC64NVME}, {cs: minio.ChecksumCRC32C}, {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, {cs: minio.ChecksumSHA256}, } - for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } function := "PutObject(bucketName, objectName, reader,size, opts)" bufSize := dataFileMap["datafile-10-kB"] @@ -2227,6 +2236,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) // Read the data back gopts := minio.GetObjectOptions{Checksum: true} @@ -2247,6 +2257,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) if st.Size != int64(bufSize) { logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) @@ -2291,6 +2302,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(st.ChecksumSHA1, "") cmpChecksum(st.ChecksumCRC32, "") cmpChecksum(st.ChecksumCRC32C, "") + cmpChecksum(st.ChecksumCRC64NVME, "") function = "GetObjectAttributes(...)" s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) @@ 
-2305,9 +2317,8 @@ func testPutObjectWithTrailingChecksums() { delete(args, "range") delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. @@ -2319,7 +2330,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { args := map[string]interface{}{ "bucketName": "", "objectName": "", - "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Progress: progress Checksum: %v}", trailing), + "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Trailing: %v}", trailing), } if !isFullMode() { @@ -2344,14 +2355,18 @@ func testPutMultipartObjectWithChecksums(trailing bool) { return } - hashMultiPart := func(b []byte, partSize int, hasher hash.Hash) string { + hashMultiPart := func(b []byte, partSize int, cs minio.ChecksumType) string { r := bytes.NewReader(b) + hasher := cs.Hasher() + if cs.FullObjectRequested() { + partSize = len(b) + } tmp := make([]byte, partSize) parts := 0 var all []byte for { n, err := io.ReadFull(r, tmp) - if err != nil && err != io.ErrUnexpectedEOF { + if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { logError(testName, function, args, startTime, "", "Calc crc failed", err) } if n == 0 { @@ -2365,6 +2380,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { break } } + if parts == 1 { + return base64.StdEncoding.EncodeToString(hasher.Sum(nil)) + } hasher.Reset() hasher.Write(all) return fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hasher.Sum(nil)), parts) @@ -2373,6 +2391,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { tests := []struct { cs minio.ChecksumType }{ + {cs: minio.ChecksumFullObjectCRC32}, + {cs: minio.ChecksumFullObjectCRC32C}, + {cs: minio.ChecksumCRC64NVME}, {cs: minio.ChecksumCRC32C}, {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, @@ -2380,8 +2401,16 @@ func testPutMultipartObjectWithChecksums(trailing bool) { } for _, test := range 
tests { - bufSize := dataFileMap["datafile-129-MB"] + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } + args["section"] = "prep" + bufSize := dataFileMap["datafile-129-MB"] + if false && test.cs.Is(minio.ChecksumFullObjectCRC32) { + c.TraceOn(os.Stdout) + defer c.TraceOff() + } // Save the data objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") args["objectName"] = objectName @@ -2405,7 +2434,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { reader.Close() h := test.cs.Hasher() h.Reset() - want := hashMultiPart(b, partSize, test.cs.Hasher()) + want := hashMultiPart(b, partSize, test.cs) var cs minio.ChecksumType rd := io.Reader(io.NopCloser(bytes.NewReader(b))) @@ -2413,7 +2442,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cs = test.cs rd = bytes.NewReader(b) } + // Set correct CRC. + args["section"] = "PutObject" resp, err := c.PutObject(context.Background(), bucketName, objectName, rd, int64(bufSize), minio.PutObjectOptions{ DisableContentSha256: true, DisableMultipart: false, @@ -2427,7 +2458,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { return } - switch test.cs { + switch test.cs.Base() { case minio.ChecksumCRC32C: cmpChecksum(resp.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2436,15 +2467,41 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cmpChecksum(resp.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(resp.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(resp.ChecksumCRC64NVME, want) + } + + args["section"] = "HeadObject" + st, err := c.StatObject(context.Background(), bucketName, objectName, minio.StatObjectOptions{Checksum: true}) + if err != nil { + logError(testName, function, args, startTime, "", "StatObject failed", err) + return + } + switch test.cs.Base() { + case minio.ChecksumCRC32C: + cmpChecksum(st.ChecksumCRC32C, want) + case minio.ChecksumCRC32: + cmpChecksum(st.ChecksumCRC32, want) + 
case minio.ChecksumSHA1: + cmpChecksum(st.ChecksumSHA1, want) + case minio.ChecksumSHA256: + cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(st.ChecksumCRC64NVME, want) } + args["section"] = "GetObjectAttributes" s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) if err != nil { logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err) return } - want = want[:strings.IndexByte(want, '-')] + + if strings.ContainsRune(want, '-') { + want = want[:strings.IndexByte(want, '-')] + } switch test.cs { + // Full Object CRC does not return anything with GetObjectAttributes case minio.ChecksumCRC32C: cmpChecksum(s.Checksum.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2460,13 +2517,14 @@ func testPutMultipartObjectWithChecksums(trailing bool) { gopts.PartNumber = 2 // We cannot use StatObject, since it ignores partnumber. + args["section"] = "GetObject-Part" r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) if err != nil { logError(testName, function, args, startTime, "", "GetObject failed", err) return } io.Copy(io.Discard, r) - st, err := r.Stat() + st, err = r.Stat() if err != nil { logError(testName, function, args, startTime, "", "Stat failed", err) return @@ -2478,6 +2536,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { want = base64.StdEncoding.EncodeToString(h.Sum(nil)) switch test.cs { + // Full Object CRC does not return any part CRC for whatever reason. case minio.ChecksumCRC32C: cmpChecksum(st.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2486,12 +2545,15 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cmpChecksum(st.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + // AWS does not send this. 
+ cmpChecksum(st.ChecksumCRC64NVME, "") } delete(args, "metadata") + delete(args, "section") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with trailing checksums. @@ -2688,9 +2750,8 @@ func testTrailingChecksums() { } delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. diff --git a/utils.go b/utils.go index a5beb371f2..36eba760e8 100644 --- a/utils.go +++ b/utils.go @@ -378,10 +378,11 @@ func ToObjectInfo(bucketName, objectName string, h http.Header) (ObjectInfo, err Restore: restore, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } @@ -698,3 +699,146 @@ func (h *hashReaderWrapper) Read(p []byte) (n int, err error) { } return n, err } + +// Followis is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. +// Used uint for unsigned long. Used uint32 for input arguments in order to match +// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib) +// Modified for hash/crc64 by Klaus Post, 2024. 
+func gf2MatrixTimes(mat []uint64, vec uint64) uint64 { + var sum uint64 + + for vec != 0 { + if vec&1 != 0 { + sum ^= mat[0] + } + vec >>= 1 + mat = mat[1:] + } + return sum +} + +func gf2MatrixSquare(square, mat []uint64) { + if len(square) != len(mat) { + panic("square matrix size mismatch") + } + for n := range mat { + square[n] = gf2MatrixTimes(mat, mat[n]) + } +} + +// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32 +// hash values crc1 and crc2. poly represents the generator polynomial +// and len2 specifies the byte length that the crc2 hash covers. +func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 { + // degenerate case (also disallow negative lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 32) // even-power-of-two zeros operator + odd := make([]uint64, 32) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = uint64(poly) // CRC-32 polynomial + row := uint64(1) + for n := 1; n < 32; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := uint64(crc1) + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= uint64(crc2) + return uint32(crc1n) +} + +func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 { + // degenerate case (also disallow negative 
lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 64) // even-power-of-two zeros operator + odd := make([]uint64, 64) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = poly // CRC-64 polynomial + row := uint64(1) + for n := 1; n < 64; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := crc1 + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= crc2 + return crc1n +} diff --git a/utils_test.go b/utils_test.go index 0e2ed08b44..6e4a0ddbb3 100644 --- a/utils_test.go +++ b/utils_test.go @@ -20,6 +20,7 @@ package minio import ( "errors" "fmt" + "math/rand" "net/url" "testing" "time" @@ -429,3 +430,43 @@ func TestIsCustomQueryValue(t *testing.T) { } } } + +func TestFullObjectChecksum64(t *testing.T) { + tests := []ChecksumType{ + ChecksumCRC32, + ChecksumCRC32C, + ChecksumCRC64NVME, + } + for _, cs := range tests { + t.Run(cs.String(), func(t *testing.T) { + b := make([]byte, 1024000) + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + rng.Read(b) + sum := cs.EncodeToString + want := sum(b) + var parts []ObjectPart + for len(b) > 0 { + sz := len(b) / 2 + if len(b)-sz < 1024 { + sz = len(b) + } + switch cs { + case ChecksumCRC32: + parts = append(parts, ObjectPart{PartNumber: len(parts) + 1, ChecksumCRC32: 
cs.EncodeToString(b[:sz]), Size: int64(sz)}) + case ChecksumCRC32C: + parts = append(parts, ObjectPart{PartNumber: len(parts) + 1, ChecksumCRC32C: cs.EncodeToString(b[:sz]), Size: int64(sz)}) + case ChecksumCRC64NVME: + parts = append(parts, ObjectPart{PartNumber: len(parts) + 1, ChecksumCRC64NVME: cs.EncodeToString(b[:sz]), Size: int64(sz)}) + } + b = b[sz:] + } + gotCRC, err := cs.FullObjectChecksum(parts) + if err != nil { + t.Fatal(err) + } + if gotCRC.Encoded() != want { + t.Errorf("Checksum CRC64NVME does not match the expected CRC got:%s want:%s", gotCRC.Encoded(), want) + } + }) + } +} From 6e4de97f9385215570e1dfcfdd26ee17a7eec9b1 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 3 Dec 2024 15:14:08 +0100 Subject: [PATCH 2/6] Add docs --- api-s3-datatypes.go | 1 + 1 file changed, 1 insertion(+) diff --git a/api-s3-datatypes.go b/api-s3-datatypes.go index 0bba0d42d4..9dcaed7c39 100644 --- a/api-s3-datatypes.go +++ b/api-s3-datatypes.go @@ -302,6 +302,7 @@ func (c ObjectPart) Checksum(t ChecksumType) string { return "" } +// ChecksumRaw returns the decoded checksum from the part. func (c ObjectPart) ChecksumRaw(t ChecksumType) ([]byte, error) { b64 := c.Checksum(t) if b64 == "" { From ccf9ad544bcf6f716cea4837b8ed90a4c34ccf87 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 3 Dec 2024 15:18:46 +0100 Subject: [PATCH 3/6] Remove old crc slice. 
--- api-put-object-streaming.go | 2 -- utils_test.go | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/api-put-object-streaming.go b/api-put-object-streaming.go index c2663b49bf..3ff3b69efd 100644 --- a/api-put-object-streaming.go +++ b/api-put-object-streaming.go @@ -330,7 +330,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() md5Hash := c.md5Hasher() @@ -377,7 +376,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.KeyCapitalized(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset diff --git a/utils_test.go b/utils_test.go index 6e4a0ddbb3..f6bccfc3b7 100644 --- a/utils_test.go +++ b/utils_test.go @@ -446,7 +446,7 @@ func TestFullObjectChecksum64(t *testing.T) { want := sum(b) var parts []ObjectPart for len(b) > 0 { - sz := len(b) / 2 + sz := rng.Intn(len(b) / 2) if len(b)-sz < 1024 { sz = len(b) } From f7c6d78c90b73d2e9a0d5782c70aea7b26571a07 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 3 Dec 2024 16:16:48 +0100 Subject: [PATCH 4/6] Let's try setting the env var --- .github/workflows/go-windows.yml | 1 + .github/workflows/go.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/go-windows.yml b/.github/workflows/go-windows.yml index dec2dd18d2..ae6e8993ce 100644 --- a/.github/workflows/go-windows.yml +++ b/.github/workflows/go-windows.yml @@ -38,6 +38,7 @@ jobs: ENABLE_HTTPS: 1 MINIO_KMS_MASTER_KEY: my-minio-key:6368616e676520746869732070617373776f726420746f206120736563726574 MINIO_CI_CD: true + MINT_NO_FULL_OBJECT: true run: | New-Item -ItemType Directory -Path "$env:temp/certs-dir" Copy-Item -Path testcerts\* -Destination 
"$env:temp/certs-dir" diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 22eaad42a6..eb73d14fdf 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -39,6 +39,7 @@ jobs: MINIO_KMS_MASTER_KEY: my-minio-key:6368616e676520746869732070617373776f726420746f206120736563726574 SSL_CERT_FILE: /tmp/certs-dir/public.crt MINIO_CI_CD: true + MINT_NO_FULL_OBJECT: true run: | sudo apt update -y sudo apt install devscripts -y From 68652352bdc9aed34fe6804ebd535ce5fd951e98 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 4 Dec 2024 18:14:31 +0100 Subject: [PATCH 5/6] Tweaks --- functional_tests.go | 4 ---- utils.go | 2 +- utils_test.go | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/functional_tests.go b/functional_tests.go index 823f4455d9..3ce9d38e35 100644 --- a/functional_tests.go +++ b/functional_tests.go @@ -2407,10 +2407,6 @@ func testPutMultipartObjectWithChecksums(trailing bool) { args["section"] = "prep" bufSize := dataFileMap["datafile-129-MB"] - if false && test.cs.Is(minio.ChecksumFullObjectCRC32) { - c.TraceOn(os.Stdout) - defer c.TraceOff() - } // Save the data objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") args["objectName"] = objectName diff --git a/utils.go b/utils.go index 36eba760e8..cd7d2c27e6 100644 --- a/utils.go +++ b/utils.go @@ -700,7 +700,7 @@ func (h *hashReaderWrapper) Read(p []byte) (n int, err error) { return n, err } -// Followis is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. +// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. // Used uint for unsigned long. Used uint32 for input arguments in order to match // the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib) // Modified for hash/crc64 by Klaus Post, 2024. 
diff --git a/utils_test.go b/utils_test.go index f6bccfc3b7..9c111de1bf 100644 --- a/utils_test.go +++ b/utils_test.go @@ -465,7 +465,7 @@ func TestFullObjectChecksum64(t *testing.T) { t.Fatal(err) } if gotCRC.Encoded() != want { - t.Errorf("Checksum CRC64NVME does not match the expected CRC got:%s want:%s", gotCRC.Encoded(), want) + t.Errorf("Checksum %v does not match the expected CRC got:%s want:%s", cs.String(), gotCRC.Encoded(), want) } }) } From 61c5ccdcdf3b69f117e4e341a89cdc8d10fc6eb7 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 6 Dec 2024 18:01:21 +0100 Subject: [PATCH 6/6] Allow checksum to be either unset or correct. --- functional_tests.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/functional_tests.go b/functional_tests.go index 3ce9d38e35..fa67abb117 100644 --- a/functional_tests.go +++ b/functional_tests.go @@ -2542,8 +2542,10 @@ func testPutMultipartObjectWithChecksums(trailing bool) { case minio.ChecksumSHA256: cmpChecksum(st.ChecksumSHA256, want) case minio.ChecksumCRC64NVME: - // AWS does not send this. - cmpChecksum(st.ChecksumCRC64NVME, "") + // AWS doesn't return part checksum, but may in the future. + if st.ChecksumCRC64NVME != "" { + cmpChecksum(st.ChecksumCRC64NVME, want) + } } delete(args, "metadata")