From 5ce143530dbecc1a16eac58f49e204309754844b Mon Sep 17 00:00:00 2001 From: sinkingpoint Date: Wed, 8 Nov 2023 11:36:03 +1100 Subject: [PATCH] Allow turning off CRC32 checksums in S3 Puts https://github.com/thanos-io/thanos/pull/6746 bumped objstore, which bumped minio, which made Thanos incompatible with otherwise compliant S3 backends that do not support the x-amz-checksum header. This adds a `SendContentMd5` (YAML: `send_content_md5`) config entry to the S3 bucket config that allows reverting to MD5 checksums, which _are_ supported. Signed-off-by: sinkingpoint --- CHANGELOG.md | 1 + providers/s3/s3.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46b1410e..60d9fa90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#73](https://github.com/thanos-io/objstore/pull/73) Аdded file path to erros from DownloadFile - [#51](https://github.com/thanos-io/objstore/pull/51) Azure: Support using connection string authentication. - [#76](https://github.com/thanos-io/objstore/pull/76) GCS: Query for object names only in `Iter` to possibly improve performance when listing objects. +- [#85](https://github.com/thanos-io/objstore/pull/85) S3: Allow checksum algorithm to be configured ### Changed - [#38](https://github.com/thanos-io/objstore/pull/38) *: Upgrade minio-go version to `v7.0.45`. diff --git a/providers/s3/s3.go b/providers/s3/s3.go index 83e3a2de..507b550b 100644 --- a/providers/s3/s3.go +++ b/providers/s3/s3.go @@ -113,6 +113,7 @@ var DefaultConfig = Config{ }, PartSize: 1024 * 1024 * 64, // 64MB. BucketLookupType: AutoLookup, + SendContentMd5: true, // Default to using MD5. } // HTTPConfig exists here only because Cortex depends on it, and we depend on Cortex. 
@@ -136,6 +137,7 @@ type Config struct { TraceConfig TraceConfig `yaml:"trace"` ListObjectsVersion string `yaml:"list_objects_version"` BucketLookupType BucketLookupType `yaml:"bucket_lookup_type"` + SendContentMd5 bool `yaml:"send_content_md5"` // PartSize used for multipart upload. Only used if uploaded object size is known and larger than configured PartSize. // NOTE we need to make sure this number does not produce more parts than 10 000. PartSize uint64 `yaml:"part_size"` @@ -166,6 +168,7 @@ type Bucket struct { storageClass string partSize uint64 listObjectsV1 bool + sendContentMd5 bool } // parseConfig unmarshals a buffer into a Config with default values. @@ -334,6 +337,7 @@ func NewBucketWithConfig(logger log.Logger, config Config, component string) (*B storageClass: storageClass, partSize: config.PartSize, listObjectsV1: config.ListObjectsVersion == "v1", + sendContentMd5: config.SendContentMd5, } return bkt, nil } @@ -510,6 +514,7 @@ func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { ServerSideEncryption: sse, UserMetadata: userMetadata, StorageClass: b.storageClass, + SendContentMd5: b.sendContentMd5, // 4 is what minio-go have as the default. To be certain we do micro benchmark before any changes we // ensure we pin this number to four. // TODO(bwplotka): Consider adjusting this number to GOMAXPROCS or to expose this in config if it becomes bottleneck.