From 8c881e441f862efeeaf5bef20bdbd60f345558a9 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin <paraseba@gmail.com> Date: Tue, 20 Feb 2024 21:06:54 -0300 Subject: [PATCH 1/2] fix(object_store): Include Content-MD5 header for S3 DeleteObjects S3 API [specification](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) requires the presence of this header for all `DeleteObjects` requests to general purpose buckets: > The Content-MD5 request header is required for all Multi-Object Delete requests Some platforms, such as MinIO, enforce this requirement, failing requests that don't include the header. --- object_store/Cargo.toml | 3 ++- object_store/src/aws/client.rs | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index 512fa305960c..e724e107586a 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -54,6 +54,7 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls- ring = { version = "0.17", default-features = false, features = ["std"], optional = true } rustls-pemfile = { version = "2.0", default-features = false, features = ["std"], optional = true } tokio = { version = "1.25.0", features = ["sync", "macros", "rt", "time", "io-util"] } +md5 = { version = "0.7.0", default-features = false, features = ["std"], optional = true } [target.'cfg(target_family="unix")'.dev-dependencies] nix = { version = "0.27.1", features = ["fs"] } @@ -62,7 +63,7 @@ nix = { version = "0.27.1", features = ["fs"] } cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", "base64", "rand", "ring"] azure = ["cloud"] gcp = ["cloud", "rustls-pemfile"] -aws = ["cloud"] +aws = ["cloud", "md5"] http = ["cloud"] tls-webpki-roots = ["reqwest?/rustls-tls-webpki-roots"] diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index fed6911e6f04..13f4915767ca 100644 ---
a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -438,6 +438,13 @@ impl S3Client { None }; + // S3 *requires* DeleteObjects to include a Content-MD5 header: + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html + // > "The Content-MD5 request header is required for all Multi-Object Delete requests" + // Some platforms, like MinIO, enforce this requirement and fail requests without the header. + let md5_digest = md5::compute(&body); + builder = builder.header("Content-MD5", BASE64_STANDARD.encode(md5_digest.0)); + let response = builder .header(CONTENT_TYPE, "application/xml") .body(body) From dfe5d7f01829401bf91308ac8f11d9ac6da79789 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin <paraseba@gmail.com> Date: Wed, 21 Feb 2024 12:01:03 -0300 Subject: [PATCH 2/2] Switch dependency from md5 to md-5 md-5 seems better maintained. --- object_store/Cargo.toml | 4 ++-- object_store/src/aws/client.rs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index e724e107586a..f3aaf35fbb02 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -54,7 +54,7 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls- ring = { version = "0.17", default-features = false, features = ["std"], optional = true } rustls-pemfile = { version = "2.0", default-features = false, features = ["std"], optional = true } tokio = { version = "1.25.0", features = ["sync", "macros", "rt", "time", "io-util"] } -md5 = { version = "0.7.0", default-features = false, features = ["std"], optional = true } +md-5 = { version = "0.10.6", default-features = false, optional = true } [target.'cfg(target_family="unix")'.dev-dependencies] nix = { version = "0.27.1", features = ["fs"] } @@ -63,7 +63,7 @@ nix = { version = "0.27.1", features = ["fs"] } cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", 
"base64", "rand", "ring"] azure = ["cloud"] gcp = ["cloud", "rustls-pemfile"] -aws = ["cloud", "md5"] +aws = ["cloud", "md-5"] http = ["cloud"] tls-webpki-roots = ["reqwest?/rustls-tls-webpki-roots"] diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index 13f4915767ca..a31350fad4d8 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -43,6 +43,7 @@ use bytes::{Buf, Bytes}; use hyper::http; use hyper::http::HeaderName; use itertools::Itertools; +use md5::{Digest, Md5}; use percent_encoding::{utf8_percent_encode, PercentEncode}; use quick_xml::events::{self as xml_events}; use reqwest::{ @@ -442,8 +443,9 @@ impl S3Client { // https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html // > "The Content-MD5 request header is required for all Multi-Object Delete requests" // Some platforms, like MinIO, enforce this requirement and fail requests without the header. - let md5_digest = md5::compute(&body); - builder = builder.header("Content-MD5", BASE64_STANDARD.encode(md5_digest.0)); + let mut hasher = Md5::new(); + hasher.update(&body); + builder = builder.header("Content-MD5", BASE64_STANDARD.encode(hasher.finalize())); let response = builder .header(CONTENT_TYPE, "application/xml")