From 1b2238ca8fbedd79f058d6c7feea78512a74036f Mon Sep 17 00:00:00 2001 From: Cory Grinstead Date: Tue, 19 Sep 2023 08:59:21 -0500 Subject: [PATCH] fix: object store http header last modified (#4834) * fix: object store http header last modified * refactor: make headermeta configurable on required fields * Update object_store/src/client/header.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> * Update object_store/src/client/header.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> * Update object_store/src/client/header.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- object_store/src/client/get.rs | 12 +++--- object_store/src/client/header.rs | 62 ++++++++++++++++++++++++------- object_store/src/http/mod.rs | 9 ++++- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs index 6b2d60ae565f..8b84a079c7d5 100644 --- a/object_store/src/client/get.rs +++ b/object_store/src/client/get.rs @@ -49,8 +49,8 @@ impl GetClientExt for T { async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { let range = options.range.clone(); let response = self.get_request(location, options, false).await?; - let meta = - header_meta(location, response.headers()).map_err(|e| Error::Generic { + let meta = header_meta(location, response.headers(), Default::default()) + .map_err(|e| Error::Generic { store: T::STORE, source: Box::new(e), })?; @@ -73,9 +73,11 @@ impl GetClientExt for T { async fn head(&self, location: &Path) -> Result { let options = GetOptions::default(); let response = self.get_request(location, options, true).await?; - header_meta(location, response.headers()).map_err(|e| Error::Generic { - store: T::STORE, - source: Box::new(e), + header_meta(location, response.headers(), Default::default()).map_err(|e| { + Error::Generic { + store: T::STORE, + source: Box::new(e), + } }) } } diff --git a/object_store/src/client/header.rs b/object_store/src/client/header.rs index cc4f16eaa599..b55494cdb812 100644 --- a/object_store/src/client/header.rs +++ b/object_store/src/client/header.rs @@ -19,11 +19,33 @@ use crate::path::Path; use crate::ObjectMeta; -use chrono::{DateTime, Utc}; +use chrono::{DateTime, TimeZone, Utc}; use hyper::header::{CONTENT_LENGTH, ETAG, LAST_MODIFIED}; use hyper::HeaderMap; use snafu::{OptionExt, ResultExt, Snafu}; +#[derive(Debug)] +/// Configuration for header extraction +pub struct HeaderConfig { + /// Whether to require an ETag header when extracting [`ObjectMeta`] from headers. + /// + /// Defaults to `true` + pub etag_required: bool, + /// Whether to require a Last-Modified header when extracting [`ObjectMeta`] from headers. + /// + /// Defaults to `true` + pub last_modified_required: bool, +} + +impl Default for HeaderConfig { + fn default() -> Self { + Self { + etag_required: true, + last_modified_required: true, + } + } +} + #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("ETag Header missing from response"))] @@ -52,32 +74,44 @@ pub enum Error { } /// Extracts [`ObjectMeta`] from the provided [`HeaderMap`] -pub fn header_meta(location: &Path, headers: &HeaderMap) -> Result { - let last_modified = headers - .get(LAST_MODIFIED) - .context(MissingLastModifiedSnafu)?; +pub fn header_meta( + location: &Path, + headers: &HeaderMap, + cfg: HeaderConfig, +) -> Result { + let last_modified = match headers.get(LAST_MODIFIED) { + Some(last_modified) => { + let last_modified = last_modified.to_str().context(BadHeaderSnafu)?; + DateTime::parse_from_rfc2822(last_modified) + .context(InvalidLastModifiedSnafu { last_modified })? + .with_timezone(&Utc) + } + None if cfg.last_modified_required => return Err(Error::MissingLastModified), + None => Utc.timestamp_nanos(0), + }; + + let e_tag = match headers.get(ETAG) { + Some(e_tag) => { + let e_tag = e_tag.to_str().context(BadHeaderSnafu)?; + Some(e_tag.to_string()) + } + None if cfg.etag_required => return Err(Error::MissingEtag), + None => None, + }; let content_length = headers .get(CONTENT_LENGTH) .context(MissingContentLengthSnafu)?; - let last_modified = last_modified.to_str().context(BadHeaderSnafu)?; - let last_modified = DateTime::parse_from_rfc2822(last_modified) - .context(InvalidLastModifiedSnafu { last_modified })? - .with_timezone(&Utc); - let content_length = content_length.to_str().context(BadHeaderSnafu)?; let content_length = content_length .parse() .context(InvalidContentLengthSnafu { content_length })?; - let e_tag = headers.get(ETAG).context(MissingEtagSnafu)?; - let e_tag = e_tag.to_str().context(BadHeaderSnafu)?; - Ok(ObjectMeta { location: location.clone(), last_modified, size: content_length, - e_tag: Some(e_tag.to_string()), + e_tag, }) } diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs index e8e7b459e12f..614381975625 100644 --- a/object_store/src/http/mod.rs +++ b/object_store/src/http/mod.rs @@ -40,7 +40,7 @@ use snafu::{OptionExt, ResultExt, Snafu}; use tokio::io::AsyncWrite; use url::Url; -use crate::client::header::header_meta; +use crate::client::header::{header_meta, HeaderConfig}; use crate::http::client::Client; use crate::path::Path; use crate::{ @@ -117,7 +117,12 @@ impl ObjectStore for HttpStore { async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { let range = options.range.clone(); let response = self.client.get(location, options).await?; - let meta = header_meta(location, response.headers()).context(MetadataSnafu)?; + let cfg = HeaderConfig { + last_modified_required: false, + etag_required: false, + }; + let meta = + header_meta(location, response.headers(), cfg).context(MetadataSnafu)?; let stream = response .bytes_stream()