Skip to content

Commit

Permalink
Generate ETags for InMemory and LocalFileSystem (apache#4879) (a…
Browse files Browse the repository at this point in the history
…pache#4922)

* Support ETag in InMemory (apache#4879)

* Add LocalFileSystem Etag

* Review feedback

* Review feedback
  • Loading branch information
tustvold authored Oct 17, 2023
1 parent 95b015c commit ab87abd
Show file tree
Hide file tree
Showing 3 changed files with 268 additions and 124 deletions.
206 changes: 158 additions & 48 deletions object_store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,12 +698,28 @@ pub struct GetOptions {
/// Request will succeed if the `ObjectMeta::e_tag` matches
/// otherwise returning [`Error::Precondition`]
///
/// <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match>
/// See <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match>
///
/// Examples:
///
/// ```text
/// If-Match: "xyzzy"
/// If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
/// If-Match: *
/// ```
pub if_match: Option<String>,
/// Request will succeed if the `ObjectMeta::e_tag` does not match
/// otherwise returning [`Error::NotModified`]
///
/// <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2>
/// See <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2>
///
/// Examples:
///
/// ```text
/// If-None-Match: "xyzzy"
/// If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
/// If-None-Match: *
/// ```
pub if_none_match: Option<String>,
/// Request will succeed if the object has been modified since
///
Expand All @@ -730,25 +746,41 @@ pub struct GetOptions {

impl GetOptions {
/// Returns an error if the modification conditions on this request are not satisfied
fn check_modified(
&self,
location: &Path,
last_modified: DateTime<Utc>,
) -> Result<()> {
if let Some(date) = self.if_modified_since {
if last_modified <= date {
return Err(Error::NotModified {
path: location.to_string(),
source: format!("{} >= {}", date, last_modified).into(),
///
/// <https://datatracker.ietf.org/doc/html/rfc7232#section-6>
fn check_preconditions(&self, meta: &ObjectMeta) -> Result<()> {
// The use of the invalid etag "*" means no ETag is equivalent to never matching
let etag = meta.e_tag.as_deref().unwrap_or("*");
let last_modified = meta.last_modified;

if let Some(m) = &self.if_match {
if m != "*" && m.split(',').map(str::trim).all(|x| x != etag) {
return Err(Error::Precondition {
path: meta.location.to_string(),
source: format!("{etag} does not match {m}").into(),
});
}
}

if let Some(date) = self.if_unmodified_since {
} else if let Some(date) = self.if_unmodified_since {
if last_modified > date {
return Err(Error::Precondition {
path: location.to_string(),
source: format!("{} < {}", date, last_modified).into(),
path: meta.location.to_string(),
source: format!("{date} < {last_modified}").into(),
});
}
}

if let Some(m) = &self.if_none_match {
if m == "*" || m.split(',').map(str::trim).any(|x| x == etag) {
return Err(Error::NotModified {
path: meta.location.to_string(),
source: format!("{etag} matches {m}").into(),
});
}
} else if let Some(date) = self.if_modified_since {
if last_modified <= date {
return Err(Error::NotModified {
path: meta.location.to_string(),
source: format!("{date} >= {last_modified}").into(),
});
}
}
Expand Down Expand Up @@ -952,6 +984,7 @@ mod test_util {
mod tests {
use super::*;
use crate::test_util::flatten_list_stream;
use chrono::TimeZone;
use rand::{thread_rng, Rng};
use tokio::io::AsyncWriteExt;

Expand Down Expand Up @@ -1359,33 +1392,32 @@ mod tests {
Err(e) => panic!("{e}"),
}

if let Some(tag) = meta.e_tag {
let options = GetOptions {
if_match: Some(tag.clone()),
..GetOptions::default()
};
storage.get_opts(&path, options).await.unwrap();

let options = GetOptions {
if_match: Some("invalid".to_string()),
..GetOptions::default()
};
let err = storage.get_opts(&path, options).await.unwrap_err();
assert!(matches!(err, Error::Precondition { .. }), "{err}");

let options = GetOptions {
if_none_match: Some(tag.clone()),
..GetOptions::default()
};
let err = storage.get_opts(&path, options).await.unwrap_err();
assert!(matches!(err, Error::NotModified { .. }), "{err}");

let options = GetOptions {
if_none_match: Some("invalid".to_string()),
..GetOptions::default()
};
storage.get_opts(&path, options).await.unwrap();
}
let tag = meta.e_tag.unwrap();
let options = GetOptions {
if_match: Some(tag.clone()),
..GetOptions::default()
};
storage.get_opts(&path, options).await.unwrap();

let options = GetOptions {
if_match: Some("invalid".to_string()),
..GetOptions::default()
};
let err = storage.get_opts(&path, options).await.unwrap_err();
assert!(matches!(err, Error::Precondition { .. }), "{err}");

let options = GetOptions {
if_none_match: Some(tag.clone()),
..GetOptions::default()
};
let err = storage.get_opts(&path, options).await.unwrap_err();
assert!(matches!(err, Error::NotModified { .. }), "{err}");

let options = GetOptions {
if_none_match: Some("invalid".to_string()),
..GetOptions::default()
};
storage.get_opts(&path, options).await.unwrap();
}

/// Returns a chunk of length `chunk_length`
Expand Down Expand Up @@ -1697,8 +1729,86 @@ mod tests {
assert!(stream.next().await.is_none());
}

// Tests TODO:
// GET nonexisting location (in_memory/file)
// DELETE nonexisting location
// PUT overwriting
#[test]
fn test_preconditions() {
let mut meta = ObjectMeta {
location: Path::from("test"),
last_modified: Utc.timestamp_nanos(100),
size: 100,
e_tag: Some("123".to_string()),
};

let mut options = GetOptions::default();
options.check_preconditions(&meta).unwrap();

options.if_modified_since = Some(Utc.timestamp_nanos(50));
options.check_preconditions(&meta).unwrap();

options.if_modified_since = Some(Utc.timestamp_nanos(100));
options.check_preconditions(&meta).unwrap_err();

options.if_modified_since = Some(Utc.timestamp_nanos(101));
options.check_preconditions(&meta).unwrap_err();

options = GetOptions::default();

options.if_unmodified_since = Some(Utc.timestamp_nanos(50));
options.check_preconditions(&meta).unwrap_err();

options.if_unmodified_since = Some(Utc.timestamp_nanos(100));
options.check_preconditions(&meta).unwrap();

options.if_unmodified_since = Some(Utc.timestamp_nanos(101));
options.check_preconditions(&meta).unwrap();

options = GetOptions::default();

options.if_match = Some("123".to_string());
options.check_preconditions(&meta).unwrap();

options.if_match = Some("123,354".to_string());
options.check_preconditions(&meta).unwrap();

options.if_match = Some("354, 123,".to_string());
options.check_preconditions(&meta).unwrap();

options.if_match = Some("354".to_string());
options.check_preconditions(&meta).unwrap_err();

options.if_match = Some("*".to_string());
options.check_preconditions(&meta).unwrap();

// If-Match takes precedence
options.if_unmodified_since = Some(Utc.timestamp_nanos(200));
options.check_preconditions(&meta).unwrap();

options = GetOptions::default();

options.if_none_match = Some("123".to_string());
options.check_preconditions(&meta).unwrap_err();

options.if_none_match = Some("*".to_string());
options.check_preconditions(&meta).unwrap_err();

options.if_none_match = Some("1232".to_string());
options.check_preconditions(&meta).unwrap();

options.if_none_match = Some("23, 123".to_string());
options.check_preconditions(&meta).unwrap_err();

// If-None-Match takes precedence
options.if_modified_since = Some(Utc.timestamp_nanos(10));
options.check_preconditions(&meta).unwrap_err();

// Check missing ETag
meta.e_tag = None;
options = GetOptions::default();

options.if_none_match = Some("*".to_string()); // Fails if any file exists
options.check_preconditions(&meta).unwrap_err();

options = GetOptions::default();
options.if_match = Some("*".to_string()); // Passes if file exists
options.check_preconditions(&meta).unwrap();
}
}
37 changes: 23 additions & 14 deletions object_store/src/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,23 +365,12 @@ impl ObjectStore for LocalFileSystem {
}

async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
if options.if_match.is_some() || options.if_none_match.is_some() {
return Err(super::Error::NotSupported {
source: "ETags not supported by LocalFileSystem".to_string().into(),
});
}

let location = location.clone();
let path = self.config.path_to_filesystem(&location)?;
maybe_spawn_blocking(move || {
let (file, metadata) = open_file(&path)?;
if options.if_unmodified_since.is_some()
|| options.if_modified_since.is_some()
{
options.check_modified(&location, last_modified(&metadata))?;
}

let meta = convert_metadata(metadata, location)?;
options.check_preconditions(&meta)?;

Ok(GetResult {
payload: GetResultPayload::File(file, path),
Expand Down Expand Up @@ -965,7 +954,7 @@ fn convert_entry(entry: DirEntry, location: Path) -> Result<ObjectMeta> {
convert_metadata(metadata, location)
}

fn last_modified(metadata: &std::fs::Metadata) -> DateTime<Utc> {
fn last_modified(metadata: &Metadata) -> DateTime<Utc> {
metadata
.modified()
.expect("Modified file time should be supported on this platform")
Expand All @@ -977,15 +966,35 @@ fn convert_metadata(metadata: Metadata, location: Path) -> Result<ObjectMeta> {
let size = usize::try_from(metadata.len()).context(FileSizeOverflowedUsizeSnafu {
path: location.as_ref(),
})?;
let inode = get_inode(&metadata);
let mtime = last_modified.timestamp_micros();

// Use an ETag scheme based on that used by many popular HTTP servers
// <https://httpd.apache.org/docs/2.2/mod/core.html#fileetag>
// <https://stackoverflow.com/questions/47512043/how-etags-are-generated-and-configured>
let etag = format!("{inode:x}-{mtime:x}-{size:x}");

Ok(ObjectMeta {
location,
last_modified,
size,
e_tag: None,
e_tag: Some(etag),
})
}

#[cfg(unix)]
/// We include the inode when available to yield an ETag more resistant to collisions
/// and as used by popular web servers such as [Apache](https://httpd.apache.org/docs/2.2/mod/core.html#fileetag)
fn get_inode(metadata: &Metadata) -> u64 {
std::os::unix::fs::MetadataExt::ino(metadata)
}

#[cfg(not(unix))]
/// On platforms where an inode isn't available, fallback to just relying on size and mtime
fn get_inode(metadata: &Metadata) -> u64 {
0
}

/// Convert walkdir results and converts not-found errors into `None`.
/// Convert broken symlinks to `None`.
fn convert_walkdir_result(
Expand Down
Loading

0 comments on commit ab87abd

Please sign in to comment.