From e1f7c424ae0687c816439320042b9841394b2a47 Mon Sep 17 00:00:00 2001 From: Nugine Date: Tue, 16 Jan 2024 18:32:46 +0800 Subject: [PATCH 1/5] fix coc --- CODE_OF_CONDUCT.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 4d2ad984..12765f86 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -8,5 +8,3 @@ For example, the political statements in the pages below will be rejected. + + + -+ - From 400f0752272199cab25a84443e725a619f5fa027 Mon Sep 17 00:00:00 2001 From: Nugine Date: Tue, 16 Jan 2024 18:32:46 +0800 Subject: [PATCH 2/5] s3s-fs: cli and logging --- crates/s3s-fs/src/main.rs | 30 +++++++++++++++++++++--------- crates/s3s-proxy/src/main.rs | 4 ++-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/crates/s3s-fs/src/main.rs b/crates/s3s-fs/src/main.rs index 3a9dc2de..663c86df 100644 --- a/crates/s3s-fs/src/main.rs +++ b/crates/s3s-fs/src/main.rs @@ -7,10 +7,11 @@ use s3s_fs::Result; use s3s::auth::SimpleAuth; use s3s::service::S3ServiceBuilder; +use std::io::IsTerminal; use std::net::TcpListener; use std::path::PathBuf; -use clap::Parser; +use clap::{CommandFactory, Parser}; use hyper::server::Server; use tracing::info; @@ -26,11 +27,11 @@ struct Opt { port: u16, /// Access key used for authentication. - #[arg(long, requires("secret-key"))] + #[arg(long)] access_key: Option, /// Secret key used for authentication. - #[arg(long, requires("access-key"))] + #[arg(long)] secret_key: Option, /// Domain name used for virtual-hosted-style requests. @@ -45,8 +46,7 @@ fn setup_tracing() { use tracing_subscriber::EnvFilter; let env_filter = EnvFilter::from_default_env(); - // let enable_color = std::io::stdout().is_terminal(); // TODO - let enable_color = false; + let enable_color = std::io::stdout().is_terminal(); tracing_subscriber::fmt() .pretty() @@ -55,18 +55,28 @@ fn setup_tracing() { .init(); } -fn check_cli_args(opt: &Opt) -> Result<(), String> { +fn check_cli_args(opt: &Opt) { + use clap::error::ErrorKind; + + let mut cmd = Opt::command(); + + // TODO: how to specify the requirements with clap derive API? 
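// (Illustrative sketch, not part of this patch: one possible answer to the TODO above,
// assuming clap v4, where the derive API uses the snake_case field name as the
// argument id, is to declare the mutual requirement directly on the fields:
//
//     #[arg(long, requires = "secret_key")]
//     access_key: Option<String>,
//
//     #[arg(long, requires = "access_key")]
//     secret_key: Option<String>,
//
// The explicit check below produces a single combined error message for both flags.)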
+ if let (Some(_), None) | (None, Some(_)) = (&opt.access_key, &opt.secret_key) { + let msg = "access key and secret key must be specified together"; + cmd.error(ErrorKind::MissingRequiredArgument, msg).exit(); + } + if let Some(ref s) = opt.domain_name { if s.contains('/') { - return Err(format!("expected domain name, found URL-like string: {s:?}")); + let msg = format!("expected domain name, found URL-like string: {s:?}"); + cmd.error(ErrorKind::InvalidValue, msg).exit(); } } - Ok(()) } fn main() -> Result { let opt = Opt::parse(); - check_cli_args(&opt).map_err(s3s_fs::Error::from_string)?; + check_cli_args(&opt); setup_tracing(); @@ -85,11 +95,13 @@ async fn run(opt: Opt) -> Result { // Enable authentication if let (Some(ak), Some(sk)) = (opt.access_key, opt.secret_key) { b.set_auth(SimpleAuth::from_single(ak, sk)); + info!("authentication is enabled"); } // Enable parsing virtual-hosted-style requests if let Some(domain_name) = opt.domain_name { b.set_base_domain(domain_name); + info!("virtual-hosted-style requests are enabled"); } b.build() diff --git a/crates/s3s-proxy/src/main.rs b/crates/s3s-proxy/src/main.rs index 1ad6dca0..233dd044 100644 --- a/crates/s3s-proxy/src/main.rs +++ b/crates/s3s-proxy/src/main.rs @@ -5,6 +5,7 @@ use s3s::auth::SimpleAuth; use s3s::service::S3ServiceBuilder; use std::error::Error; +use std::io::IsTerminal; use std::net::TcpListener; use aws_credential_types::provider::ProvideCredentials; @@ -32,8 +33,7 @@ fn setup_tracing() { use tracing_subscriber::EnvFilter; let env_filter = EnvFilter::from_default_env(); - // let enable_color = std::io::stdout().is_terminal(); // TODO - let enable_color = false; + let enable_color = std::io::stdout().is_terminal(); tracing_subscriber::fmt() .pretty() From 6a042ab148e5a24d051444498f28ae798ee5f90a Mon Sep 17 00:00:00 2001 From: nomick Date: Mon, 22 Jan 2024 03:40:20 +0100 Subject: [PATCH 3/5] Fix s3UnwrappedXmlOutput of LocationConstraint (#127) * Fix s3UnwrappedXmlOutput of LocationConstraint * Remove field in struct type * add canary assertion * add xml test --------- Co-authored-by: Nugine --- codegen/src/ops.rs | 10 ++++++++++ codegen/src/smithy.rs | 4 ++++ codegen/src/xml.rs | 9 ++++++++- crates/s3s/src/xml/generated.rs | 2 +- crates/s3s/src/xml/tests.rs | 34 +++++++++++++++++++++++++++++++++ 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/codegen/src/ops.rs b/codegen/src/ops.rs index ee64a3e7..23f14b64 100644 --- a/codegen/src/ops.rs +++ b/codegen/src/ops.rs @@ -24,6 +24,8 @@ pub struct Operation { pub smithy_input: String, pub smithy_output: String, + pub s3_unwrapped_xml_output: bool, + pub doc: Option, pub http_method: String, @@ -77,6 +79,12 @@ pub fn collect_operations(model: &smithy::Model) -> Operations { smithy_http_code }; + // https://smithy.io/2.0/aws/customizations/s3-customizations.html + // https://github.com/Nugine/s3s/pull/127 + if sh.traits.s3_unwrapped_xml_output() { + assert_eq!(op_name, "GetBucketLocation"); + } + let op = Operation { name: op_name.clone(), @@ -86,6 +94,8 @@ pub fn collect_operations(model: &smithy::Model) -> Operations { smithy_input, smithy_output, + s3_unwrapped_xml_output: sh.traits.s3_unwrapped_xml_output(), + doc: sh.traits.doc().map(o), http_method: sh.traits.http_method().unwrap().to_owned(), diff --git a/codegen/src/smithy.rs b/codegen/src/smithy.rs index e166b1e8..ee76fad9 100644 --- a/codegen/src/smithy.rs +++ b/codegen/src/smithy.rs @@ -209,6 +209,10 @@ impl Traits { self.get("smithy.api#xmlFlattened").is_some() } + pub fn 
s3_unwrapped_xml_output(&self) -> bool { + self.get("aws.customizations#s3UnwrappedXmlOutput").is_some() + } + pub fn http_label(&self) -> Option<&Value> { self.get("smithy.api#httpLabel") } diff --git a/codegen/src/xml.rs b/codegen/src/xml.rs index 5d8e1269..ba9365a0 100644 --- a/codegen/src/xml.rs +++ b/codegen/src/xml.rs @@ -154,8 +154,15 @@ fn codegen_xml_ser(ops: &Operations, rust_types: &RustTypes) { g!("s.timestamp(\"{}\", &self.{}, TimestampFormat::{})?;", xml_name, field.name, fmt); } } else if field.option_type { + let s3_unwrapped_xml_output = + ops.iter().any(|(_, op)| op.s3_unwrapped_xml_output && op.output == ty.name); + g!("if let Some(ref val) = self.{} {{", field.name); - g!("s.content(\"{xml_name}\", val)?;"); + if s3_unwrapped_xml_output { + g!("val.serialize_content(s)?;"); + } else { + g!("s.content(\"{xml_name}\", val)?;"); + } g!("}}"); } else { let default_is_zero = match field.default_value.as_ref() { diff --git a/crates/s3s/src/xml/generated.rs b/crates/s3s/src/xml/generated.rs index ccf1cf9f..76dce69f 100644 --- a/crates/s3s/src/xml/generated.rs +++ b/crates/s3s/src/xml/generated.rs @@ -505,7 +505,7 @@ impl SerializeContent for GetBucketLifecycleConfigurationOutput { impl SerializeContent for GetBucketLocationOutput { fn serialize_content(&self, s: &mut Serializer) -> SerResult { if let Some(ref val) = self.location_constraint { - s.content("LocationConstraint", val)?; + val.serialize_content(s)?; } Ok(()) } diff --git a/crates/s3s/src/xml/tests.rs b/crates/s3s/src/xml/tests.rs index f938bbc4..5a79c8a0 100644 --- a/crates/s3s/src/xml/tests.rs +++ b/crates/s3s/src/xml/tests.rs @@ -19,6 +19,15 @@ fn serialize_content(val: &T) -> xml::SerResult(val: &T) -> xml::SerResult { + let mut buf = Vec::with_capacity(256); + { + let mut ser = xml::Serializer::new(&mut buf); + val.serialize(&mut ser)?; + } + Ok(String::from_utf8(buf).unwrap()) +} + /// See #[test] fn d001() { @@ -146,3 +155,28 @@ fn s001() { assert_eq!(ans, expected); } + +#[test] +fn s002() { + { + let us_west_2 = crate::dto::BucketLocationConstraint::from_static(crate::dto::BucketLocationConstraint::US_WEST_2); + let val = crate::dto::GetBucketLocationOutput { + location_constraint: Some(us_west_2), + }; + + let ans = serialize(&val).unwrap(); + let expected = "us-west-2"; + + assert_eq!(ans, expected); + } + { + let val = crate::dto::GetBucketLocationOutput { + location_constraint: None, + }; + + let ans = serialize(&val).unwrap(); + let expected = ""; + + assert_eq!(ans, expected); + } +} From 2ff56c28e2ef997c944e0dd5475a2ad2fb884bf5 Mon Sep 17 00:00:00 2001 From: nomick Date: Mon, 22 Jan 2024 05:03:53 +0100 Subject: [PATCH 4/5] Fix range requests for GetObject (#128) * Fix: GetObject range request * fix satisfaction check * content range --------- Co-authored-by: Nugine --- crates/s3s-fs/src/s3.rs | 14 +++++++-- crates/s3s/src/dto/range.rs | 60 +++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/crates/s3s-fs/src/s3.rs b/crates/s3s-fs/src/s3.rs index e16fff23..9070e384 100644 --- a/crates/s3s-fs/src/s3.rs +++ b/crates/s3s-fs/src/s3.rs @@ -48,6 +48,11 @@ fn normalize_path(path: &Path, delimiter: &str) -> Option { Some(normalized) } +/// +fn fmt_content_range(start: u64, end_inclusive: u64, size: u64) -> String { + format!("bytes {start}-{end_inclusive}/{size}") +} + #[async_trait::async_trait] impl S3 for FileSystem { #[tracing::instrument] @@ -195,11 +200,13 @@ impl S3 for FileSystem { let last_modified = Timestamp::from(try_!(file_metadata.modified())); 
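// (Illustrative example, values assumed, of the range handling introduced below:
// for a 10000-byte object and a request header `Range: bytes=9500-`,
// `range.check(10000)` yields the half-open range 9500..10000, so the
// content_length is 500 and the Content-Range header is built from the inclusive
// end: fmt_content_range(9500, 9999, 10000) == "bytes 9500-9999/10000".)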
let file_len = file_metadata.len(); - let content_length = match input.range { - None => file_len, + let (content_length, content_range) = match input.range { + None => (file_len, None), Some(range) => { let file_range = range.check(file_len)?; - file_range.end - file_range.start + let content_length = file_range.end - file_range.start; + let content_range = fmt_content_range(file_range.start, file_range.end - 1, file_len); + (content_length, Some(content_range)) } }; let content_length_usize = try_!(usize::try_from(content_length)); @@ -232,6 +239,7 @@ impl S3 for FileSystem { let output = GetObjectOutput { body: Some(StreamingBlob::wrap(body)), content_length: content_length_i64, + content_range, last_modified: Some(last_modified), metadata: object_metadata, e_tag: Some(e_tag), diff --git a/crates/s3s/src/dto/range.rs b/crates/s3s/src/dto/range.rs index 4664980e..57ff221e 100644 --- a/crates/s3s/src/dto/range.rs +++ b/crates/s3s/src/dto/range.rs @@ -103,31 +103,37 @@ impl Range { } /// Checks if the range is satisfiable + /// according to [RFC9110](https://www.rfc-editor.org/rfc/rfc9110.html#name-byte-ranges). /// # Errors /// Returns an error if the range is not satisfiable #[allow(clippy::range_plus_one)] // cannot be fixed pub fn check(&self, full_length: u64) -> Result, RangeNotSatisfiable> { let err = || RangeNotSatisfiable { _priv: () }; match *self { - Range::Int { first, last } => match last { - Some(last) => { - if first > last || last >= full_length { - return Err(err()); - } - // first <= last < full_length - Ok(first..last + 1) + Range::Int { first, last } => { + if first >= full_length { + return Err(err()); } - None => { - if first > full_length { - return Err(err()); + // 0 <= first < full_length + + match last { + Some(last) => { + let last = last.min(full_length - 1); + if first > last { + return Err(err()); + } + // 0 <= first <= last < full_length + Ok(first..last + 1) } - Ok(first..full_length) + // 0 <= first < full_length + None => Ok(first..full_length), } - }, + } Range::Suffix { length } => { - if length > full_length { + if length == 0 { return Err(err()); } + let length = length.min(full_length); Ok((full_length - length)..full_length) } } @@ -193,6 +199,8 @@ mod tests { ("bytes=-500 ", Err(())), ("bytes=-+500", Err(())), ("bytes=-1000000000000000000000000", Err(())), + ("bytes=-0", Ok(range_suffix(0))), + ("bytes=-000", Ok(range_suffix(0))), ]; for (input, expected) in &cases { @@ -203,4 +211,30 @@ mod tests { } } } + + #[test] + fn satisfiable() { + let cases = [ + (10000, range_int_from(9500), Ok(9500..10000)), + (10000, range_int_from(10000), Err(())), + (10000, range_int_inclusive(0, 499), Ok(0..500)), + (10000, range_int_inclusive(0, 0), Ok(0..1)), + (10000, range_int_inclusive(9500, 50000), Ok(9500..10000)), + (10000, range_int_inclusive(10000, 10000), Err(())), + (10000, range_suffix(500), Ok(9500..10000)), + (10000, range_suffix(10000), Ok(0..10000)), + (10000, range_suffix(0), Err(())), + (0, range_int_from(0), Err(())), + (0, range_suffix(1), Ok(0..0)), + (0, range_suffix(0), Err(())), + ]; + + for &(full_length, ref range, ref expected) in &cases { + let output = range.check(full_length); + match expected { + Ok(expected) => assert_eq!(output.unwrap(), *expected), + Err(()) => assert!(output.is_err(), "{full_length:?}, {range:?}"), + } + } + } } From cff9779ec0f8e37af016c4c3d1689b62bc00d559 Mon Sep 17 00:00:00 2001 From: nomick Date: Wed, 24 Jan 2024 03:43:48 +0100 Subject: [PATCH 5/5] store multipart metadata (#129) --- crates/s3s-fs/src/fs.rs | 26 
++++++++++++++++++++------ crates/s3s-fs/src/s3.rs | 22 +++++++++++++++++----- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/crates/s3s-fs/src/fs.rs b/crates/s3s-fs/src/fs.rs index 9a4c7f18..6801054c 100644 --- a/crates/s3s-fs/src/fs.rs +++ b/crates/s3s-fs/src/fs.rs @@ -74,9 +74,10 @@ impl FileSystem { } /// resolve metadata path under the virtual root (custom format) - pub(crate) fn get_metadata_path(&self, bucket: &str, key: &str) -> Result { + pub(crate) fn get_metadata_path(&self, bucket: &str, key: &str, upload_id: Option) -> Result { let encode = |s: &str| base64_simd::URL_SAFE_NO_PAD.encode_to_string(s); - let file_path = format!(".bucket-{}.object-{}.metadata.json", encode(bucket), encode(key)); + let u_ext = upload_id.map(|u| format!(".upload-{u}")).unwrap_or_default(); + let file_path = format!(".bucket-{}.object-{}{u_ext}.metadata.json", encode(bucket), encode(key)); self.resolve_abs_path(file_path) } @@ -87,8 +88,8 @@ impl FileSystem { } /// load metadata from fs - pub(crate) async fn load_metadata(&self, bucket: &str, key: &str) -> Result> { - let path = self.get_metadata_path(bucket, key)?; + pub(crate) async fn load_metadata(&self, bucket: &str, key: &str, upload_id: Option) -> Result> { + let path = self.get_metadata_path(bucket, key, upload_id)?; if path.exists().not() { return Ok(None); } @@ -98,13 +99,26 @@ impl FileSystem { } /// save metadata to fs - pub(crate) async fn save_metadata(&self, bucket: &str, key: &str, metadata: &dto::Metadata) -> Result<()> { - let path = self.get_metadata_path(bucket, key)?; + pub(crate) async fn save_metadata( + &self, + bucket: &str, + key: &str, + metadata: &dto::Metadata, + upload_id: Option, + ) -> Result<()> { + let path = self.get_metadata_path(bucket, key, upload_id)?; let content = serde_json::to_vec(metadata)?; fs::write(&path, &content).await?; Ok(()) } + /// remove metadata from fs + pub(crate) fn delete_metadata(&self, bucket: &str, key: &str, upload_id: Option) -> Result<()> { + let path = self.get_metadata_path(bucket, key, upload_id)?; + std::fs::remove_file(path)?; + Ok(()) + } + pub(crate) async fn load_internal_info(&self, bucket: &str, key: &str) -> Result> { let path = self.get_internal_info_path(bucket, key)?; if path.exists().not() { diff --git a/crates/s3s-fs/src/s3.rs b/crates/s3s-fs/src/s3.rs index 9070e384..6f4ccde2 100644 --- a/crates/s3s-fs/src/s3.rs +++ b/crates/s3s-fs/src/s3.rs @@ -100,9 +100,9 @@ impl S3 for FileSystem { debug!(from = %src_path.display(), to = %dst_path.display(), "copy file"); - let src_metadata_path = self.get_metadata_path(bucket, key)?; + let src_metadata_path = self.get_metadata_path(bucket, key, None)?; if src_metadata_path.exists() { - let dst_metadata_path = self.get_metadata_path(&input.bucket, &input.key)?; + let dst_metadata_path = self.get_metadata_path(&input.bucket, &input.key, None)?; let _ = try_!(fs::copy(src_metadata_path, dst_metadata_path).await); } @@ -225,7 +225,7 @@ impl S3 for FileSystem { let body = bytes_stream(ReaderStream::with_capacity(file, 4096), content_length_usize); - let object_metadata = self.load_metadata(&input.bucket, &input.key).await?; + let object_metadata = self.load_metadata(&input.bucket, &input.key, None).await?; let md5_sum = self.get_md5_sum(&input.bucket, &input.key).await?; let e_tag = format!("\"{md5_sum}\""); @@ -277,7 +277,7 @@ impl S3 for FileSystem { let last_modified = Timestamp::from(try_!(file_metadata.modified())); let file_len = file_metadata.len(); - let object_metadata = self.load_metadata(&input.bucket, 
&input.key).await?; + let object_metadata = self.load_metadata(&input.bucket, &input.key, None).await?; // TODO: detect content type let content_type = mime::APPLICATION_OCTET_STREAM; @@ -504,7 +504,7 @@ impl S3 for FileSystem { debug!(path = %object_path.display(), ?size, %md5_sum, ?checksum, "write file"); if let Some(ref metadata) = metadata { - self.save_metadata(&bucket, &key, metadata).await?; + self.save_metadata(&bucket, &key, metadata, None).await?; } let mut info: InternalInfo = default(); @@ -532,6 +532,11 @@ impl S3 for FileSystem { let input = req.input; let upload_id = self.create_upload_id(req.credentials.as_ref()).await?; + if let Some(ref metadata) = input.metadata { + self.save_metadata(&input.bucket, &input.key, metadata, Some(upload_id)) + .await?; + } + let output = CreateMultipartUploadOutput { bucket: Some(input.bucket), key: Some(input.key), @@ -715,6 +720,11 @@ impl S3 for FileSystem { self.delete_upload_id(&upload_id).await?; + if let Ok(Some(metadata)) = self.load_metadata(&bucket, &key, Some(upload_id)).await { + self.save_metadata(&bucket, &key, &metadata, None).await?; + let _ = self.delete_metadata(&bucket, &key, Some(upload_id)); + } + let object_path = self.get_object_path(&bucket, &key)?; let mut file_writer = self.prepare_file_write(&object_path).await?; @@ -764,6 +774,8 @@ impl S3 for FileSystem { return Err(s3_error!(AccessDenied)); } + let _ = self.delete_metadata(&bucket, &key, Some(upload_id)); + let prefix = format!(".upload_id-{upload_id}"); let mut iter = try_!(fs::read_dir(&self.root).await); while let Some(entry) = try_!(iter.next_entry().await) {