From 3b305ae704794bdce60b0a9394224c647dffcb16 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Tue, 19 Mar 2024 01:01:58 -0700 Subject: [PATCH 1/9] Initial test --- crates/aws/Cargo.toml | 1 + crates/aws/src/storage.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/aws/Cargo.toml b/crates/aws/Cargo.toml index 5d2bd5d69e..f667b65072 100644 --- a/crates/aws/Cargo.toml +++ b/crates/aws/Cargo.toml @@ -32,6 +32,7 @@ regex = { workspace = true } uuid = { workspace = true, features = ["serde", "v4"] } url = { workspace = true } backoff = { version = "0.4", features = [ "tokio" ] } +urlencoding = "2.1.3" [dev-dependencies] deltalake-core = { path = "../core", features = ["datafusion"] } diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index 2a57b25de1..8bff9e2136 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -70,6 +70,20 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { }), )?; + let prefix = match urlencoding::decode(prefix.to_string().as_str()) { + Ok(res) => { + println!("TEST CODE - BEFORE DECODE - {:?}", prefix); + let result = Path::from_url_path(res.into_owned())?; + println!("TEST CODE - AFTER DECODE - {:?}", result); + result + } + Err(e) => { + // Default back to prefix as is + println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); + prefix + } + }; + if options .0 .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) From b2439efa0f6eb0da16ed00cf3248ca50e09c7b3b Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Tue, 19 Mar 2024 02:13:57 -0700 Subject: [PATCH 2/9] Initial test --- crates/aws/src/storage.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index 8bff9e2136..d14c3dc3c0 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -62,6 +62,38 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(options); + + let mut builder = object_store::aws::AmazonS3Builder::new(); + + let url_decoded = match urlencoding::decode(url.as_str()) { + Ok(res) => { + println!("TEST CODE - BEFORE DECODE - {:?}", url); + let result = res.into_owned(); + println!("TEST CODE - AFTER DECODE - {:?}", result); + result + } + Err(e) => { + // Default back to prefix as is + println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); + url.as_str().to_string() + } + }; + + builder = builder.with_url(url_decoded); + + // TODO: better code + // TODO: get rid of clone + for (key, value) in options.clone().0.into_iter() { + let s3_key = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()).ok(); + if let Some(s3_key) = s3_key { + builder = builder.with_config(s3_key, value.clone()); + } + } + + + // TODO: fix prefix + let (store, prefix) = (Box::new(builder.build()?) as Box, Path::from_absolute_path("")?); + /* let (store, prefix) = parse_url_opts( url, options.0.iter().filter_map(|(key, value)| { @@ -69,7 +101,9 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { Some((s3_key, value.clone())) }), )?; + */ + /* let prefix = match urlencoding::decode(prefix.to_string().as_str()) { Ok(res) => { println!("TEST CODE - BEFORE DECODE - {:?}", prefix); @@ -83,6 +117,7 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { prefix } }; + */ if options .0 From a547f7b6f4ffa2d790e0d2fc1defb3fb19536360 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Tue, 19 Mar 2024 02:26:18 -0700 Subject: [PATCH 3/9] Initial test --- crates/aws/src/storage.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index d14c3dc3c0..621abdd042 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -65,7 +65,7 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { let mut builder = object_store::aws::AmazonS3Builder::new(); - let url_decoded = match urlencoding::decode(url.as_str()) { + /* let url_decoded = match urlencoding::decode(url.as_str()) { Ok(res) => { println!("TEST CODE - BEFORE DECODE - {:?}", url); let result = res.into_owned(); @@ -78,8 +78,9 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { url.as_str().to_string() } }; + */ - builder = builder.with_url(url_decoded); + builder = builder.with_url(url.as_str()); // TODO: better code // TODO: get rid of clone From b00b149769298bde88f2841e34b18cbf14273898 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Tue, 19 Mar 2024 02:30:12 -0700 Subject: [PATCH 4/9] Initial test --- crates/aws/src/storage.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index 621abdd042..a1be1efd8b 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -63,9 +63,10 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(options); + /* let mut builder = object_store::aws::AmazonS3Builder::new(); - /* let url_decoded = match urlencoding::decode(url.as_str()) { + let url_decoded = match urlencoding::decode(url.as_str()) { Ok(res) => { println!("TEST CODE - BEFORE DECODE - {:?}", url); let result = res.into_owned(); @@ -78,7 +79,6 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { url.as_str().to_string() } }; - */ builder = builder.with_url(url.as_str()); @@ -91,10 +91,9 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { } } - // TODO: fix prefix let (store, prefix) = (Box::new(builder.build()?) as Box, Path::from_absolute_path("")?); - /* + */ let (store, prefix) = parse_url_opts( url, options.0.iter().filter_map(|(key, value)| { @@ -102,7 +101,6 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { Some((s3_key, value.clone())) }), )?; - */ /* let prefix = match urlencoding::decode(prefix.to_string().as_str()) { From dc5afbd77e08c3aec058e5c2550c1744c20f1e6e Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Tue, 19 Mar 2024 02:45:33 -0700 Subject: [PATCH 5/9] Initial test --- crates/aws/src/lib.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index 2630f80512..ac22dbe977 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -41,7 +41,21 @@ impl LogStoreFactory for S3LogStoreFactory { location: &Url, options: &StorageOptions, ) -> DeltaResult> { - let store = url_prefix_handler(store, Path::parse(location.path())?)?; + let path_decoded = match urlencoding::decode(location.path()) { + Ok(res) => { + println!("TEST CODE - BEFORE DECODE - {:?}", location); + let result = res.into_owned(); + println!("TEST CODE - AFTER DECODE - {:?}", result); + result + } + Err(e) => { + // Default back to prefix as is + println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); + location.as_str().to_string() + } + }; + + let store = url_prefix_handler(store, Path::parse(path_decoded)?)?; if options .0 From 672331e3afecb2aa0c6ec1de0384701ddb3a71b6 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Wed, 20 Mar 2024 15:32:22 -0700 Subject: [PATCH 6/9] Add decoding of prefix for s3 path --- crates/aws/src/lib.rs | 5 +---- crates/aws/src/storage.rs | 47 --------------------------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index ac22dbe977..88ab085bea 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -43,15 +43,12 @@ impl LogStoreFactory for S3LogStoreFactory { ) -> DeltaResult> { let path_decoded = match urlencoding::decode(location.path()) { Ok(res) => { - println!("TEST CODE - BEFORE DECODE - {:?}", location); let result = res.into_owned(); - println!("TEST CODE - AFTER DECODE - {:?}", result); result } Err(e) => { // Default back to prefix as is - println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); - location.as_str().to_string() + location.path().to_string() } }; diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index a1be1efd8b..fb327f2981 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -63,37 +63,6 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(options); - /* - let mut builder = object_store::aws::AmazonS3Builder::new(); - - let url_decoded = match urlencoding::decode(url.as_str()) { - Ok(res) => { - println!("TEST CODE - BEFORE DECODE - {:?}", url); - let result = res.into_owned(); - println!("TEST CODE - AFTER DECODE - {:?}", result); - result - } - Err(e) => { - // Default back to prefix as is - println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); - url.as_str().to_string() - } - }; - - builder = builder.with_url(url.as_str()); - - // TODO: better code - // TODO: get rid of clone - for (key, value) in options.clone().0.into_iter() { - let s3_key = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()).ok(); - if let Some(s3_key) = s3_key { - builder = builder.with_config(s3_key, value.clone()); - } - } - - // TODO: fix prefix - let (store, prefix) = (Box::new(builder.build()?) as Box, Path::from_absolute_path("")?); - */ let (store, prefix) = parse_url_opts( url, options.0.iter().filter_map(|(key, value)| { @@ -102,22 +71,6 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { }), )?; - /* - let prefix = match urlencoding::decode(prefix.to_string().as_str()) { - Ok(res) => { - println!("TEST CODE - BEFORE DECODE - {:?}", prefix); - let result = Path::from_url_path(res.into_owned())?; - println!("TEST CODE - AFTER DECODE - {:?}", result); - result - } - Err(e) => { - // Default back to prefix as is - println!("TEST CODE - COULD NOT DECODE, err: {:?}", e); - prefix - } - }; - */ - if options .0 .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) From d6fb30a4da8573685abbd2b9f829fb62ae0566a7 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Wed, 20 Mar 2024 15:33:25 -0700 Subject: [PATCH 7/9] Add decoding of prefix for s3 path --- crates/aws/src/storage.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index fb327f2981..2a57b25de1 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -62,7 +62,6 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(options); - let (store, prefix) = parse_url_opts( url, options.0.iter().filter_map(|(key, value)| { From 5eed1182ea7f1ceb4d4543a1afe07fab3d438ba5 Mon Sep 17 00:00:00 2001 From: alexr-bq <72045206+alexr-bq@users.noreply.github.com> Date: Fri, 22 Mar 2024 20:02:25 -0700 Subject: [PATCH 8/9] Update crates/aws/src/lib.rs Co-authored-by: R. Tyler Croy --- crates/aws/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index f16d662dec..b7651791a6 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -47,10 +47,7 @@ impl LogStoreFactory for S3LogStoreFactory { options: &StorageOptions, ) -> DeltaResult> { let path_decoded = match urlencoding::decode(location.path()) { - Ok(res) => { - let result = res.into_owned(); - result - } + Ok(res) => res.into_owned(), Err(e) => { // Default back to prefix as is location.path().to_string() From fc98ef2edb03d73fec407b46ba39cd42600f16f1 Mon Sep 17 00:00:00 2001 From: Alex Rehnby-Martin Date: Fri, 22 Mar 2024 20:05:06 -0700 Subject: [PATCH 9/9] Add comment --- crates/aws/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index b7651791a6..41b3b83759 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -46,6 +46,7 @@ impl LogStoreFactory for S3LogStoreFactory { location: &Url, options: &StorageOptions, ) -> DeltaResult> { + // Decode this path, as this prefix is passed into an S3 API that does not expect a URL-encoded path let path_decoded = match urlencoding::decode(location.path()) { Ok(res) => res.into_owned(), Err(e) => {