From 7055b09e8fa0a066e7ccbd4d0f6947c4cbe6f3bd Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Sun, 22 Sep 2024 19:28:55 +0000 Subject: [PATCH] feat: disable AWS configuration resolution for non-AWS S3 storage scenarios This should reduce bugs, reduce warnings, and improve performance when somebody is using this crate against an S3-alike service rather than AWS S3 directly. Currently I cannot think of a valid scenarijo where one would use AWS_ENDPOINT_URL against actual S3 buckets, so using that as the heuristic --- crates/aws/src/constants.rs | 3 + crates/aws/src/lib.rs | 2 +- crates/aws/src/storage.rs | 108 ++++++++++++++------ crates/aws/tests/integration_s3_dynamodb.rs | 13 ++- 4 files changed, 89 insertions(+), 37 deletions(-) diff --git a/crates/aws/src/constants.rs b/crates/aws/src/constants.rs index 73d2da1b48..90c23ff572 100644 --- a/crates/aws/src/constants.rs +++ b/crates/aws/src/constants.rs @@ -84,6 +84,9 @@ pub const AWS_EC2_METADATA_DISABLED: &str = "AWS_EC2_METADATA_DISABLED"; /// Defaults to 100 pub const AWS_EC2_METADATA_TIMEOUT: &str = "AWS_EC2_METADATA_TIMEOUT"; +/// Force the delta-rs to attempt to load AWS credentials +pub const AWS_FORCE_CREDENTIAL_LOAD: &str = "AWS_FORCE_CREDENTIAL_LOAD"; + /// The list of option keys owned by the S3 module. /// Option keys not contained in this list will be added to the `extra_opts` /// field of [crate::storage::s3::S3StorageOptions]. diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index ffaae15333..ddb768bdd9 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -725,7 +725,7 @@ mod tests { let factory = S3LogStoreFactory::default(); let store = InMemory::new(); let url = Url::parse("s3://test-bucket").unwrap(); - std::env::remove_var(storage::s3_constants::AWS_S3_LOCKING_PROVIDER); + std::env::remove_var(crate::constants::AWS_S3_LOCKING_PROVIDER); let logstore = factory .with_options(Arc::new(store), &url, &StorageOptions::from(HashMap::new())) .unwrap(); diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index 64c34d5b3a..a6735b1c0f 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -74,14 +74,7 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(storage_options); - let sdk_config = if options.0.contains_key(constants::AWS_ENDPOINT_URL) { - None - } else { - Some(execute_sdk_future( - crate::credentials::resolve_credentials(storage_options.clone()), - )??) - }; - + // All S3-likes should start their builder the same way let mut builder = AmazonS3Builder::new().with_url(url.to_string()); for (key, value) in options.0.iter() { @@ -95,14 +88,18 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { source: Box::new(e), })?; let prefix = Path::parse(path)?; - let inner = if let Some(sdk_config) = sdk_config { - builder.with_credentials(Arc::new(crate::credentials::AWSForObjectStore::new( - sdk_config, - ))) - } else { - builder + + if is_aws(storage_options) { + debug!("Detected AWS S3, resolving credentials"); + let sdk_config = execute_sdk_future(crate::credentials::resolve_credentials( + storage_options.clone(), + ))??; + builder = builder.with_credentials(Arc::new( + crate::credentials::AWSForObjectStore::new(sdk_config), + )); } - .build()?; + + let inner = builder.build()?; let store = aws_storage_handler(limit_store_handler(inner, &options), &options)?; debug!("Initialized the object store: {store:?}"); @@ -136,6 +133,26 @@ fn aws_storage_handler( } } +// Determine whether this crate is being configured for use with native AWS S3 or an S3-alike +// +// This function will rteturn true in the default case since it's most likely that the absence of +// options will mean default/S3 configuration +fn is_aws(options: &StorageOptions) -> bool { + if options + .0 + .contains_key(crate::constants::AWS_FORCE_CREDENTIAL_LOAD) + { + return true; + } + if options + .0 + .contains_key(crate::constants::AWS_S3_LOCKING_PROVIDER) + { + return true; + } + !options.0.contains_key(crate::constants::AWS_ENDPOINT_URL) +} + /// Options used to configure the [S3StorageBackend]. /// /// Available options are described in [s3_constants]. @@ -208,12 +225,14 @@ impl S3StorageOptions { let storage_options = StorageOptions(options.clone()); - let sdk_config = if storage_options.0.contains_key(constants::AWS_ENDPOINT_URL) { - None - } else { - Some(execute_sdk_future( - crate::credentials::resolve_credentials(storage_options.clone()), - )??) + let sdk_config = match is_aws(&storage_options) { + false => None, + true => { + debug!("Detected AWS S3, resolving credentials"); + Some(execute_sdk_future( + crate::credentials::resolve_credentials(storage_options.clone()), + )??) + } }; Ok(Self { @@ -528,10 +547,12 @@ mod tests { let options = S3StorageOptions::try_default().unwrap(); assert_eq!( S3StorageOptions { - sdk_config: SdkConfig::builder() - .endpoint_url("http://localhost".to_string()) - .region(Region::from_static("us-west-1")) - .build(), + sdk_config: Some( + SdkConfig::builder() + .endpoint_url("http://localhost".to_string()) + .region(Region::from_static("us-west-1")) + .build() + ), virtual_hosted_style_request: false, locking_provider: Some("dynamodb".to_string()), dynamodb_endpoint: None, @@ -560,9 +581,11 @@ mod tests { .unwrap(); let mut expected = S3StorageOptions::try_default().unwrap(); - expected.sdk_config = SdkConfig::builder() - .region(Region::from_static("eu-west-1")) - .build(); + expected.sdk_config = Some( + SdkConfig::builder() + .region(Region::from_static("eu-west-1")) + .build(), + ); assert_eq!(expected, options); }); } @@ -670,10 +693,12 @@ mod tests { assert_eq!( S3StorageOptions { - sdk_config: SdkConfig::builder() - .endpoint_url("http://localhost".to_string()) - .region(Region::from_static("us-west-2")) - .build(), + sdk_config: Some( + SdkConfig::builder() + .endpoint_url("http://localhost".to_string()) + .region(Region::from_static("us-west-2")) + .build() + ), virtual_hosted_style_request: false, locking_provider: Some("dynamodb".to_string()), dynamodb_endpoint: Some("http://localhost:dynamodb".to_string()), @@ -767,4 +792,23 @@ mod tests { } }); } + + #[test] + fn test_is_aws() { + let options = StorageOptions::default(); + assert!(is_aws(&options)); + + let minio: HashMap = hashmap! { + crate::constants::AWS_ENDPOINT_URL.to_string() => "http://minio:8080".to_string(), + }; + let options = StorageOptions::from(minio); + assert!(!is_aws(&options)); + + let localstack: HashMap = hashmap! { + crate::constants::AWS_FORCE_CREDENTIAL_LOAD.to_string() => "true".to_string(), + crate::constants::AWS_ENDPOINT_URL.to_string() => "http://minio:8080".to_string(), + }; + let options = StorageOptions::from(localstack); + assert!(is_aws(&options)); + } } diff --git a/crates/aws/tests/integration_s3_dynamodb.rs b/crates/aws/tests/integration_s3_dynamodb.rs index 28c99664f4..da0b0e06c8 100644 --- a/crates/aws/tests/integration_s3_dynamodb.rs +++ b/crates/aws/tests/integration_s3_dynamodb.rs @@ -23,6 +23,7 @@ use lazy_static::lazy_static; use object_store::path::Path; use serde_json::Value; use serial_test::serial; +use tracing::log::*; use maplit::hashmap; use object_store::{PutOptions, PutPayload}; @@ -43,7 +44,7 @@ lazy_static! { fn make_client() -> TestResult { let options: S3StorageOptions = S3StorageOptions::try_default().unwrap(); Ok(DynamoDbLockClient::try_new( - &options.sdk_config, + &options.sdk_config.unwrap(), None, None, None, @@ -74,7 +75,7 @@ fn client_configs_via_env_variables() -> TestResult<()> { billing_mode: BillingMode::PayPerRequest, lock_table_name: "some_table".to_owned(), max_elapsed_request_time: Duration::from_secs(64), - sdk_config: options.sdk_config, + sdk_config: options.sdk_config.unwrap(), }, *config, ); @@ -87,6 +88,7 @@ fn client_configs_via_env_variables() -> TestResult<()> { #[tokio::test] #[serial] async fn test_create_s3_table() -> TestResult<()> { + let _ = pretty_env_logger::try_init(); let context = IntegrationContext::new(Box::new(S3Integration::default()))?; let _client = make_client()?; let table_name = format!("{}_{}", "create_test", uuid::Uuid::new_v4()); @@ -98,8 +100,10 @@ async fn test_create_s3_table() -> TestResult<()> { true, )]); let storage_options: HashMap = hashmap! { - "AWS_ALLOW_HTTP".into() => "true".into(), - "AWS_ENDPOINT_URL".into() => "http://localhost:4566".into(), + deltalake_aws::constants::AWS_ALLOW_HTTP.into() => "true".into(), + // Despite not being in AWS, we should force credential resolution + deltalake_aws::constants::AWS_FORCE_CREDENTIAL_LOAD.into() => "true".into(), + deltalake_aws::constants::AWS_ENDPOINT_URL.into() => "http://localhost:4566".into(), }; let log_store = logstore_for(Url::parse(&table_uri)?, storage_options, None)?; @@ -113,6 +117,7 @@ async fn test_create_s3_table() -> TestResult<()> { ) .await?; + debug!("creating a CreateBuilder"); let _created = CreateBuilder::new() .with_log_store(log_store) .with_partition_columns(vec!["id"])