Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: delta-io/delta-rs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 9d686df0cb0f8a63fb8ef0403ebe40675d0a8024
Choose a base ref
..
head repository: delta-io/delta-rs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: d4bfacd015c5a056aa598fa659a68633fe29468f
Choose a head ref
Showing with 1,257 additions and 595 deletions.
  1. +1 −0 .github/workflows/build.yml
  2. +9 −9 Cargo.toml
  3. +9 −8 crates/aws/Cargo.toml
  4. +138 −0 crates/aws/src/constants.rs
  5. +229 −95 crates/aws/src/credentials.rs
  6. +5 −37 crates/aws/src/lib.rs
  7. +156 −345 crates/aws/src/storage.rs
  8. +2 −2 crates/azure/Cargo.toml
  9. +2 −2 crates/catalog-glue/Cargo.toml
  10. +1 −1 crates/core/Cargo.toml
  11. +4 −2 crates/core/src/delta_datafusion/mod.rs
  12. +124 −7 crates/core/src/kernel/models/actions.rs
  13. +1 −1 crates/core/src/lib.rs
  14. +196 −0 crates/core/src/operations/add_feature.rs
  15. +3 −3 crates/core/src/operations/cdc.rs
  16. +2 −2 crates/core/src/operations/convert_to_delta.rs
  17. +6 −6 crates/core/src/operations/create.rs
  18. +4 −4 crates/core/src/operations/delete.rs
  19. +2 −2 crates/core/src/operations/load_cdf.rs
  20. +4 −4 crates/core/src/operations/merge/mod.rs
  21. +8 −0 crates/core/src/operations/mod.rs
  22. +3 −3 crates/core/src/operations/transaction/protocol.rs
  23. +4 −4 crates/core/src/operations/update.rs
  24. +5 −5 crates/core/src/operations/write.rs
  25. +9 −1 crates/core/src/protocol/mod.rs
  26. +20 −20 crates/core/src/table/config.rs
  27. +2 −2 crates/core/src/writer/test_utils.rs
  28. +7 −7 crates/core/tests/checkpoint_writer.rs
  29. +1 −0 crates/core/tests/command_merge.rs
  30. +7 −7 crates/deltalake/Cargo.toml
  31. +2 −2 crates/gcp/Cargo.toml
  32. +2 −2 crates/hdfs/Cargo.toml
  33. +2 −2 crates/mount/Cargo.toml
  34. +2 −2 crates/test/Cargo.toml
  35. +2 −2 docs/integrations/delta-lake-dagster.md
  36. +1 −0 python/deltalake/__init__.py
  37. +36 −0 python/deltalake/_internal.pyi
  38. +2 −0 python/deltalake/schema.py
  39. +41 −5 python/deltalake/table.py
  40. +56 −0 python/src/features.rs
  41. +31 −0 python/src/lib.rs
  42. +1 −0 python/stubs/pyarrow/__init__.pyi
  43. +7 −0 python/tests/conftest.py
  44. +83 −1 python/tests/test_alter.py
  45. +25 −0 python/tests/test_schema.py
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -96,6 +96,7 @@ jobs:
# https://github.com/rust-lang/cargo/issues/10280
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
RUST_BACKTRACE: "1"
RUST_LOG: debug
AWS_DEFAULT_REGION: "us-east-1"
AWS_ACCESS_KEY_ID: deltalake
AWS_SECRET_ACCESS_KEY: weloverust
18 changes: 9 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -45,15 +45,15 @@ object_store = { version = "0.11" }
parquet = { version = "53" }

# datafusion
datafusion = { version = "41" }
datafusion-expr = { version = "41" }
datafusion-common = { version = "41" }
datafusion-proto = { version = "41" }
datafusion-sql = { version = "41" }
datafusion-physical-expr = { version = "41" }
datafusion-physical-plan = { version = "41" }
datafusion-functions = { version = "41" }
datafusion-functions-aggregate = { version = "41" }
datafusion = { version = "42" }
datafusion-expr = { version = "42" }
datafusion-common = { version = "42" }
datafusion-proto = { version = "42" }
datafusion-sql = { version = "42" }
datafusion-physical-expr = { version = "42" }
datafusion-physical-plan = { version = "42" }
datafusion-functions = { version = "42" }
datafusion-functions-aggregate = { version = "42" }

# serde
serde = { version = "1.0.194", features = ["derive"] }
17 changes: 9 additions & 8 deletions crates/aws/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "deltalake-aws"
version = "0.2.0"
version = "0.3.0"
authors.workspace = true
keywords.workspace = true
readme.workspace = true
@@ -12,19 +12,20 @@ repository.workspace = true
rust-version.workspace = true

[dependencies]
deltalake-core = { version = "0.19.1", path = "../core" }
aws-smithy-runtime-api = { version="1.1.7" }
aws-smithy-runtime = { version="1.1.7", optional = true}
aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]}
aws-config = { version = "1.1.6", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] }
aws-sdk-dynamodb = {version = "1.15.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
aws-sdk-sts = {version = "1.1.6", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
deltalake-core = { version = "0.20.0", path = "../core" }
aws-smithy-runtime-api = { version="1.7" }
aws-smithy-runtime = { version="1.7", optional = true}
aws-credential-types = { version="1.2", features = ["hardcoded-credentials"]}
aws-config = { version = "1.5", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] }
aws-sdk-dynamodb = {version = "1.45", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
aws-sdk-sts = {version = "1.42", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
lazy_static = "1"
maplit = "1"

# workspace dependencies
async-trait = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
futures = { workspace = true }
tracing = { workspace = true }
object_store = { workspace = true, features = ["aws"]}
138 changes: 138 additions & 0 deletions crates/aws/src/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
//! Constants used for modifying and configuring various AWS S3 (or similar) connections with
//! delta-rs
//!
use lazy_static::lazy_static;
use std::time::Duration;

/// Custom S3 endpoint.
pub const AWS_ENDPOINT_URL: &str = "AWS_ENDPOINT_URL";
/// Custom DynamoDB endpoint.
/// If DynamoDB endpoint is not supplied, will use S3 endpoint (AWS_ENDPOINT_URL)
/// If it is supplied, this endpoint takes precedence over the global endpoint set in AWS_ENDPOINT_URL for DynamoDB
pub const AWS_ENDPOINT_URL_DYNAMODB: &str = "AWS_ENDPOINT_URL_DYNAMODB";
/// The AWS region.
pub const AWS_REGION: &str = "AWS_REGION";
/// The AWS profile.
pub const AWS_PROFILE: &str = "AWS_PROFILE";
/// The AWS_ACCESS_KEY_ID to use for S3.
pub const AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID";
/// The AWS_SECRET_ACCESS_KEY to use for S3.
pub const AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY";
/// The AWS_SESSION_TOKEN to use for S3.
pub const AWS_SESSION_TOKEN: &str = "AWS_SESSION_TOKEN";
/// Uses either "path" (the default) or "virtual", which turns on
/// [virtual host addressing](http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html).
pub const AWS_S3_ADDRESSING_STYLE: &str = "AWS_S3_ADDRESSING_STYLE";
/// Locking provider to use for safe atomic rename.
/// `dynamodb` is currently the only supported locking provider.
/// If not set, safe atomic rename is not available.
pub const AWS_S3_LOCKING_PROVIDER: &str = "AWS_S3_LOCKING_PROVIDER";
/// The role to assume for S3 writes.
pub const AWS_IAM_ROLE_ARN: &str = "AWS_IAM_ROLE_ARN";
/// The role to assume. Please use [AWS_IAM_ROLE_ARN] instead
#[deprecated(since = "0.20.0", note = "Please use AWS_IAM_ROLE_ARN instead")]
pub const AWS_S3_ASSUME_ROLE_ARN: &str = "AWS_S3_ASSUME_ROLE_ARN";
/// The role session name to use when a role is assumed. If not provided a random session name is generated.
pub const AWS_IAM_ROLE_SESSION_NAME: &str = "AWS_IAM_ROLE_SESSION_NAME";
/// The role session name to use when a role is assumed. If not provided a random session name is generated.
#[deprecated(
since = "0.20.0",
note = "Please use AWS_IAM_ROLE_SESSION_NAME instead"
)]
pub const AWS_S3_ROLE_SESSION_NAME: &str = "AWS_S3_ROLE_SESSION_NAME";
/// The `pool_idle_timeout` option of aws http client. Has to be lower than 20 seconds, which is
/// default S3 server timeout <https://aws.amazon.com/premiumsupport/knowledge-center/s3-socket-connection-timeout-error/>.
/// However, since rusoto uses hyper as a client, its default timeout is 90 seconds
/// <https://docs.rs/hyper/0.13.2/hyper/client/struct.Builder.html#method.keep_alive_timeout>.
/// Hence, the `connection closed before message completed` could occur.
/// To avoid that, the default value of this setting is 15 seconds if it's not set otherwise.
pub const AWS_S3_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_S3_POOL_IDLE_TIMEOUT_SECONDS";
/// The `pool_idle_timeout` for the as3_constants sts client. See
/// the reasoning in `AWS_S3_POOL_IDLE_TIMEOUT_SECONDS`.
pub const AWS_STS_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_STS_POOL_IDLE_TIMEOUT_SECONDS";
/// The number of retries for S3 GET requests failed with 500 Internal Server Error.
pub const AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES: &str =
"AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES";
/// The web identity token file to use when using a web identity provider.
/// NOTE: web identity related options are set in the environment when
/// creating an instance of [crate::storage::s3::S3StorageOptions].
/// See also <https://docs.rs/rusoto_sts/0.47.0/rusoto_sts/struct.WebIdentityProvider.html#method.from_k8s_env>.
pub const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE";
/// The role name to use for web identity.
/// NOTE: web identity related options are set in the environment when
/// creating an instance of [crate::storage::s3::S3StorageOptions].
/// See also <https://docs.rs/rusoto_sts/0.47.0/rusoto_sts/struct.WebIdentityProvider.html#method.from_k8s_env>.
pub const AWS_ROLE_ARN: &str = "AWS_ROLE_ARN";
/// The role session name to use for web identity.
/// NOTE: web identity related options are set in the environment when
/// creating an instance of [crate::storage::s3::S3StorageOptions].
/// See also <https://docs.rs/rusoto_sts/0.47.0/rusoto_sts/struct.WebIdentityProvider.html#method.from_k8s_env>.
pub const AWS_ROLE_SESSION_NAME: &str = "AWS_ROLE_SESSION_NAME";
/// Allow http connections - mainly useful for integration tests
pub const AWS_ALLOW_HTTP: &str = "AWS_ALLOW_HTTP";

/// If set to "true", allows creating commits without concurrent writer protection.
/// Only safe if there is one writer to a given table.
pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME";

/// If set to "true", disables the imds client
/// Defaults to "true"
pub const AWS_EC2_METADATA_DISABLED: &str = "AWS_EC2_METADATA_DISABLED";

/// The timeout in milliseconds for the EC2 metadata endpoint
/// Defaults to 100
pub const AWS_EC2_METADATA_TIMEOUT: &str = "AWS_EC2_METADATA_TIMEOUT";

/// The list of option keys owned by the S3 module.
/// Option keys not contained in this list will be added to the `extra_opts`
/// field of [crate::storage::s3::S3StorageOptions].
pub const S3_OPTS: &[&str] = &[
AWS_ENDPOINT_URL,
AWS_ENDPOINT_URL_DYNAMODB,
AWS_REGION,
AWS_PROFILE,
AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY,
AWS_SESSION_TOKEN,
AWS_S3_LOCKING_PROVIDER,
AWS_S3_ASSUME_ROLE_ARN,
AWS_S3_ROLE_SESSION_NAME,
AWS_WEB_IDENTITY_TOKEN_FILE,
AWS_ROLE_ARN,
AWS_ROLE_SESSION_NAME,
AWS_S3_POOL_IDLE_TIMEOUT_SECONDS,
AWS_STS_POOL_IDLE_TIMEOUT_SECONDS,
AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES,
AWS_EC2_METADATA_DISABLED,
AWS_EC2_METADATA_TIMEOUT,
];

pub const DEFAULT_LOCK_TABLE_NAME: &str = "delta_log";
pub const LOCK_TABLE_KEY_NAME: &str = "DELTA_DYNAMO_TABLE_NAME";
pub const BILLING_MODE_KEY_NAME: &str = "DELTA_DYNAMO_BILLING_MODE";
pub const MAX_ELAPSED_REQUEST_TIME_KEY_NAME: &str = "DELTA_DYNAMO_MAX_ELAPSED_REQUEST_TIME";

pub const ATTR_TABLE_PATH: &str = "tablePath";
pub const ATTR_FILE_NAME: &str = "fileName";
pub const ATTR_TEMP_PATH: &str = "tempPath";
pub const ATTR_COMPLETE: &str = "complete";
pub const ATTR_EXPIRE_TIME: &str = "expireTime";

pub const STRING_TYPE: &str = "S";

pub const KEY_TYPE_HASH: &str = "HASH";
pub const KEY_TYPE_RANGE: &str = "RANGE";

lazy_static! {
pub static ref CONDITION_EXPR_CREATE: String = format!(
"attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME})"
);

pub static ref CONDITION_DELETE_INCOMPLETE: String = format!(
"(complete = :f) or (attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME}))"
);
}

pub const CONDITION_UPDATE_INCOMPLETE: &str = "complete = :f";
pub const DEFAULT_COMMIT_ENTRY_EXPIRATION_DELAY: Duration = Duration::from_secs(86_400);
Loading