Skip to content

Commit

Permalink
Merge branch 'main' into cdf_read
Browse files Browse the repository at this point in the history
  • Loading branch information
hntd187 authored Jan 7, 2024
2 parents 3a9d454 + a86cf66 commit 8532288
Show file tree
Hide file tree
Showing 602 changed files with 4,657 additions and 5,865 deletions.
24 changes: 1 addition & 23 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests

- name: Spot-check build for native-tls features
run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue-native-tls --tests
run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue --tests

- name: Check docs
run: cargo doc --features azure,datafusion,s3,gcs,glue
Expand Down Expand Up @@ -144,25 +144,3 @@ jobs:
- name: Run tests with native-tls
run: |
cargo test --no-default-features --features integration_test,s3-native-tls,datafusion
parquet2_test:
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v3

- name: Install minimal stable with clippy and rustfmt
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: stable
override: true

- uses: Swatinem/rust-cache@v2

- name: Run tests
working-directory: crates/deltalake-core
run: cargo test --no-default-features --features=parquet2
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build documentation
name: Build (and maybe release) the documentation

on:
pull_request:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/docs_release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: docs_release
name: Release documentation

on:
pull_request:
Expand All @@ -12,6 +12,8 @@ on:
jobs:
release-docs:
if: github.event.pull_request.merged == true
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- name: Trigger the docs release event
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/python_release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Release to PyPI
name: Release to PyPI and documentation

on:
push:
Expand Down Expand Up @@ -103,6 +103,8 @@ jobs:
release-pypi-mac,
release-pypi-windows,
]
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- name: Trigger the docs release event
Expand Down
36 changes: 19 additions & 17 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,35 @@ debug = "line-tables-only"

[workspace.dependencies]
# arrow
arrow = { version = "48.0.1" }
arrow-arith = { version = "48.0.1" }
arrow-array = { version = "48.0.1" }
arrow-buffer = { version = "48.0.1" }
arrow-cast = { version = "48.0.1" }
arrow-ord = { version = "48.0.1" }
arrow-row = { version = "48.0.1" }
arrow-schema = { version = "48.0.1" }
arrow-select = { version = "48.0.1" }
parquet = { version = "48.0.1" }
arrow = { version = "49" }
arrow-arith = { version = "49" }
arrow-array = { version = "49" }
arrow-buffer = { version = "49" }
arrow-cast = { version = "49" }
arrow-ord = { version = "49" }
arrow-row = { version = "49" }
arrow-schema = { version = "49" }
arrow-select = { version = "49" }
object_store = { version = "0.8" }
parquet = { version = "49" }

# datafusion
datafusion = { version = "33.0.0" }
datafusion-expr = { version = "33.0.0" }
datafusion-common = { version = "33.0.0" }
datafusion-proto = { version = "33.0.0" }
datafusion-sql = { version = "33.0.0" }
datafusion-physical-expr = { version = "33.0.0" }
datafusion = { version = "34" }
datafusion-expr = { version = "34" }
datafusion-common = { version = "34" }
datafusion-proto = { version = "34" }
datafusion-sql = { version = "34" }
datafusion-physical-expr = { version = "34" }


# serde
serde = { version = "1", features = ["derive"] }
serde = { version = "1.0.194", features = ["derive"] }
serde_json = "1"

# "stdlib"
bytes = { version = "1" }
chrono = { version = "0.4.31", default-features = false, features = ["clock"] }
tracing = { version = "0.1", features = ["log"] }
regex = { version = "1" }
thiserror = { version = "1" }
url = { version = "2" }
Expand Down
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc
| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
| Optimize - compaction | ![done] | ![done] | Harmonize the size of data files |
| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
| Merge | [![semi-done]][merge-rs] | [![semi-done]][merge-py] | Merge two tables (limited to full re-write) |
| Merge | ![done] | ![done] | Merge a target Delta table with source data |
| FS check | ![done] | ![done] | Remove corrupted files from table |

### Protocol Support Level
Expand Down Expand Up @@ -182,8 +182,6 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc
[semi-done]: https://cdn.jsdelivr.net/gh/Readme-Workflows/Readme-Icons@main/icons/octicons/ApprovedChangesGrey.svg
[done]: https://cdn.jsdelivr.net/gh/Readme-Workflows/Readme-Icons@main/icons/octicons/ApprovedChanges.svg
[roadmap]: https://github.com/delta-io/delta-rs/issues/1128
[merge-py]: https://github.com/delta-io/delta-rs/issues/1357
[merge-rs]: https://github.com/delta-io/delta-rs/issues/850
[writer-rs]: https://github.com/delta-io/delta-rs/issues/851
[check-constraints]: https://github.com/delta-io/delta-rs/issues/1881
[onelake-rs]: https://github.com/delta-io/delta-rs/issues/1418
Expand Down
10 changes: 9 additions & 1 deletion crates/benchmarks/src/bin/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ async fn benchmark_merge_tpcds(
.filter(col("r").lt_eq(lit(parameters.sample_files)))?;

let file_sample = files.collect_partitioned().await?;
let schema = file_sample.get(0).unwrap().get(0).unwrap().schema();
let schema = file_sample.first().unwrap().first().unwrap().schema();
let mem_table = Arc::new(MemTable::try_new(schema, file_sample)?);
ctx.register_table("file_sample", mem_table)?;
let file_sample_count = ctx.table("file_sample").await?.count().await?;
Expand Down Expand Up @@ -265,6 +265,14 @@ async fn benchmark_merge_tpcds(
.object_store()
.delete(&Path::parse("_delta_log/00000000000000000002.json")?)
.await?;
table
.object_store()
.delete(&Path::parse("_delta_log/00000000000000000003.json")?)
.await?;
let _ = table
.object_store()
.delete(&Path::parse("_delta_log/00000000000000000004.json")?)
.await;

Ok((duration, metrics))
}
Expand Down
29 changes: 21 additions & 8 deletions crates/deltalake-aws/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,46 @@ name = "deltalake-aws"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
deltalake-core = { path = "../deltalake-core" }
rusoto_core = { version = "0.47", default-features = false, optional = true }
rusoto_credential = { version = "0.47", optional = true }
rusoto_credential = { version = "0.47" }
rusoto_sts = { version = "0.47", default-features = false, optional = true }
rusoto_dynamodb = { version = "0.47", default-features = false, optional = true }
object_store = "0.7"
lazy_static = "1"
maplit = "1"

# workspace dependencies
async-trait = { workspace = true }
bytes = { workspace = true }
futures = { workspace = true }
tracing = { workspace = true }
object_store = { workspace = true, features = ["aws"]}
thiserror = { workspace = true }
tokio = { workspace = true }
regex = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }
backoff = { version = "0.4", features = [ "tokio" ] }

[dev-dependencies]
chrono = { workspace = true }
serial_test = "2"
deltalake-test = { path = "../deltalake-test" }
pretty_env_logger = "*"
rand = "0.8"
serde_json = { workspace = true }

[features]
default = ["rustls"]
integration_test = []
native-tls = [
"rusoto_core/native-tls",
"rusoto_credential",
"rusoto_sts/native-tls",
"rusoto_dynamodb/native-tls",
"object_store/aws",
]
rustls = [
"rusoto_core/rustls",
"rusoto_credential",
"rusoto_sts/rustls",
"rusoto_dynamodb/rustls",
"object_store/aws",
]
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub async fn setup_s3_context() -> TestContext {
config.insert("AWS_ACCESS_KEY_ID".to_owned(), "deltalake".to_owned());
config.insert("AWS_SECRET_ACCESS_KEY".to_owned(), "weloverust".to_owned());
config.insert("AWS_S3_LOCKING_PROVIDER".to_owned(), "dynamodb".to_owned());
config.insert("DYNAMO_LOCK_TABLE_NAME".to_owned(), lock_table.clone());
config.insert(constants::LOCK_TABLE_KEY_NAME.to_owned(), lock_table.clone());
config.insert("AWS_ALLOW_HTTP".to_owned(), "TRUE".to_string());

TestContext {
Expand Down
9 changes: 9 additions & 0 deletions crates/deltalake-aws/src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Errors for S3 log store backed by DynamoDb
use std::num::ParseIntError;

use rusoto_core::RusotoError;
use rusoto_dynamodb::{CreateTableError, GetItemError, PutItemError, QueryError, UpdateItemError};

Expand All @@ -24,6 +26,13 @@ pub enum DynamoDbConfigError {
/// Billing mode string invalid
#[error("Invalid billing mode : {0}, supported values : ['provided', 'pay_per_request']")]
InvalidBillingMode(String),

/// Cannot parse max_elapsed_request_time value into u64
#[error("Cannot parse max elapsed request time into u64: {source}")]
ParseMaxElapsedRequestTime {
// config_value: String,
source: ParseIntError,
},
}

/// Errors produced by `DynamoDbLockClient`
Expand Down
Loading

0 comments on commit 8532288

Please sign in to comment.