Skip to content

Commit

Permalink
Merge branch 'main' into fix-write-to-fuse
Browse files: browse the repository at this point in the history
  • Loading branch information
ion-elgreco authored Mar 9, 2024
2 parents b40b9de + 1e19cf3 commit 1253161
Show file tree
Hide file tree
Showing 69 changed files with 2,877 additions and 880 deletions.
20 changes: 0 additions & 20 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ jobs:
toolchain: stable
override: true

- uses: Swatinem/rust-cache@v2

- name: build and lint with clippy
run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests

Expand Down Expand Up @@ -82,8 +80,6 @@ jobs:
toolchain: "stable"
override: true

- uses: Swatinem/rust-cache@v2

- name: Run tests
run: cargo test --verbose --features datafusion,azure

Expand Down Expand Up @@ -118,22 +114,6 @@ jobs:
toolchain: stable
override: true

# - uses: actions/setup-java@v3
# with:
# distribution: "zulu"
# java-version: "17"

# - uses: beyondstorage/setup-hdfs@master
# with:
# hdfs-version: "3.3.2"

# - name: Set Hadoop env
# run: |
# echo "CLASSPATH=$CLASSPATH:`hadoop classpath --glob`" >> $GITHUB_ENV
# echo "LD_LIBRARY_PATH=$JAVA_HOME/lib/server" >> $GITHUB_ENV

- uses: Swatinem/rust-cache@v2

- name: Start emulated services
run: docker-compose up -d

Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ tlaplus/*.toolbox/*/[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*/
/.idea
.vscode
.env
.venv
**/.DS_Store
**/.python-version
.coverage
Expand All @@ -29,4 +30,4 @@ Cargo.lock

justfile
site
__pycache__
__pycache__
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Contributing to delta-rs

Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports.
Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports.

Please take note of our [code of conduct](CODE_OF_CONDUCT.md).

Expand Down Expand Up @@ -31,7 +31,7 @@ python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema"
- Run some Rust code, e.g. run an example
```
cd crates/deltalake
cargo run --examples basic_operations
cargo run --example basic_operations --features="datafusion"
```

## Run the docs locally
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ arrow-ord = { version = "50" }
arrow-row = { version = "50" }
arrow-schema = { version = "50" }
arrow-select = { version = "50" }
object_store = { version = "0.9" }
object_store = { version = "=0.9.0" }
parquet = { version = "50" }

# datafusion
Expand All @@ -59,7 +59,7 @@ serde_json = "1"

# "stdlib"
bytes = { version = "1" }
chrono = { version = "0.4.31", default-features = false, features = ["clock"] }
chrono = { version = "=0.4.34", default-features = false, features = ["clock"] }
tracing = { version = "0.1", features = ["log"] }
regex = { version = "1" }
thiserror = { version = "1" }
Expand Down
68 changes: 34 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ write_deltalake("./data/delta", df)
dt = DeltaTable("./data/delta")
df2 = dt.to_pandas()

assert df == df2
assert df.equals(df2)
```

The same table can also be loaded using the core Rust crate:
Expand All @@ -91,7 +91,7 @@ async fn main() -> Result<(), DeltaTableError> {
let table = open_table("./data/delta").await?;

// show all active files in the table
let files = table.get_files();
let files: Vec<_> = table.get_file_uris()?.collect();
println!("{:?}", files);

Ok(())
Expand Down Expand Up @@ -130,45 +130,45 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc

### Cloud Integrations

| Storage | Rust | Python | Comment |
| -------------------- | :-----: | :-----: | ----------------------------------- |
| Local | ![done] | ![done] | |
| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes |
| Azure Blob | ![done] | ![done] | |
| Azure ADLS Gen2 | ![done] | ![done] | |
| Microsoft OneLake | ![done] | ![done] | |
| Google Cloud Storage | ![done] | ![done] | |
| Storage | Rust | Python | Comment |
| -------------------- | :-----: | :-----: | ---------------------------------------------------------------- |
| Local | ![done] | ![done] | |
| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
| S3 - R2 | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::CopyIfNotExists` |
| Azure Blob | ![done] | ![done] | |
| Azure ADLS Gen2 | ![done] | ![done] | |
| Microsoft OneLake | ![done] | ![done] | |
| Google Cloud Storage | ![done] | ![done] | |

### Supported Operations

| Operation | Rust | Python | Description |
| --------------------- | :----------------------: | :----------------------: | ------------------------------------------- |
| Create | ![done] | ![done] | Create a new table |
| Read | ![done] | ![done] | Read data from a table |
| Vacuum | ![done] | ![done] | Remove unused files and log entries |
| Delete - partitions | | ![done] | Delete a table partition |
| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
| Optimize - compaction | ![done] | ![done] | Harmonize the size of data files |
| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
| Merge | ![done] | ![done] | Merge a target Delta table with source data |
| FS check | ![done] | ![done] | Remove corrupted files from table |
| Operation | Rust | Python | Description |
| --------------------- | :-----: | :-----: | ------------------------------------------- |
| Create | ![done] | ![done] | Create a new table |
| Read | ![done] | ![done] | Read data from a table |
| Vacuum | ![done] | ![done] | Remove unused files and log entries |
| Delete - partitions | | ![done] | Delete a table partition |
| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
| Optimize - compaction | ![done] | ![done] | Harmonize the size of data files |
| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
| Merge | ![done] | ![done] | Merge a target Delta table with source data |
| FS check | ![done] | ![done] | Remove corrupted files from table |

### Protocol Support Level

| Writer Version | Requirement | Status |
| -------------- | --------------------------------------------- | :------------------: |
| Version 2 | Append Only Tables | ![done] |
| Version 2 | Column Invariants | ![done] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] |
| Writer Version | Requirement | Status |
| -------------- | --------------------------------------------- | :-------------------------------: |
| Version 2 | Append Only Tables | ![done] |
| Version 2 | Column Invariants | ![done] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] |
| Version 3 | CHECK constraints | [![semi-done]][check-constraints] |
| Version 4 | Change Data Feed | |
| Version 4 | Generated Columns | |
| Version 5 | Column Mapping | |
| Version 6 | Identity Columns | |
| Version 7 | Table Features | |
| Version 4 | Change Data Feed | |
| Version 4 | Generated Columns | |
| Version 5 | Column Mapping | |
| Version 6 | Identity Columns | |
| Version 7 | Table Features | |

| Reader Version | Requirement | Status |
| -------------- | ----------------------------------- | ------ |
Expand Down
24 changes: 14 additions & 10 deletions crates/aws/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ rust-version.workspace = true

[dependencies]
deltalake-core = { version = "0.17.0", path = "../core" }
rusoto_core = { version = "0.47", default-features = false, optional = true }
rusoto_credential = { version = "0.47" }
rusoto_sts = { version = "0.47", default-features = false, optional = true }
rusoto_dynamodb = { version = "0.47", default-features = false, optional = true }
aws-smithy-runtime-api = { version="1.1.7" }
aws-smithy-runtime = { version="1.1.7", optional = true}
aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]}
aws-config = { version = "1.1.6", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] }
aws-sdk-dynamodb = {version = "1.15.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
aws-sdk-sts = {version = "1.1.6", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
lazy_static = "1"
maplit = "1"

Expand All @@ -32,6 +34,7 @@ regex = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }
backoff = { version = "0.4", features = [ "tokio" ] }
hyper-tls = { version = "0.5", optional = true }

[dev-dependencies]
deltalake-core = { path = "../core", features = ["datafusion"] }
Expand All @@ -46,12 +49,13 @@ serde_json = { workspace = true }
default = ["rustls"]
integration_test = []
native-tls = [
"rusoto_core/native-tls",
"rusoto_sts/native-tls",
"rusoto_dynamodb/native-tls",
"aws-config/client-hyper",
"aws-smithy-runtime/connector-hyper-0-14-x",
"hyper-tls"
]
rustls = [
"rusoto_core/rustls",
"rusoto_sts/rustls",
"rusoto_dynamodb/rustls",
"aws-config/client-hyper",
"aws-config/rustls",
"aws-sdk-dynamodb/rustls",
"aws-sdk-sts/rustls",
]
Loading

0 comments on commit 1253161

Please sign in to comment.