diff --git a/.cargo/config b/.cargo/config.toml
similarity index 100%
rename from .cargo/config
rename to .cargo/config.toml
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index b99809d1f6..736703c551 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,4 +1,4 @@
-crates/ @wjones127 @roeap @rtyler
+crates/ @wjones127 @roeap @rtyler @hntd187 @ion-elgreco
delta-inspect/ @wjones127 @rtyler
proofs/ @houqp
python/ @wjones127 @fvaleye @roeap @ion-elgreco
diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml
new file mode 100644
index 0000000000..7875107ddd
--- /dev/null
+++ b/.github/actions/setup-env/action.yml
@@ -0,0 +1,34 @@
+name: "Setup Python and Rust Environment"
+description: "Set up Python and the Rust toolchain (rustfmt, clippy) with Rust build caching"
+
+inputs:
+
+  python-version:
+    description: "The Python version to set up"
+    required: false  # optional: callers may omit it and get the default below
+    default: "3.10"  # kept quoted so YAML does not read it as the float 3.1
+
+  rust-toolchain:
+    description: "The Rust toolchain to set up"
+    required: false  # optional: callers may omit it and get the default below
+    default: "stable"
+
+runs:
+  using: "composite"
+
+  steps:  # Python, then Rust, then cache; creating a virtualenv is left to the calling workflow
+
+    - name: Set up Python ${{ inputs.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ inputs.python-version }}
+
+    - name: Install Rust toolchain
+      uses: actions-rs/toolchain@v1
+      with:
+        profile: default
+        toolchain: ${{ inputs.rust-toolchain }}
+        override: true
+        components: rustfmt, clippy
+
+    - uses: Swatinem/rust-cache@v2
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 220c5b21d9..93b3cbdc3e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,6 +5,7 @@ on:
branches: [main, "rust-v*"]
pull_request:
branches: [main, "rust-v*"]
+ merge_group:
jobs:
format:
@@ -28,7 +29,6 @@ jobs:
matrix:
os:
- ubuntu-latest
- - macos-11
- windows-latest
runs-on: ${{ matrix.os }}
@@ -42,16 +42,14 @@ jobs:
toolchain: stable
override: true
- - uses: Swatinem/rust-cache@v2
-
- name: build and lint with clippy
- run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests
+ run: cargo clippy --features azure,datafusion,s3,gcs,glue,hdfs --tests
- name: Spot-check build for native-tls features
run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue --tests
- name: Check docs
- run: cargo doc --features azure,datafusion,s3,gcs,glue
+ run: cargo doc --features azure,datafusion,s3,gcs,glue,hdfs
- name: Check no default features (except rustls)
run: cargo check --no-default-features --features rustls
@@ -62,7 +60,6 @@ jobs:
matrix:
os:
- ubuntu-latest
- - macos-11
- windows-latest
runs-on: ${{ matrix.os }}
env:
@@ -82,8 +79,6 @@ jobs:
toolchain: "stable"
override: true
- - uses: Swatinem/rust-cache@v2
-
- name: Run tests
run: cargo test --verbose --features datafusion,azure
@@ -118,28 +113,24 @@ jobs:
toolchain: stable
override: true
- # - uses: actions/setup-java@v3
- # with:
- # distribution: "zulu"
- # java-version: "17"
-
- # - uses: beyondstorage/setup-hdfs@master
- # with:
- # hdfs-version: "3.3.2"
-
- # - name: Set Hadoop env
- # run: |
- # echo "CLASSPATH=$CLASSPATH:`hadoop classpath --glob`" >> $GITHUB_ENV
- # echo "LD_LIBRARY_PATH=$JAVA_HOME/lib/server" >> $GITHUB_ENV
+ # Install Java and Hadoop for HDFS integration tests
+ - uses: actions/setup-java@v4
+ with:
+ distribution: "temurin"
+ java-version: "17"
- - uses: Swatinem/rust-cache@v2
+ - name: Download Hadoop
+ run: |
+ wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
+ tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE
+ echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH
- name: Start emulated services
- run: docker-compose up -d
+ run: docker compose up -d
- name: Run tests with rustls (default)
run: |
- cargo test --features integration_test,azure,s3,gcs,datafusion
+ cargo test --features integration_test,azure,s3,gcs,datafusion,hdfs
- name: Run tests with native-tls
run: |
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 6b3d5a7ddb..121e0b8882 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -2,6 +2,7 @@ name: dev_pr
# Trigger whenever a PR is changed (title as well as new / changed commits)
on:
+ merge_group:
pull_request_target:
types:
- opened
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 079cd66fcc..5729b87624 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -1,6 +1,7 @@
name: Build (and maybe release) the documentation
on:
+ merge_group:
pull_request:
paths:
- python/**
@@ -31,9 +32,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- - uses: psf/black@stable
- with:
- src: docs/src/python
+ - run: |
+ cd docs
+ make check
build-deploy:
needs:
@@ -47,25 +48,13 @@ jobs:
steps:
- uses: actions/checkout@v3
- - name: Install Rust
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- override: true
- components: rustfmt, clippy
-
- - uses: Swatinem/rust-cache@v2
-
- - name: Set up Python
- uses: actions/setup-python@v3
- with:
- python-version: '3.10'
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
- name: Build and install deltalake
run: |
cd python
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
make ${{ env.BUILD_ARGS }}
diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml
index bc2f20cc9a..ce2a7e0bfd 100644
--- a/.github/workflows/python_build.yml
+++ b/.github/workflows/python_build.yml
@@ -1,6 +1,7 @@
name: python_build
on:
+ merge_group:
push:
branches: [main]
pull_request:
@@ -15,23 +16,17 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- - name: Setup Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
+
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
- name: Check Python
run: |
- pip install ruff black mypy types-dataclasses typing-extensions
+ python -m venv venv
+ source venv/bin/activate
+ pip install ruff==0.5.2 mypy==1.10.1 types-dataclasses typing-extensions
make check-python
- - name: Install minimal stable with clippy and rustfmt
- uses: actions-rs/toolchain@v1
- with:
- profile: default
- toolchain: stable
- override: true
-
- name: Check Rust
run: make check-rust
@@ -45,24 +40,14 @@ jobs:
steps:
- uses: actions/checkout@v3
- - name: Setup Python
- uses: actions/setup-python@v2
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
with:
python-version: 3.8
- - name: Install latest nightly
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- override: true
- components: rustfmt, clippy
-
- - uses: Swatinem/rust-cache@v2
-
- name: Build and install deltalake
run: |
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
make setup
# Install minimum PyArrow version
@@ -89,26 +74,15 @@ jobs:
steps:
- uses: actions/checkout@v3
- - name: Install latest nightly
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- override: true
- components: rustfmt, clippy
-
- - uses: Swatinem/rust-cache@v2
-
- - uses: actions/setup-python@v3
- with:
- python-version: "3.10"
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
- name: Start emulated services
- run: docker-compose up -d
+ run: docker compose up -d
- name: Build and install deltalake
run: |
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
make develop
@@ -137,23 +111,12 @@ jobs:
steps:
- uses: actions/checkout@v2
- - name: Install latest nightly
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- override: true
- components: rustfmt, clippy
-
- - uses: Swatinem/rust-cache@v2
-
- - uses: actions/setup-python@v4
- with:
- python-version: "3.10"
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
- name: Build deltalake in release mode
run: |
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
MATURIN_EXTRA_ARGS=--release make develop
@@ -187,18 +150,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- - name: Install latest nightly
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- override: true
- components: rustfmt, clippy
-
- - uses: Swatinem/rust-cache@v2
-
- - uses: actions/setup-python@v3
- with:
- python-version: "3.10"
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
- uses: actions/setup-java@v2
with:
@@ -207,8 +160,7 @@ jobs:
- name: Build and install deltalake
run: |
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
make develop-pyspark
@@ -231,15 +183,14 @@ jobs:
steps:
- uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ - name: Setup Environment
+ uses: ./.github/actions/setup-env
with:
python-version: ${{ matrix.python-version }}
- name: Build and install deltalake
run: |
- pip install virtualenv
- virtualenv venv
+ python -m venv venv
source venv/bin/activate
make setup
maturin develop
diff --git a/.github/workflows/python_release.yml b/.github/workflows/python_release.yml
index 48611bacb4..46b4230af1 100644
--- a/.github/workflows/python_release.yml
+++ b/.github/workflows/python_release.yml
@@ -35,7 +35,7 @@ jobs:
fail-fast: false
matrix:
target: [x86_64-apple-darwin, aarch64-apple-darwin]
- runs-on: macos-12
+ runs-on: macos-14
steps:
- uses: actions/checkout@v3
diff --git a/.gitignore b/.gitignore
index ca0576b47c..18dcc39f69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,8 @@ tlaplus/*.toolbox/*/[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*/
/.idea
.vscode
.env
+.venv
+venv
**/.DS_Store
**/.python-version
.coverage
@@ -20,6 +22,7 @@ __blobstorage__
.githubchangeloggenerator.cache.log
.githubchangeloggenerator.cache/
.githubchangeloggenerator*
+data
# Add all Cargo.lock files except for those in binary crates
Cargo.lock
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 922a49f47e..9161a320c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,689 @@
# Changelog
+## [rust-v0.18.2](https://github.com/delta-io/delta-rs/tree/rust-v0.18.2) (2024-08-07)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.18.1...rust-v0.18.2)
+
+**Implemented enhancements:**
+
+- Choose which columns to store min/max values for [\#2709](https://github.com/delta-io/delta-rs/issues/2709)
+- Projection pushdown for load\_cdf [\#2681](https://github.com/delta-io/delta-rs/issues/2681)
+- Way to check if Delta table exists at specified path [\#2662](https://github.com/delta-io/delta-rs/issues/2662)
+- Support HDFS via hdfs-native package [\#2611](https://github.com/delta-io/delta-rs/issues/2611)
+- Deletion `_change_type` does not appear in change data feed [\#2579](https://github.com/delta-io/delta-rs/issues/2579)
+- Could you please explain in the README what "Deltalake" is for the uninitiated? [\#2523](https://github.com/delta-io/delta-rs/issues/2523)
+- Discuss: Allow protocol change during write actions [\#2444](https://github.com/delta-io/delta-rs/issues/2444)
+- Support for Arrow PyCapsule interface [\#2376](https://github.com/delta-io/delta-rs/issues/2376)
+
+**Fixed bugs:**
+
+- Slow add\_actions.to\_pydict for tables with large number of columns, impacting read performance [\#2733](https://github.com/delta-io/delta-rs/issues/2733)
+- append is deleting records [\#2716](https://github.com/delta-io/delta-rs/issues/2716)
+- segmentation fault - Python 3.10 on Mac M3 [\#2706](https://github.com/delta-io/delta-rs/issues/2706)
+- Failure to delete dir and files [\#2703](https://github.com/delta-io/delta-rs/issues/2703)
+- DeltaTable.from\_data\_catalog not working [\#2699](https://github.com/delta-io/delta-rs/issues/2699)
+- Project should use the same version of `ruff` in the `lint` stage of `python_build.yml` as in `pyproject.toml` [\#2678](https://github.com/delta-io/delta-rs/issues/2678)
+- un-tracked columns are giving json error when pyarrow schema have field with nullable=False and create\_checkpoint is triggered [\#2675](https://github.com/delta-io/delta-rs/issues/2675)
+- \[BUG\]write\_delta\({'custom\_metadata':str}\) cannot be converted. str to pyDict error \(0.18.2\_DeltaPython/Windows10\) [\#2697](https://github.com/delta-io/delta-rs/issues/2697)
+- Pyarrow engine not supporting schema overwrite with Append mode [\#2654](https://github.com/delta-io/delta-rs/issues/2654)
+- `deltalake-core` version re-exported by `deltalake` different than versions used by `deltalake-azure` and `deltalake-gcp` [\#2647](https://github.com/delta-io/delta-rs/issues/2647)
+- i32 limit in JSON stats [\#2646](https://github.com/delta-io/delta-rs/issues/2646)
+- Rust writer not encoding correct URL for partitions in delta table [\#2634](https://github.com/delta-io/delta-rs/issues/2634)
+- Large Types breaks merge predicate pruning [\#2632](https://github.com/delta-io/delta-rs/issues/2632)
+- Getting error when converting a partitioned parquet table to delta table [\#2626](https://github.com/delta-io/delta-rs/issues/2626)
+- Arrow: Parquet does not support writing empty structs when creating checkpoint [\#2622](https://github.com/delta-io/delta-rs/issues/2622)
+- InvalidTableLocation\("Unknown scheme: gs"\) on 0.18.0 [\#2610](https://github.com/delta-io/delta-rs/issues/2610)
+- Unable to read delta table created using Uniform [\#2578](https://github.com/delta-io/delta-rs/issues/2578)
+- schema merging doesn't work when overwriting with a predicate [\#2567](https://github.com/delta-io/delta-rs/issues/2567)
+- Not working in AWS Lambda \(0.16.2 - 0.17.4\) OSError: Generic S3 error [\#2511](https://github.com/delta-io/delta-rs/issues/2511)
+- DataFusion filter on partition column doesn't work. \(when the physical schema ordering is different to logical one\) [\#2494](https://github.com/delta-io/delta-rs/issues/2494)
+- Creating checkpoints for tables with missing column stats results in Err [\#2493](https://github.com/delta-io/delta-rs/issues/2493)
+- Cannot merge to a table with a timestamp column after upgrading delta-rs [\#2478](https://github.com/delta-io/delta-rs/issues/2478)
+- Azure AD Auth fails on ARM64 [\#2475](https://github.com/delta-io/delta-rs/issues/2475)
+- Generic S3 error: Error after 0 retries ... Broken pipe \(os error 32\) [\#2403](https://github.com/delta-io/delta-rs/issues/2403)
+- write\_deltalake identifies large\_string as datatype even though string is set in schema [\#2374](https://github.com/delta-io/delta-rs/issues/2374)
+- Inconsistent arrow timestamp type breaks datafusion query [\#2341](https://github.com/delta-io/delta-rs/issues/2341)
+
+**Closed issues:**
+
+- Unable to write new partitions with type timestamp on tables created with delta-rs 0.10.0 [\#2631](https://github.com/delta-io/delta-rs/issues/2631)
+
+**Merged pull requests:**
+
+- fix: schema adapter doesn't map partial batches correctly [\#2735](https://github.com/delta-io/delta-rs/pull/2735) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- perf: grab file size in rust [\#2734](https://github.com/delta-io/delta-rs/pull/2734) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: use logical plan in update, refactor/simplify CDCTracker [\#2727](https://github.com/delta-io/delta-rs/pull/2727) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: use logical plan in delete, delta planner refactoring [\#2725](https://github.com/delta-io/delta-rs/pull/2725) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: try an alternative docker compose invocation syntax [\#2724](https://github.com/delta-io/delta-rs/pull/2724) ([rtyler](https://github.com/rtyler))
+- fix\(python, rust\): use input schema to get correct schema in cdf reads [\#2723](https://github.com/delta-io/delta-rs/pull/2723) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(python, rust\): cdc write-support for `overwrite` and `replacewhere` writes [\#2722](https://github.com/delta-io/delta-rs/pull/2722) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(python, rust\): cdc write-support for `delete` operation [\#2721](https://github.com/delta-io/delta-rs/pull/2721) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: enabling actions for merge groups [\#2718](https://github.com/delta-io/delta-rs/pull/2718) ([rtyler](https://github.com/rtyler))
+- perf: apply projection when reading checkpoint parquet [\#2717](https://github.com/delta-io/delta-rs/pull/2717) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- feat\(python\): add DeltaTable.is\_deltatable static method \(\#2662\) [\#2715](https://github.com/delta-io/delta-rs/pull/2715) ([omkar-foss](https://github.com/omkar-foss))
+- chore: prepare python release 0.18.3 [\#2707](https://github.com/delta-io/delta-rs/pull/2707) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python, rust\): use url encoder when encoding partition values [\#2705](https://github.com/delta-io/delta-rs/pull/2705) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(python, rust\): add projection in CDF reads [\#2704](https://github.com/delta-io/delta-rs/pull/2704) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: ensure DataFusion SessionState Parquet options are applied to DeltaScan [\#2702](https://github.com/delta-io/delta-rs/pull/2702) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- chore: refactor `write_deltalake` in `writer.py` [\#2695](https://github.com/delta-io/delta-rs/pull/2695) ([fpgmaas](https://github.com/fpgmaas))
+- fix\(python\): empty dataset fix for "pyarrow" engine [\#2689](https://github.com/delta-io/delta-rs/pull/2689) ([sherlockbeard](https://github.com/sherlockbeard))
+- chore: add test coverage command to `Makefile` [\#2688](https://github.com/delta-io/delta-rs/pull/2688) ([fpgmaas](https://github.com/fpgmaas))
+- chore: create separate action to setup python and rust in the cicd pipeline [\#2687](https://github.com/delta-io/delta-rs/pull/2687) ([fpgmaas](https://github.com/fpgmaas))
+- fix: update delta kernel version [\#2685](https://github.com/delta-io/delta-rs/pull/2685) ([jeppe742](https://github.com/jeppe742))
+- chore: update README.md [\#2684](https://github.com/delta-io/delta-rs/pull/2684) ([veronewra](https://github.com/veronewra))
+- fix\(rust,python\): checkpoint with column nullable false [\#2680](https://github.com/delta-io/delta-rs/pull/2680) ([sherlockbeard](https://github.com/sherlockbeard))
+- chore: pin `ruff` and `mypy` versions in the `lint` stage in the CI pipeline [\#2679](https://github.com/delta-io/delta-rs/pull/2679) ([fpgmaas](https://github.com/fpgmaas))
+- chore: enable `RUF` ruleset for `ruff` [\#2677](https://github.com/delta-io/delta-rs/pull/2677) ([fpgmaas](https://github.com/fpgmaas))
+- chore: remove stale code for conditional import of `Literal` [\#2676](https://github.com/delta-io/delta-rs/pull/2676) ([fpgmaas](https://github.com/fpgmaas))
+- chore: remove references to black from the project [\#2674](https://github.com/delta-io/delta-rs/pull/2674) ([fpgmaas](https://github.com/fpgmaas))
+- chore: bump ruff to 0.5.2 [\#2673](https://github.com/delta-io/delta-rs/pull/2673) ([fpgmaas](https://github.com/fpgmaas))
+- chore: improve contributing.md [\#2672](https://github.com/delta-io/delta-rs/pull/2672) ([fpgmaas](https://github.com/fpgmaas))
+- feat: support userMetadata in CommitInfo [\#2670](https://github.com/delta-io/delta-rs/pull/2670) ([jkylling](https://github.com/jkylling))
+- chore: upgrade to datafusion 40 [\#2661](https://github.com/delta-io/delta-rs/pull/2661) ([rtyler](https://github.com/rtyler))
+- docs: improve navigation fixes [\#2660](https://github.com/delta-io/delta-rs/pull/2660) ([avriiil](https://github.com/avriiil))
+- docs: add integration docs for s3 backend [\#2658](https://github.com/delta-io/delta-rs/pull/2658) ([avriiil](https://github.com/avriiil))
+- docs: fix bullets on hdfs docs [\#2653](https://github.com/delta-io/delta-rs/pull/2653) ([Kimahriman](https://github.com/Kimahriman))
+- ci: update CODEOWNERS [\#2650](https://github.com/delta-io/delta-rs/pull/2650) ([hntd187](https://github.com/hntd187))
+- feat\(rust\): fix size\_in\_bytes in last\_checkpoint\_ to i64 [\#2649](https://github.com/delta-io/delta-rs/pull/2649) ([sherlockbeard](https://github.com/sherlockbeard))
+- chore: increase subcrate versions [\#2648](https://github.com/delta-io/delta-rs/pull/2648) ([rtyler](https://github.com/rtyler))
+- chore: missed one macos runner reference in actions [\#2645](https://github.com/delta-io/delta-rs/pull/2645) ([rtyler](https://github.com/rtyler))
+- chore: add a reproduction case for merge failures with struct\<list\<\>\> [\#2644](https://github.com/delta-io/delta-rs/pull/2644) ([rtyler](https://github.com/rtyler))
+- chore: remove macos builders from pull request flow [\#2638](https://github.com/delta-io/delta-rs/pull/2638) ([rtyler](https://github.com/rtyler))
+- fix: enable parquet pushdown for DeltaScan via TableProvider impl for DeltaTable \(rebase\) [\#2637](https://github.com/delta-io/delta-rs/pull/2637) ([rtyler](https://github.com/rtyler))
+- chore: fix documentation generation with a pin of griffe [\#2636](https://github.com/delta-io/delta-rs/pull/2636) ([rtyler](https://github.com/rtyler))
+- fix\(python\): fixed large\_dtype to schema convert [\#2635](https://github.com/delta-io/delta-rs/pull/2635) ([sherlockbeard](https://github.com/sherlockbeard))
+- fix\(rust, python\): fix writing empty structs when creating checkpoint [\#2627](https://github.com/delta-io/delta-rs/pull/2627) ([sherlockbeard](https://github.com/sherlockbeard))
+- fix\(rust, python\): fix merge schema with overwrite [\#2623](https://github.com/delta-io/delta-rs/pull/2623) ([sherlockbeard](https://github.com/sherlockbeard))
+- chore: bump python 0.18.2 [\#2621](https://github.com/delta-io/delta-rs/pull/2621) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: report DataFusion metrics for DeltaScan [\#2617](https://github.com/delta-io/delta-rs/pull/2617) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- feat\(rust,python\): cast each parquet file to delta schema [\#2615](https://github.com/delta-io/delta-rs/pull/2615) ([HawaiianSpork](https://github.com/HawaiianSpork))
+- fix\(rust\): inconsistent order of partitioning columns \(\#2494\) [\#2614](https://github.com/delta-io/delta-rs/pull/2614) ([aditanase](https://github.com/aditanase))
+- docs: add Daft writer [\#2594](https://github.com/delta-io/delta-rs/pull/2594) ([avriiil](https://github.com/avriiil))
+- feat\(python, rust\): `add column` operation [\#2562](https://github.com/delta-io/delta-rs/pull/2562) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: change arrow map root name to follow with parquet root name [\#2538](https://github.com/delta-io/delta-rs/pull/2538) ([sclmn](https://github.com/sclmn))
+- feat\(python\): handle PyCapsule interface objects in write\_deltalake [\#2534](https://github.com/delta-io/delta-rs/pull/2534) ([kylebarron](https://github.com/kylebarron))
+- feat: improve merge performance by using predicate non-partition columns min/max for prefiltering [\#2513](https://github.com/delta-io/delta-rs/pull/2513) ([JonasDev1](https://github.com/JonasDev1))
+- feat\(python, rust\): cleanup expired logs post-commit hook [\#2459](https://github.com/delta-io/delta-rs/pull/2459) ([ion-elgreco](https://github.com/ion-elgreco))
+
+## [rust-v0.18.0](https://github.com/delta-io/delta-rs/tree/rust-v0.18.0) (2024-06-12)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.3...rust-v0.18.0)
+
+**Implemented enhancements:**
+
+- documentation: concurrent writes for non-S3 backends [\#2556](https://github.com/delta-io/delta-rs/issues/2556)
+- pyarrow options for `write_delta` [\#2515](https://github.com/delta-io/delta-rs/issues/2515)
+- \[deltalake\_aws\] Allow configuring separate endpoints for S3 and DynamoDB clients. [\#2498](https://github.com/delta-io/delta-rs/issues/2498)
+- Include file stats when converting a parquet directory to a Delta table [\#2490](https://github.com/delta-io/delta-rs/issues/2490)
+- Adopt the delta kernel types [\#2489](https://github.com/delta-io/delta-rs/issues/2489)
+
+**Fixed bugs:**
+
+- `raise_if_not_exists` for properties not configurable on CreateBuilder [\#2564](https://github.com/delta-io/delta-rs/issues/2564)
+- write\_deltalake with rust engine fails when mode is append and overwrite schema is enabled [\#2553](https://github.com/delta-io/delta-rs/issues/2553)
+- Running the basic\_operations examples fails with `Error: Transaction { source: WriterFeaturesRequired(TimestampWithoutTimezone) }` [\#2552](https://github.com/delta-io/delta-rs/issues/2552)
+- invalid peer certificate: BadSignature when connecting to s3 from arm64/aarch64 [\#2551](https://github.com/delta-io/delta-rs/issues/2551)
+- load\_cdf\(\) issue : Generic S3 error: request or response body error: operation timed out [\#2549](https://github.com/delta-io/delta-rs/issues/2549)
+- write\_deltalake fails on Databricks volume [\#2540](https://github.com/delta-io/delta-rs/issues/2540)
+- Getting "Microsoft Azure Error: Operation timed out" when trying to retrieve big files [\#2537](https://github.com/delta-io/delta-rs/issues/2537)
+- Impossible to append to a DeltaTable with float data type on RHEL [\#2520](https://github.com/delta-io/delta-rs/issues/2520)
+- Creating DeltaTable object slow [\#2518](https://github.com/delta-io/delta-rs/issues/2518)
+- `write_deltalake` throws parser error when using `rust` engine and big decimals [\#2510](https://github.com/delta-io/delta-rs/issues/2510)
+- TypeError: Object of type int64 is not JSON serializable when writing using a Pandas dataframe [\#2501](https://github.com/delta-io/delta-rs/issues/2501)
+- unable to read delta table when table contains both null and non-null add stats [\#2477](https://github.com/delta-io/delta-rs/issues/2477)
+- Commits on WriteMode::MergeSchema cause table metadata corruption [\#2468](https://github.com/delta-io/delta-rs/issues/2468)
+- S3 object store always returns IMDS warnings [\#2460](https://github.com/delta-io/delta-rs/issues/2460)
+- File skipping according to documentation [\#2427](https://github.com/delta-io/delta-rs/issues/2427)
+- LockClientError [\#2379](https://github.com/delta-io/delta-rs/issues/2379)
+- get\_app\_transaction\_version\(\) returns wrong result [\#2340](https://github.com/delta-io/delta-rs/issues/2340)
+- Property setting in `create` is not handled correctly [\#2247](https://github.com/delta-io/delta-rs/issues/2247)
+- Handling of decimals in scientific notation [\#2221](https://github.com/delta-io/delta-rs/issues/2221)
+- Unable to append to delta table without datafusion feature [\#2204](https://github.com/delta-io/delta-rs/issues/2204)
+- Decimal Column with Value 0 Causes Failure in Python Binding [\#2193](https://github.com/delta-io/delta-rs/issues/2193)
+
+**Merged pull requests:**
+
+- docs: improve S3 access docs [\#2589](https://github.com/delta-io/delta-rs/pull/2589) ([avriiil](https://github.com/avriiil))
+- chore: bump macOS runners, maybe resolve import error [\#2588](https://github.com/delta-io/delta-rs/pull/2588) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: bump to datafusion 39, arrow 52, pyo3 0.21 [\#2581](https://github.com/delta-io/delta-rs/pull/2581) ([abhiaagarwal](https://github.com/abhiaagarwal))
+- feat: add custom dynamodb endpoint configuration [\#2575](https://github.com/delta-io/delta-rs/pull/2575) ([hnaoto](https://github.com/hnaoto))
+- fix: consistently use raise\_if\_key\_not\_exists in CreateBuilder [\#2569](https://github.com/delta-io/delta-rs/pull/2569) ([vegarsti](https://github.com/vegarsti))
+- fix: add raise\_if\_key\_not\_exists to CreateBuilder [\#2565](https://github.com/delta-io/delta-rs/pull/2565) ([vegarsti](https://github.com/vegarsti))
+- docs: dt.delete add context + api docs link [\#2560](https://github.com/delta-io/delta-rs/pull/2560) ([avriiil](https://github.com/avriiil))
+- fix: update deltalake crate examples for crate layout and TimestampNtz [\#2559](https://github.com/delta-io/delta-rs/pull/2559) ([jhoekx](https://github.com/jhoekx))
+- docs: clarify locking mechanism requirement for S3 [\#2558](https://github.com/delta-io/delta-rs/pull/2558) ([inigohidalgo](https://github.com/inigohidalgo))
+- fix: remove deprecated overwrite\_schema configuration which has incorrect behavior [\#2554](https://github.com/delta-io/delta-rs/pull/2554) ([rtyler](https://github.com/rtyler))
+- fix: clippy warnings [\#2548](https://github.com/delta-io/delta-rs/pull/2548) ([imor](https://github.com/imor))
+- docs: dask write syntax fix [\#2543](https://github.com/delta-io/delta-rs/pull/2543) ([avriiil](https://github.com/avriiil))
+- fix: cast support fields nested in lists and maps [\#2541](https://github.com/delta-io/delta-rs/pull/2541) ([HawaiianSpork](https://github.com/HawaiianSpork))
+- feat: implement transaction identifiers - continued [\#2539](https://github.com/delta-io/delta-rs/pull/2539) ([roeap](https://github.com/roeap))
+- docs: pull delta from conda not pip [\#2535](https://github.com/delta-io/delta-rs/pull/2535) ([avriiil](https://github.com/avriiil))
+- chore: expose `files_by_partition` to public api [\#2533](https://github.com/delta-io/delta-rs/pull/2533) ([edmondop](https://github.com/edmondop))
+- chore: bump python 0.17.5 [\#2531](https://github.com/delta-io/delta-rs/pull/2531) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(rust\): make PartitionWriter public [\#2525](https://github.com/delta-io/delta-rs/pull/2525) ([adriangb](https://github.com/adriangb))
+- fix: msrv in workspace [\#2524](https://github.com/delta-io/delta-rs/pull/2524) ([roeap](https://github.com/roeap))
+- chore: fixing some clips [\#2521](https://github.com/delta-io/delta-rs/pull/2521) ([rtyler](https://github.com/rtyler))
+- fix: enable field\_with\_name to support nested fields with '.' delimiter [\#2519](https://github.com/delta-io/delta-rs/pull/2519) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- chore: tidying up builds without datafusion feature and clippy [\#2516](https://github.com/delta-io/delta-rs/pull/2516) ([rtyler](https://github.com/rtyler))
+- fix\(python\): release GIL on most operations [\#2512](https://github.com/delta-io/delta-rs/pull/2512) ([adriangb](https://github.com/adriangb))
+- docs: fix typo [\#2508](https://github.com/delta-io/delta-rs/pull/2508) ([avriiil](https://github.com/avriiil))
+- fix\(rust, python\): fixed differences in storage options between log and object stores [\#2500](https://github.com/delta-io/delta-rs/pull/2500) ([mightyshazam](https://github.com/mightyshazam))
+- docs: improve daft integration docs [\#2496](https://github.com/delta-io/delta-rs/pull/2496) ([avriiil](https://github.com/avriiil))
+- feat: adopt kernel schema types [\#2495](https://github.com/delta-io/delta-rs/pull/2495) ([roeap](https://github.com/roeap))
+- feat: add stats to convert-to-delta operation [\#2491](https://github.com/delta-io/delta-rs/pull/2491) ([gruuya](https://github.com/gruuya))
+- fix\(python, rust\): region lookup wasn't working correctly for dynamo [\#2488](https://github.com/delta-io/delta-rs/pull/2488) ([mightyshazam](https://github.com/mightyshazam))
+- feat: introduce CDC write-side support for the Update operations [\#2486](https://github.com/delta-io/delta-rs/pull/2486) ([rtyler](https://github.com/rtyler))
+- fix\(python\): reuse state in `to_pyarrow_dataset` [\#2485](https://github.com/delta-io/delta-rs/pull/2485) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: check to see if the file exists before attempting to rename [\#2482](https://github.com/delta-io/delta-rs/pull/2482) ([rtyler](https://github.com/rtyler))
+- fix\(python, rust\): use new schema for stats parsing instead of old [\#2480](https://github.com/delta-io/delta-rs/pull/2480) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): unable to read delta table when table contains both null and non-null add stats [\#2476](https://github.com/delta-io/delta-rs/pull/2476) ([yjshen](https://github.com/yjshen))
+- chore: update the changelog to include rust-v0.17.3 [\#2473](https://github.com/delta-io/delta-rs/pull/2473) ([rtyler](https://github.com/rtyler))
+- chore: a bunch of tweaks to get releases out the door [\#2472](https://github.com/delta-io/delta-rs/pull/2472) ([rtyler](https://github.com/rtyler))
+- chore: bump the core crate for its next release [\#2470](https://github.com/delta-io/delta-rs/pull/2470) ([rtyler](https://github.com/rtyler))
+- fix: return unsupported error for merging schemas in the presence of partition columns [\#2469](https://github.com/delta-io/delta-rs/pull/2469) ([emcake](https://github.com/emcake))
+- feat\(python\): add parameter to DeltaTable.to\_pyarrow\_dataset\(\) [\#2465](https://github.com/delta-io/delta-rs/pull/2465) ([adriangb](https://github.com/adriangb))
+- feat\(python, rust\): add OBJECT\_STORE\_CONCURRENCY\_LIMIT setting for ObjectStoreFactory [\#2458](https://github.com/delta-io/delta-rs/pull/2458) ([vigimite](https://github.com/vigimite))
+- fix\(rust\): handle 429 from GCS [\#2454](https://github.com/delta-io/delta-rs/pull/2454) ([adriangb](https://github.com/adriangb))
+- fix\(python\): reuse table state in write engine [\#2453](https://github.com/delta-io/delta-rs/pull/2453) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): implement abort commit for S3DynamoDBLogStore [\#2452](https://github.com/delta-io/delta-rs/pull/2452) ([PeterKeDer](https://github.com/PeterKeDer))
+- fix\(python, rust\): check timestamp\_ntz in nested fields, add check\_can\_write in pyarrow writer [\#2443](https://github.com/delta-io/delta-rs/pull/2443) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python, rust\): remove imds calls from profile auth and region [\#2442](https://github.com/delta-io/delta-rs/pull/2442) ([mightyshazam](https://github.com/mightyshazam))
+- fix\(python, rust\): use from\_name during column projection creation [\#2441](https://github.com/delta-io/delta-rs/pull/2441) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: bump python for 0.17 release [\#2439](https://github.com/delta-io/delta-rs/pull/2439) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python,rust\): missing remove actions during `create_or_replace` [\#2437](https://github.com/delta-io/delta-rs/pull/2437) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: introduce the Operation trait to enforce consistency between operations [\#2435](https://github.com/delta-io/delta-rs/pull/2435) ([rtyler](https://github.com/rtyler))
+- fix\(python\): load\_as\_version with datetime object with no timezone specified [\#2429](https://github.com/delta-io/delta-rs/pull/2429) ([t1g0rz](https://github.com/t1g0rz))
+- feat\(python, rust\): respect column stats collection configurations [\#2428](https://github.com/delta-io/delta-rs/pull/2428) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: lazy static runtime in python [\#2424](https://github.com/delta-io/delta-rs/pull/2424) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: implement repartitioned for DeltaScan [\#2421](https://github.com/delta-io/delta-rs/pull/2421) ([jkylling](https://github.com/jkylling))
+- fix: return error when checkpoints and metadata get out of sync [\#2406](https://github.com/delta-io/delta-rs/pull/2406) ([esarili](https://github.com/esarili))
+- fix\(rust\): stats\_parsed has different number of records with stats [\#2405](https://github.com/delta-io/delta-rs/pull/2405) ([yjshen](https://github.com/yjshen))
+- docs: add Daft integration [\#2402](https://github.com/delta-io/delta-rs/pull/2402) ([avriiil](https://github.com/avriiil))
+- feat\(rust\): advance state in post commit [\#2396](https://github.com/delta-io/delta-rs/pull/2396) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore\(rust\): bump arrow v51 and datafusion v37.1 [\#2395](https://github.com/delta-io/delta-rs/pull/2395) ([lasantosr](https://github.com/lasantosr))
+- docs: document required aws permissions [\#2393](https://github.com/delta-io/delta-rs/pull/2393) ([ale-rinaldi](https://github.com/ale-rinaldi))
+- feat\(rust\): post commit hook \(v2\), create checkpoint hook [\#2391](https://github.com/delta-io/delta-rs/pull/2391) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: time travel when checkpointed and logs removed [\#2389](https://github.com/delta-io/delta-rs/pull/2389) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): remove flush after writing every batch [\#2387](https://github.com/delta-io/delta-rs/pull/2387) ([PeterKeDer](https://github.com/PeterKeDer))
+- feat: added configuration variables to handle EC2 metadata service [\#2385](https://github.com/delta-io/delta-rs/pull/2385) ([mightyshazam](https://github.com/mightyshazam))
+- fix\(rust\): timestamp deserialization format, missing type [\#2383](https://github.com/delta-io/delta-rs/pull/2383) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: bump chrono [\#2372](https://github.com/delta-io/delta-rs/pull/2372) ([universalmind303](https://github.com/universalmind303))
+- chore: bump python 0.16.4 [\#2371](https://github.com/delta-io/delta-rs/pull/2371) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: add snappy compression on checkpoint files [\#2365](https://github.com/delta-io/delta-rs/pull/2365) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: add config for parquet pushdown on delta scan [\#2364](https://github.com/delta-io/delta-rs/pull/2364) ([Blajda](https://github.com/Blajda))
+- fix\(python,rust\): optimize compact on schema evolved table [\#2358](https://github.com/delta-io/delta-rs/pull/2358) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python, rust\): expr parsing date/timestamp [\#2357](https://github.com/delta-io/delta-rs/pull/2357) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: remove tmp files in cleanup\_metadata [\#2356](https://github.com/delta-io/delta-rs/pull/2356) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: make struct fields nullable in stats schema [\#2346](https://github.com/delta-io/delta-rs/pull/2346) ([qinix](https://github.com/qinix))
+- fix\(rust\): adhere to protocol for Decimal [\#2332](https://github.com/delta-io/delta-rs/pull/2332) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): raise schema mismatch when decimal is not subset [\#2330](https://github.com/delta-io/delta-rs/pull/2330) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(rust\): derive Copy on some public enums [\#2329](https://github.com/delta-io/delta-rs/pull/2329) ([lasantosr](https://github.com/lasantosr))
+- fix: merge pushdown handling [\#2326](https://github.com/delta-io/delta-rs/pull/2326) ([Blajda](https://github.com/Blajda))
+- fix: merge concurrency control [\#2324](https://github.com/delta-io/delta-rs/pull/2324) ([ion-elgreco](https://github.com/ion-elgreco))
+- Revert 2291 merge predicate fix [\#2323](https://github.com/delta-io/delta-rs/pull/2323) ([Blajda](https://github.com/Blajda))
+- fix: try to fix timeouts [\#2318](https://github.com/delta-io/delta-rs/pull/2318) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): serialize MetricDetails from compaction runs to a string [\#2317](https://github.com/delta-io/delta-rs/pull/2317) ([liamphmurphy](https://github.com/liamphmurphy))
+- docs: add example in to\_pyarrow\_dataset [\#2315](https://github.com/delta-io/delta-rs/pull/2315) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python\): wrong batch size [\#2314](https://github.com/delta-io/delta-rs/pull/2314) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: object store 0.9.1 [\#2311](https://github.com/delta-io/delta-rs/pull/2311) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: checkpoint features format below v3,7 [\#2307](https://github.com/delta-io/delta-rs/pull/2307) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: schema evolution not coercing with large arrow types [\#2305](https://github.com/delta-io/delta-rs/pull/2305) ([aersam](https://github.com/aersam))
+- fix: clean up some non-datafusion builds [\#2303](https://github.com/delta-io/delta-rs/pull/2303) ([rtyler](https://github.com/rtyler))
+- docs: fix typo [\#2300](https://github.com/delta-io/delta-rs/pull/2300) ([LauH1987](https://github.com/LauH1987))
+- docs: make replaceWhere example compile [\#2299](https://github.com/delta-io/delta-rs/pull/2299) ([LauH1987](https://github.com/LauH1987))
+- fix\(rust\): add missing chrono-tz feature [\#2295](https://github.com/delta-io/delta-rs/pull/2295) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore\(python\): bump to v0.16.1 [\#2294](https://github.com/delta-io/delta-rs/pull/2294) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): features not maintained in protocol after checkpoint [\#2293](https://github.com/delta-io/delta-rs/pull/2293) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: merge predicate for concurrent writes [\#2291](https://github.com/delta-io/delta-rs/pull/2291) ([JonasDev1](https://github.com/JonasDev1))
+- fix: replace assert and AssertionError with appropriate exceptions [\#2286](https://github.com/delta-io/delta-rs/pull/2286) ([joe-sharman](https://github.com/joe-sharman))
+- docs: fix typo in delta-lake-polars.md [\#2285](https://github.com/delta-io/delta-rs/pull/2285) ([vladdoster](https://github.com/vladdoster))
+- fix\(python, rust\): prevent table scan returning large arrow dtypes [\#2274](https://github.com/delta-io/delta-rs/pull/2274) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(python\): always encapsulate column names in backticks in \_all functions [\#2271](https://github.com/delta-io/delta-rs/pull/2271) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): read only checkpoints that match \_last\_checkpoint version [\#2270](https://github.com/delta-io/delta-rs/pull/2270) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: add .venv to .gitignore [\#2268](https://github.com/delta-io/delta-rs/pull/2268) ([gacharya](https://github.com/gacharya))
+- feat\(python, rust\): add `set table properties` operation [\#2264](https://github.com/delta-io/delta-rs/pull/2264) ([ion-elgreco](https://github.com/ion-elgreco))
+- docs: use dagster deltalake polars library [\#2263](https://github.com/delta-io/delta-rs/pull/2263) ([avriiil](https://github.com/avriiil))
+- docs: update comment about r2 requiring locks [\#2261](https://github.com/delta-io/delta-rs/pull/2261) ([cmackenzie1](https://github.com/cmackenzie1))
+- fix\(\#2256\): use consistent units of time [\#2260](https://github.com/delta-io/delta-rs/pull/2260) ([cmackenzie1](https://github.com/cmackenzie1))
+- chore: update the changelog for rust-v0.17.1 [\#2259](https://github.com/delta-io/delta-rs/pull/2259) ([rtyler](https://github.com/rtyler))
+- feat\(python\): release GIL in the write\_deltalake function [\#2257](https://github.com/delta-io/delta-rs/pull/2257) ([franz101](https://github.com/franz101))
+- chore\(rust\): bump datafusion to 36 [\#2249](https://github.com/delta-io/delta-rs/pull/2249) ([universalmind303](https://github.com/universalmind303))
+- chore!: replace rusoto with AWS SDK [\#2243](https://github.com/delta-io/delta-rs/pull/2243) ([mightyshazam](https://github.com/mightyshazam))
+- fix: handle conflict checking in optimize correctly [\#2208](https://github.com/delta-io/delta-rs/pull/2208) ([emcake](https://github.com/emcake))
+- feat: logical Node for find files [\#2194](https://github.com/delta-io/delta-rs/pull/2194) ([hntd187](https://github.com/hntd187))
+
+## [rust-v0.17.3](https://github.com/delta-io/delta-rs/tree/rust-v0.17.3) (2024-05-01)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.1...rust-v0.17.3)
+
+**Implemented enhancements:**
+
+- Limit concurrent ObjectStore access to avoid resource limitations in constrained environments [\#2457](https://github.com/delta-io/delta-rs/issues/2457)
+- How to get a DataFrame in Rust? [\#2404](https://github.com/delta-io/delta-rs/issues/2404)
+- Allow checkpoint creation when partition column is "timestampNtz" [\#2381](https://github.com/delta-io/delta-rs/issues/2381)
+- is there a way to make writing timestamp\_ntz optional [\#2339](https://github.com/delta-io/delta-rs/issues/2339)
+- Update arrow dependency [\#2328](https://github.com/delta-io/delta-rs/issues/2328)
+- Release GIL in deltalake.write\_deltalake [\#2234](https://github.com/delta-io/delta-rs/issues/2234)
+- Unable to retrieve custom metadata from tables in rust [\#2153](https://github.com/delta-io/delta-rs/issues/2153)
+- Refactor commit interface to be a Builder [\#2131](https://github.com/delta-io/delta-rs/issues/2131)
+
+**Fixed bugs:**
+
+- Handle rate limiting during write contention [\#2451](https://github.com/delta-io/delta-rs/issues/2451)
+- regression: delta.logRetentionDuration doesn't seem to be respected [\#2447](https://github.com/delta-io/delta-rs/issues/2447)
+- Issue writing to mounted storage in AKS using delta-rs library [\#2445](https://github.com/delta-io/delta-rs/issues/2445)
+- TableMerger - when\_matched\_delete\(\) fails when Column names contain special characters [\#2438](https://github.com/delta-io/delta-rs/issues/2438)
+- Generic DeltaTable error: External error: Arrow error: Invalid argument error: arguments need to have the same data type - while merge data in to delta table [\#2423](https://github.com/delta-io/delta-rs/issues/2423)
+- Merge on predicate throws error on date column: Unable to convert expression to string [\#2420](https://github.com/delta-io/delta-rs/issues/2420)
+- Writing Tables with Append mode errors if the schema metadata is different [\#2419](https://github.com/delta-io/delta-rs/issues/2419)
+- Logstore issues on AWS Lambda [\#2410](https://github.com/delta-io/delta-rs/issues/2410)
+- Datafusion timestamp type doesn't respect delta lake schema [\#2408](https://github.com/delta-io/delta-rs/issues/2408)
+- Compacting produces smaller row groups than expected [\#2386](https://github.com/delta-io/delta-rs/issues/2386)
+- ValueError: Partition value cannot be parsed from string. [\#2380](https://github.com/delta-io/delta-rs/issues/2380)
+- Very slow s3 connection after 0.16.1 [\#2377](https://github.com/delta-io/delta-rs/issues/2377)
+- Merge update+insert truncates a delta table if the table is big enough [\#2362](https://github.com/delta-io/delta-rs/issues/2362)
+- Do not add readerFeatures or writerFeatures keys under checkpoint files if minReaderVersion or minWriterVersion do not satisfy the requirements [\#2360](https://github.com/delta-io/delta-rs/issues/2360)
+- Create empty table failed on rust engine [\#2354](https://github.com/delta-io/delta-rs/issues/2354)
+- Getting error message when running in lambda: message: "Too many open files" [\#2353](https://github.com/delta-io/delta-rs/issues/2353)
+- Temporary files filling up \_delta\_log folder - increasing table load time [\#2351](https://github.com/delta-io/delta-rs/issues/2351)
+- compact fails with merged schemas [\#2347](https://github.com/delta-io/delta-rs/issues/2347)
+- Cannot merge into table partitioned by date type column on 0.16.3 [\#2344](https://github.com/delta-io/delta-rs/issues/2344)
+- Merge breaks using logical datatype decimal128 [\#2343](https://github.com/delta-io/delta-rs/issues/2343)
+- Decimal types are not checked against max precision/scale at table creation [\#2331](https://github.com/delta-io/delta-rs/issues/2331)
+- Merge update+insert truncates a delta table [\#2320](https://github.com/delta-io/delta-rs/issues/2320)
+- Extract `add.stats_parsed` with wrong type [\#2312](https://github.com/delta-io/delta-rs/issues/2312)
+- Process fails without error message when executing merge [\#2310](https://github.com/delta-io/delta-rs/issues/2310)
+- delta\_rs doesn't seem to respect the row group size [\#2309](https://github.com/delta-io/delta-rs/issues/2309)
+- Auth error when running inside VS Code [\#2306](https://github.com/delta-io/delta-rs/issues/2306)
+- Unable to read deltatables with binary columns: Binary is not supported by JSON [\#2302](https://github.com/delta-io/delta-rs/issues/2302)
+- Schema evolution not coercing with Large arrow types [\#2298](https://github.com/delta-io/delta-rs/issues/2298)
+- Panic in `deltalake_core::kernel::snapshot::log_segment::list_log_files_with_checkpoint::{{closure}}` [\#2290](https://github.com/delta-io/delta-rs/issues/2290)
+- Checkpoint does not preserve reader and writer features for the table protocol. [\#2288](https://github.com/delta-io/delta-rs/issues/2288)
+- Z-Order with larger dataset resulting in memory error [\#2284](https://github.com/delta-io/delta-rs/issues/2284)
+- Successful writes return error when using concurrent writers [\#2279](https://github.com/delta-io/delta-rs/issues/2279)
+- Rust writer should raise when decimal types are incompatible \(currently writes and puts table in invalid state\) [\#2275](https://github.com/delta-io/delta-rs/issues/2275)
+- Generic DeltaTable error: Version mismatch with new schema merge functionality in AWS S3 [\#2262](https://github.com/delta-io/delta-rs/issues/2262)
+- DeltaTable is not resilient to corrupted checkpoint state [\#2258](https://github.com/delta-io/delta-rs/issues/2258)
+- Inconsistent units of time [\#2256](https://github.com/delta-io/delta-rs/issues/2256)
+- Partition column comparison is an assertion rather than if block with raise exception [\#2242](https://github.com/delta-io/delta-rs/issues/2242)
+- Unable to merge column names starting from numbers [\#2230](https://github.com/delta-io/delta-rs/issues/2230)
+- Merging to a table with multiple distinct partitions in parallel fails [\#2227](https://github.com/delta-io/delta-rs/issues/2227)
+- cleanup\_metadata not respecting custom `logRetentionDuration` [\#2180](https://github.com/delta-io/delta-rs/issues/2180)
+- Merge predicate fails with a field with a space [\#2167](https://github.com/delta-io/delta-rs/issues/2167)
+- When\_matched\_update causes records to be lost with explicit predicate [\#2158](https://github.com/delta-io/delta-rs/issues/2158)
+- Merge execution time grows exponentially with the number of columns [\#2107](https://github.com/delta-io/delta-rs/issues/2107)
+- \_internal.DeltaError when merging [\#2084](https://github.com/delta-io/delta-rs/issues/2084)
+
+## [rust-v0.17.1](https://github.com/delta-io/delta-rs/tree/rust-v0.17.1) (2024-03-06)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.0...rust-v0.17.1)
+
+**Implemented enhancements:**
+
+- Get statistics metadata [\#2233](https://github.com/delta-io/delta-rs/issues/2233)
+- add option to append only a subsets of columns [\#2212](https://github.com/delta-io/delta-rs/issues/2212)
+- add documentation how to configure delta.logRetentionDuration [\#2072](https://github.com/delta-io/delta-rs/issues/2072)
+- Add `drop constraint` [\#2070](https://github.com/delta-io/delta-rs/issues/2070)
+- Add 0.16 deprecation warnings for DynamoDB lock [\#2049](https://github.com/delta-io/delta-rs/issues/2049)
+
+**Fixed bugs:**
+
+- cleanup\_metadata not respecting custom `logRetentionDuration` [\#2180](https://github.com/delta-io/delta-rs/issues/2180)
+- Rust writer panics on empty record batches [\#2253](https://github.com/delta-io/delta-rs/issues/2253)
+- DeltaLake executed Rust: write method not found in `DeltaOps` [\#2244](https://github.com/delta-io/delta-rs/issues/2244)
+- DELTA\_FILE\_PATTERN regex is incorrectly matching tmp commit files [\#2201](https://github.com/delta-io/delta-rs/issues/2201)
+- Failed to create checkpoint with "Parquet does not support writing empty structs" [\#2189](https://github.com/delta-io/delta-rs/issues/2189)
+- Error when parsing delete expressions [\#2187](https://github.com/delta-io/delta-rs/issues/2187)
+- terminate called without an active exception [\#2184](https://github.com/delta-io/delta-rs/issues/2184)
+- Now conda-installable on M1 [\#2178](https://github.com/delta-io/delta-rs/issues/2178)
+- Add error message for partition\_by check [\#2177](https://github.com/delta-io/delta-rs/issues/2177)
+- deltalake 0.15.2 prints partitions\_values and paths which is not desired [\#2176](https://github.com/delta-io/delta-rs/issues/2176)
+- cleanup\_metadata can potentially delete most recent checkpoint, corrupting table [\#2174](https://github.com/delta-io/delta-rs/issues/2174)
+- Broken filter for newly created delta table [\#2169](https://github.com/delta-io/delta-rs/issues/2169)
+- Hash for StructField should consider more than the name [\#2045](https://github.com/delta-io/delta-rs/issues/2045)
+- Schema comparison in writer [\#1853](https://github.com/delta-io/delta-rs/issues/1853)
+- fix\(python\): sort before schema comparison [\#2209](https://github.com/delta-io/delta-rs/pull/2209) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: prevent writing checkpoints with a version that does not exist in table state [\#1863](https://github.com/delta-io/delta-rs/pull/1863) ([rtyler](https://github.com/rtyler))
+
+**Closed issues:**
+
+- Bug/Question: arrow's `FixedSizeList` is not roundtrippable [\#2162](https://github.com/delta-io/delta-rs/issues/2162)
+
+**Merged pull requests:**
+
+- fix: fixes panic on empty write [\#2254](https://github.com/delta-io/delta-rs/pull/2254) ([aersam](https://github.com/aersam))
+- fix\(rust\): typo deletionvectors [\#2251](https://github.com/delta-io/delta-rs/pull/2251) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix\(rust\): make interval parsing compatible with plural form [\#2250](https://github.com/delta-io/delta-rs/pull/2250) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: bump to 0.16 [\#2248](https://github.com/delta-io/delta-rs/pull/2248) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: merge schema support for the write operation and Python [\#2246](https://github.com/delta-io/delta-rs/pull/2246) ([rtyler](https://github.com/rtyler))
+- fix: object\_store 0.9.0 since 0.9.1 causes CI failure [\#2245](https://github.com/delta-io/delta-rs/pull/2245) ([aersam](https://github.com/aersam))
+- chore\(python\): bump version [\#2241](https://github.com/delta-io/delta-rs/pull/2241) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: fix ruff and mypy version and do formatting [\#2240](https://github.com/delta-io/delta-rs/pull/2240) ([aersam](https://github.com/aersam))
+- feat\(python, rust\): timestampNtz support [\#2236](https://github.com/delta-io/delta-rs/pull/2236) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: clean up some compilation failures and un-ignore some tests [\#2231](https://github.com/delta-io/delta-rs/pull/2231) ([rtyler](https://github.com/rtyler))
+- docs: fixing example in CONTRIBUTING.md [\#2224](https://github.com/delta-io/delta-rs/pull/2224) ([gacharya](https://github.com/gacharya))
+- perf: directly create projection instead of using DataFrame::with\_column [\#2222](https://github.com/delta-io/delta-rs/pull/2222) ([emcake](https://github.com/emcake))
+- chore: remove caches from github actions [\#2215](https://github.com/delta-io/delta-rs/pull/2215) ([rtyler](https://github.com/rtyler))
+- fix: `is_commit_file` should only catch commit jsons [\#2213](https://github.com/delta-io/delta-rs/pull/2213) ([emcake](https://github.com/emcake))
+- chore: fix the Cargo.tomls to publish information properly on docs.rs [\#2211](https://github.com/delta-io/delta-rs/pull/2211) ([rtyler](https://github.com/rtyler))
+- fix\(writer\): retry storage.put on temporary network errors [\#2207](https://github.com/delta-io/delta-rs/pull/2207) ([qinix](https://github.com/qinix))
+- fix: canonicalize config keys [\#2206](https://github.com/delta-io/delta-rs/pull/2206) ([emcake](https://github.com/emcake))
+- docs: update README code samples for newer versions [\#2202](https://github.com/delta-io/delta-rs/pull/2202) ([jhoekx](https://github.com/jhoekx))
+- docs: dask integration fix formatting typo [\#2196](https://github.com/delta-io/delta-rs/pull/2196) ([avriiil](https://github.com/avriiil))
+- fix: add data\_type and nullable to StructField hash \(\#2045\) [\#2190](https://github.com/delta-io/delta-rs/pull/2190) ([sonhmai](https://github.com/sonhmai))
+- fix: removed panic in method [\#2185](https://github.com/delta-io/delta-rs/pull/2185) ([mightyshazam](https://github.com/mightyshazam))
+- feat: implement string representation for PartitionFilter [\#2183](https://github.com/delta-io/delta-rs/pull/2183) ([sonhmai](https://github.com/sonhmai))
+- fix: correct map field names [\#2182](https://github.com/delta-io/delta-rs/pull/2182) ([emcake](https://github.com/emcake))
+- feat: add comment to explain why assert has failed and show state [\#2179](https://github.com/delta-io/delta-rs/pull/2179) ([braaannigan](https://github.com/braaannigan))
+- docs: include the 0.17.0 changelog [\#2173](https://github.com/delta-io/delta-rs/pull/2173) ([rtyler](https://github.com/rtyler))
+- fix\(python\): skip empty row groups during stats gathering [\#2172](https://github.com/delta-io/delta-rs/pull/2172) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: 0.17.0 publish changes [\#2171](https://github.com/delta-io/delta-rs/pull/2171) ([rtyler](https://github.com/rtyler))
+- chore\(python\): bump version [\#2170](https://github.com/delta-io/delta-rs/pull/2170) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: update all the package metadata for publication to crates.io [\#2168](https://github.com/delta-io/delta-rs/pull/2168) ([rtyler](https://github.com/rtyler))
+- fix: rm println in python lib [\#2166](https://github.com/delta-io/delta-rs/pull/2166) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: cleanup minor clippies and other warns [\#2161](https://github.com/delta-io/delta-rs/pull/2161) ([rtyler](https://github.com/rtyler))
+- feat: implement clone for DeltaTable struct [\#2160](https://github.com/delta-io/delta-rs/pull/2160) ([mightyshazam](https://github.com/mightyshazam))
+- fix: allow loading of tables with identity columns [\#2155](https://github.com/delta-io/delta-rs/pull/2155) ([rtyler](https://github.com/rtyler))
+- fix: replace BTreeMap with IndexMap to preserve insertion order [\#2150](https://github.com/delta-io/delta-rs/pull/2150) ([roeap](https://github.com/roeap))
+- fix: made generalize\_filter less permissive, also added more cases [\#2149](https://github.com/delta-io/delta-rs/pull/2149) ([emcake](https://github.com/emcake))
+- docs: add delta lake best practices [\#2147](https://github.com/delta-io/delta-rs/pull/2147) ([MrPowers](https://github.com/MrPowers))
+- chore: shorten up the crate folder names in the tree [\#2145](https://github.com/delta-io/delta-rs/pull/2145) ([rtyler](https://github.com/rtyler))
+- fix\(\#2143\): keep specific error type when writing fails [\#2144](https://github.com/delta-io/delta-rs/pull/2144) ([abaerptc](https://github.com/abaerptc))
+- refactor\(python\): drop custom filesystem in write\_deltalake [\#2137](https://github.com/delta-io/delta-rs/pull/2137) ([ion-elgreco](https://github.com/ion-elgreco))
+- docs: use transparent logo in README [\#2132](https://github.com/delta-io/delta-rs/pull/2132) ([roeap](https://github.com/roeap))
+- fix: order logical schema to match physical schema [\#2129](https://github.com/delta-io/delta-rs/pull/2129) ([Blajda](https://github.com/Blajda))
+- feat: expose stats schema on Snapshot [\#2128](https://github.com/delta-io/delta-rs/pull/2128) ([roeap](https://github.com/roeap))
+- feat: update table config to contain new config keys [\#2127](https://github.com/delta-io/delta-rs/pull/2127) ([roeap](https://github.com/roeap))
+- fix: clean-up paths created during tests [\#2126](https://github.com/delta-io/delta-rs/pull/2126) ([roeap](https://github.com/roeap))
+- fix: prevent empty stats struct during parquet write [\#2125](https://github.com/delta-io/delta-rs/pull/2125) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- fix: temporarily skip s3 roundtrip test [\#2124](https://github.com/delta-io/delta-rs/pull/2124) ([roeap](https://github.com/roeap))
+- fix: do not write empty parquet file/add on writer close; accurately … [\#2123](https://github.com/delta-io/delta-rs/pull/2123) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- docs: add dask page to integration docs [\#2122](https://github.com/delta-io/delta-rs/pull/2122) ([avriiil](https://github.com/avriiil))
+- chore: upgrade to DataFusion 35.0 [\#2121](https://github.com/delta-io/delta-rs/pull/2121) ([philippemnoel](https://github.com/philippemnoel))
+- fix\(s3\): restore working test for DynamoDb log store repair log on read [\#2120](https://github.com/delta-io/delta-rs/pull/2120) ([dispanser](https://github.com/dispanser))
+- fix: set partition values for added files when building compaction plan [\#2119](https://github.com/delta-io/delta-rs/pull/2119) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- fix: add missing pandas import [\#2116](https://github.com/delta-io/delta-rs/pull/2116) ([Tim-Haarman](https://github.com/Tim-Haarman))
+- chore: temporarily ignore the repair on update test [\#2114](https://github.com/delta-io/delta-rs/pull/2114) ([rtyler](https://github.com/rtyler))
+- docs: delta lake is great for small data [\#2113](https://github.com/delta-io/delta-rs/pull/2113) ([MrPowers](https://github.com/MrPowers))
+- chore: removed unnecessary print statement from update method [\#2111](https://github.com/delta-io/delta-rs/pull/2111) ([LilMonk](https://github.com/LilMonk))
+- fix: schema issue within writebuilder [\#2106](https://github.com/delta-io/delta-rs/pull/2106) ([universalmind303](https://github.com/universalmind303))
+- docs: fix arg indent [\#2103](https://github.com/delta-io/delta-rs/pull/2103) ([wchatx](https://github.com/wchatx))
+- docs: delta lake file skipping [\#2096](https://github.com/delta-io/delta-rs/pull/2096) ([MrPowers](https://github.com/MrPowers))
+- docs: move dynamo docs into new docs page [\#2093](https://github.com/delta-io/delta-rs/pull/2093) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: bump python [\#2092](https://github.com/delta-io/delta-rs/pull/2092) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: allow merge\_execute to release the GIL [\#2091](https://github.com/delta-io/delta-rs/pull/2091) ([emcake](https://github.com/emcake))
+- docs: how delta lake transactions work [\#2089](https://github.com/delta-io/delta-rs/pull/2089) ([MrPowers](https://github.com/MrPowers))
+- fix: reinstate copy-if-not-exists passthrough [\#2083](https://github.com/delta-io/delta-rs/pull/2083) ([emcake](https://github.com/emcake))
+- docs: make an overview tab visible in docs [\#2080](https://github.com/delta-io/delta-rs/pull/2080) ([r3stl355](https://github.com/r3stl355))
+- docs: add usage guide for check constraints [\#2079](https://github.com/delta-io/delta-rs/pull/2079) ([hntd187](https://github.com/hntd187))
+- docs: update docs for rust print statement [\#2077](https://github.com/delta-io/delta-rs/pull/2077) ([skariyania](https://github.com/skariyania))
+- docs: add page on why to use delta lake [\#2076](https://github.com/delta-io/delta-rs/pull/2076) ([MrPowers](https://github.com/MrPowers))
+- feat\(rust, python\): add `drop constraint` operation [\#2071](https://github.com/delta-io/delta-rs/pull/2071) ([ion-elgreco](https://github.com/ion-elgreco))
+- refactor: add deltalake-gcp crate [\#2061](https://github.com/delta-io/delta-rs/pull/2061) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: allow checkpoints to contain metadata actions without a createdTime value [\#2059](https://github.com/delta-io/delta-rs/pull/2059) ([rtyler](https://github.com/rtyler))
+- chore: bump version python [\#2047](https://github.com/delta-io/delta-rs/pull/2047) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: ensure metadata cleanup do not corrupt tables without checkpoints [\#2044](https://github.com/delta-io/delta-rs/pull/2044) ([Blajda](https://github.com/Blajda))
+- docs: update docs for merge [\#2042](https://github.com/delta-io/delta-rs/pull/2042) ([Blajda](https://github.com/Blajda))
+- chore: update documentation for S3 / DynamoDb log store configuration [\#2041](https://github.com/delta-io/delta-rs/pull/2041) ([dispanser](https://github.com/dispanser))
+- feat: arrow backed log replay and table state [\#2037](https://github.com/delta-io/delta-rs/pull/2037) ([roeap](https://github.com/roeap))
+- fix: properly deserialize percent-encoded file paths of Remove actions, to make sure tombstone and file paths match [\#2035](https://github.com/delta-io/delta-rs/pull/2035) ([sigorbor](https://github.com/sigorbor))
+- fix: remove casts of structs to record batch [\#2033](https://github.com/delta-io/delta-rs/pull/2033) ([Blajda](https://github.com/Blajda))
+- feat\(python, rust\): expose custom\_metadata for all operations [\#2032](https://github.com/delta-io/delta-rs/pull/2032) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: refactor WriterProperties class [\#2030](https://github.com/delta-io/delta-rs/pull/2030) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: update datafusion [\#2029](https://github.com/delta-io/delta-rs/pull/2029) ([roeap](https://github.com/roeap))
+- refactor: increase metadata action usage [\#2027](https://github.com/delta-io/delta-rs/pull/2027) ([roeap](https://github.com/roeap))
+- fix: github actions for releasing docs [\#2026](https://github.com/delta-io/delta-rs/pull/2026) ([r3stl355](https://github.com/r3stl355))
+- feat: introduce schema evolution on RecordBatchWriter [\#2024](https://github.com/delta-io/delta-rs/pull/2024) ([rtyler](https://github.com/rtyler))
+- refactor: move azure integration to dedicated crate [\#2023](https://github.com/delta-io/delta-rs/pull/2023) ([roeap](https://github.com/roeap))
+- fix: use temporary table names during the constraint checks [\#2017](https://github.com/delta-io/delta-rs/pull/2017) ([r3stl355](https://github.com/r3stl355))
+- docs: add alterer [\#2014](https://github.com/delta-io/delta-rs/pull/2014) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: version bump python release [\#2011](https://github.com/delta-io/delta-rs/pull/2011) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: fix the test\_restore\_by\_datetime test [\#2010](https://github.com/delta-io/delta-rs/pull/2010) ([r3stl355](https://github.com/r3stl355))
+- feat\(rust\): add more commit info to most operations [\#2009](https://github.com/delta-io/delta-rs/pull/2009) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat\(python\): add schema conversion of FixedSizeBinaryArray and FixedSizeListType [\#2005](https://github.com/delta-io/delta-rs/pull/2005) ([balbok0](https://github.com/balbok0))
+- feat\(python\): expose large\_dtype param in `merge` [\#2003](https://github.com/delta-io/delta-rs/pull/2003) ([ion-elgreco](https://github.com/ion-elgreco))
+- docs: add writer properties to docs [\#2002](https://github.com/delta-io/delta-rs/pull/2002) ([ion-elgreco](https://github.com/ion-elgreco))
+- chore: fix CI breaking lint issues [\#1999](https://github.com/delta-io/delta-rs/pull/1999) ([r3stl355](https://github.com/r3stl355))
+- feat: implementation for replaceWhere [\#1996](https://github.com/delta-io/delta-rs/pull/1996) ([r3stl355](https://github.com/r3stl355))
+- chore: refactoring AWS code out of the core crate [\#1995](https://github.com/delta-io/delta-rs/pull/1995) ([rtyler](https://github.com/rtyler))
+- feat\(python\): expose custom metadata to writers [\#1994](https://github.com/delta-io/delta-rs/pull/1994) ([ion-elgreco](https://github.com/ion-elgreco))
+- docs: datafusion integration [\#1993](https://github.com/delta-io/delta-rs/pull/1993) ([MrPowers](https://github.com/MrPowers))
+- fix: flakey gcs test [\#1987](https://github.com/delta-io/delta-rs/pull/1987) ([roeap](https://github.com/roeap))
+- fix: implement consistent formatting for constraint expressions [\#1985](https://github.com/delta-io/delta-rs/pull/1985) ([Blajda](https://github.com/Blajda))
+- fix: case sensitivity for z-order [\#1982](https://github.com/delta-io/delta-rs/pull/1982) ([Blajda](https://github.com/Blajda))
+- feat\(python\): add writer\_properties to all operations [\#1980](https://github.com/delta-io/delta-rs/pull/1980) ([ion-elgreco](https://github.com/ion-elgreco))
+- refactor: trigger metadata retrieval only during `DeltaTable.metadata` [\#1979](https://github.com/delta-io/delta-rs/pull/1979) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: retry with exponential backoff for DynamoDb interaction [\#1975](https://github.com/delta-io/delta-rs/pull/1975) ([dispanser](https://github.com/dispanser))
+- feat\(python\): expose `add constraint` operation [\#1973](https://github.com/delta-io/delta-rs/pull/1973) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: properly decode percent-encoded file paths coming from parquet checkpoints [\#1970](https://github.com/delta-io/delta-rs/pull/1970) ([sigorbor](https://github.com/sigorbor))
+- feat: omit unmodified files during merge write [\#1969](https://github.com/delta-io/delta-rs/pull/1969) ([Blajda](https://github.com/Blajda))
+- feat\(python\): combine load\_version/load\_with\_datetime into `load_as_version` [\#1968](https://github.com/delta-io/delta-rs/pull/1968) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: enable S3 integration tests to be configured via environment vars [\#1966](https://github.com/delta-io/delta-rs/pull/1966) ([dispanser](https://github.com/dispanser))
+- fix: handle empty table response in unity api [\#1963](https://github.com/delta-io/delta-rs/pull/1963) ([JonasDev1](https://github.com/JonasDev1))
+- docs: add auto-release when docs are merged to main [\#1962](https://github.com/delta-io/delta-rs/pull/1962) ([r3stl355](https://github.com/r3stl355))
+- feat: cast list items to default before write with different item names [\#1959](https://github.com/delta-io/delta-rs/pull/1959) ([JonasDev1](https://github.com/JonasDev1))
+- feat: merge using partition filters [\#1958](https://github.com/delta-io/delta-rs/pull/1958) ([emcake](https://github.com/emcake))
+- chore: relocate cast\_record\_batch into its own module to shed the datafusion dependency [\#1955](https://github.com/delta-io/delta-rs/pull/1955) ([rtyler](https://github.com/rtyler))
+- fix: respect case sensitivity on operations [\#1954](https://github.com/delta-io/delta-rs/pull/1954) ([Blajda](https://github.com/Blajda))
+- docs: add better installation instructions [\#1951](https://github.com/delta-io/delta-rs/pull/1951) ([MrPowers](https://github.com/MrPowers))
+- docs: add polars integration [\#1949](https://github.com/delta-io/delta-rs/pull/1949) ([MrPowers](https://github.com/MrPowers))
+- fix: add arrow page back [\#1944](https://github.com/delta-io/delta-rs/pull/1944) ([ion-elgreco](https://github.com/ion-elgreco))
+- fix: remove the get\_data\_catalog\(\) function [\#1941](https://github.com/delta-io/delta-rs/pull/1941) ([rtyler](https://github.com/rtyler))
+- chore: update runs-on value in python\_release.yml [\#1940](https://github.com/delta-io/delta-rs/pull/1940) ([wjones127](https://github.com/wjones127))
+- docs: start how delta lake works [\#1938](https://github.com/delta-io/delta-rs/pull/1938) ([MrPowers](https://github.com/MrPowers))
+- docs: add logo, dark mode, boost search [\#1936](https://github.com/delta-io/delta-rs/pull/1936) ([ion-elgreco](https://github.com/ion-elgreco))
+- refactor: prefer usage of metadata and protocol fields [\#1935](https://github.com/delta-io/delta-rs/pull/1935) ([roeap](https://github.com/roeap))
+- chore: update python version [\#1934](https://github.com/delta-io/delta-rs/pull/1934) ([wjones127](https://github.com/wjones127))
+- feat\(python\): expose create to DeltaTable class [\#1932](https://github.com/delta-io/delta-rs/pull/1932) ([ion-elgreco](https://github.com/ion-elgreco))
+- docs: fix all examples and change overall structure [\#1931](https://github.com/delta-io/delta-rs/pull/1931) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: update to include pyarrow-hotfix [\#1930](https://github.com/delta-io/delta-rs/pull/1930) ([dennyglee](https://github.com/dennyglee))
+- fix: get rid of panic in during table [\#1928](https://github.com/delta-io/delta-rs/pull/1928) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov))
+- fix\(rust/python\): `optimize.compact` not working with tables with mixed large/normal arrow [\#1926](https://github.com/delta-io/delta-rs/pull/1926) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: extend write\_deltalake to accept Deltalake schema [\#1922](https://github.com/delta-io/delta-rs/pull/1922) ([r3stl355](https://github.com/r3stl355))
+- fix: fail fast for opening non-existent path [\#1917](https://github.com/delta-io/delta-rs/pull/1917) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov))
+- feat: check constraints [\#1915](https://github.com/delta-io/delta-rs/pull/1915) ([hntd187](https://github.com/hntd187))
+- docs: delta lake arrow integration page [\#1914](https://github.com/delta-io/delta-rs/pull/1914) ([MrPowers](https://github.com/MrPowers))
+- feat: add more info for contributors [\#1913](https://github.com/delta-io/delta-rs/pull/1913) ([r3stl355](https://github.com/r3stl355))
+- fix: add buffer flushing to filesystem writes [\#1911](https://github.com/delta-io/delta-rs/pull/1911) ([r3stl355](https://github.com/r3stl355))
+- docs: update docs home page and add pandas integration [\#1905](https://github.com/delta-io/delta-rs/pull/1905) ([MrPowers](https://github.com/MrPowers))
+- feat: implement S3 log store with transactions backed by DynamoDb [\#1904](https://github.com/delta-io/delta-rs/pull/1904) ([dispanser](https://github.com/dispanser))
+- fix: prune each merge bin with only 1 file [\#1902](https://github.com/delta-io/delta-rs/pull/1902) ([haruband](https://github.com/haruband))
+- docs: update python docs link in readme.md [\#1899](https://github.com/delta-io/delta-rs/pull/1899) ([thomasfrederikhoeck](https://github.com/thomasfrederikhoeck))
+- docs: on append, overwrite, delete and z-ordering [\#1897](https://github.com/delta-io/delta-rs/pull/1897) ([MrPowers](https://github.com/MrPowers))
+- feat: compare timestamp partition values as timestamps instead of strings [\#1895](https://github.com/delta-io/delta-rs/pull/1895) ([sigorbor](https://github.com/sigorbor))
+- feat\(python\): expose rust writer as additional engine v2 [\#1891](https://github.com/delta-io/delta-rs/pull/1891) ([ion-elgreco](https://github.com/ion-elgreco))
+- feat: add high-level checking for append-only tables [\#1887](https://github.com/delta-io/delta-rs/pull/1887) ([junjunjd](https://github.com/junjunjd))
+- test: loading version 0 Delta table [\#1885](https://github.com/delta-io/delta-rs/pull/1885) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov))
+- fix: improve catalog failure error message, add missing Glue native-tls feature dependency [\#1883](https://github.com/delta-io/delta-rs/pull/1883) ([r3stl355](https://github.com/r3stl355))
+- refactor: simplify `DeltaTableState` [\#1877](https://github.com/delta-io/delta-rs/pull/1877) ([roeap](https://github.com/roeap))
+- refactor: express log schema in delta types [\#1876](https://github.com/delta-io/delta-rs/pull/1876) ([roeap](https://github.com/roeap))
+- docs: add Rust installation instructions [\#1875](https://github.com/delta-io/delta-rs/pull/1875) ([MrPowers](https://github.com/MrPowers))
+- chore: clippy [\#1871](https://github.com/delta-io/delta-rs/pull/1871) ([roeap](https://github.com/roeap))
+- fix: docs deployment action [\#1869](https://github.com/delta-io/delta-rs/pull/1869) ([r3stl355](https://github.com/r3stl355))
+- docs: tell how to claim an issue [\#1866](https://github.com/delta-io/delta-rs/pull/1866) ([wjones127](https://github.com/wjones127))
+- feat: drop python 3.7 and adopt 3.12 [\#1859](https://github.com/delta-io/delta-rs/pull/1859) ([roeap](https://github.com/roeap))
+- feat: create benchmarks for merge [\#1857](https://github.com/delta-io/delta-rs/pull/1857) ([Blajda](https://github.com/Blajda))
+- chore: add @ion-elgreco to python/ [\#1855](https://github.com/delta-io/delta-rs/pull/1855) ([rtyler](https://github.com/rtyler))
+- fix: compile error with lifetime issues on optimize \(\#1843\) [\#1852](https://github.com/delta-io/delta-rs/pull/1852) ([dispanser](https://github.com/dispanser))
+- feat: implement issue auto-assign on `take` comment [\#1851](https://github.com/delta-io/delta-rs/pull/1851) ([r3stl355](https://github.com/r3stl355))
+- docs: add docs on small file compaction with optimize [\#1850](https://github.com/delta-io/delta-rs/pull/1850) ([MrPowers](https://github.com/MrPowers))
+- fix: checkpoint error with Azure Synapse [\#1848](https://github.com/delta-io/delta-rs/pull/1848) ([PierreDubrulle](https://github.com/PierreDubrulle))
+- feat\(python\): expose `convert_to_deltalake` [\#1842](https://github.com/delta-io/delta-rs/pull/1842) ([ion-elgreco](https://github.com/ion-elgreco))
+- ci: adopt `ruff format` for formatting [\#1841](https://github.com/delta-io/delta-rs/pull/1841) ([roeap](https://github.com/roeap))
+
+## [rust-v0.17.0](https://github.com/delta-io/delta-rs/tree/rust-v0.17.0) (2024-02-06)
+
+:warning: The release of 0.17.0 **removes** the legacy dynamodb lock functionality, AWS users must read these release notes! :warning:
+
+### File handlers
+
+The 0.17.0 release moves storage implementations into their own crates, such as
+`deltalake-aws`. A consequence of that refactoring is that custom storage and
+file scheme handlers must be registered/initialized at runtime. Storage
+subcrates conventionally define a `register_handlers` function which performs
+that task. Users may see errors such as:
+```
+thread 'main' panicked at /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/deltalake-core-0.17.0/src/table/builder.rs:189:48:
+The specified table_uri is not valid: InvalidTableLocation("Unknown scheme: s3")
+```
+
+* Users of the meta-crate (`deltalake`) can call the storage crate via: `deltalake::aws::register_handlers(None);` at the entrypoint for their code.
+* Users who adopt `core` and storage crates independently (e.g. `deltalake-aws`) can register via `deltalake_aws::register_handlers(None);`.
+
+The AWS, Azure, and GCP crates must all have their custom file schemes registered in this fashion.
+
+
+### dynamodblock to S3DynamoDbLogStore
+
+The locking mechanism is fundamentally different between `deltalake` v0.16.x and v0.17.0. Starting with this release, the `deltalake` and `deltalake-aws` crates rely on the same [protocol for concurrent writes on AWS](https://docs.delta.io/latest/delta-storage.html#setup-configuration-s3-multi-cluster) as the Delta Lake/Spark implementation.
+
+Fundamentally the DynamoDB table structure changes, [which is documented here](https://docs.delta.io/latest/delta-storage.html#setup-configuration-s3-multi-cluster). The configuration of a Rust process should continue to use the `AWS_S3_LOCKING_PROVIDER` environment value of `dynamodb`. The new table must be specified with the `DELTA_DYNAMO_TABLE_NAME` environment or configuration variable, and that should name the _new_ `S3DynamoDbLogStore` compatible DynamoDB table.
+
+Because locking is required to ensure safe, consistent writes, **there is no iterative migration**: 0.16 and 0.17 writers **cannot** safely coexist. The following steps should be taken when upgrading:
+
+1. Stop all 0.16.x writers
+2. Ensure writes are completed, and lock table is empty.
+3. Deploy 0.17.0 writers
+
+
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.5...rust-v0.17.0)
+
+**Implemented enhancements:**
+
+- Expose the ability to compile DataFusion with SIMD [\#2118](https://github.com/delta-io/delta-rs/issues/2118)
+- Updating Table log retention configuration with `write_deltalake` silently changes nothing [\#2108](https://github.com/delta-io/delta-rs/issues/2108)
+- ALTER table, ALTER Column, Add/Modify Comment, Add/remove/rename partitions, Set Tags, Set location, Set TBLProperties [\#2088](https://github.com/delta-io/delta-rs/issues/2088)
+- Docs: Update docs for check constraints [\#2063](https://github.com/delta-io/delta-rs/issues/2063)
+- Don't `ensure_table_uri` when creating a table `with_log_store` [\#2036](https://github.com/delta-io/delta-rs/issues/2036)
+- Exposing custom\_metadata in merge operation [\#2031](https://github.com/delta-io/delta-rs/issues/2031)
+- Support custom table properties via TableAlterer and write/merge [\#2022](https://github.com/delta-io/delta-rs/issues/2022)
+- Remove parquet2 crate support [\#2004](https://github.com/delta-io/delta-rs/issues/2004)
+- Merge operation that only touches necessary partitions [\#1991](https://github.com/delta-io/delta-rs/issues/1991)
+- store userMetadata on write operations [\#1990](https://github.com/delta-io/delta-rs/issues/1990)
+- Create Dask integration page [\#1956](https://github.com/delta-io/delta-rs/issues/1956)
+- Merge: Filtering on partitions [\#1918](https://github.com/delta-io/delta-rs/issues/1918)
+- Rethink the load\_version and load\_with\_datetime interfaces [\#1910](https://github.com/delta-io/delta-rs/issues/1910)
+- docs: Delta Lake + Arrow Integration [\#1908](https://github.com/delta-io/delta-rs/issues/1908)
+- docs: Delta Lake + Polars integration [\#1906](https://github.com/delta-io/delta-rs/issues/1906)
+- Rethink decision to expose the public interface in namespaces [\#1900](https://github.com/delta-io/delta-rs/issues/1900)
+- Add documentation on how to build and run documentation locally [\#1893](https://github.com/delta-io/delta-rs/issues/1893)
+- Add API to create an empty Delta Lake table [\#1892](https://github.com/delta-io/delta-rs/issues/1892)
+- Implementing CHECK constraints [\#1881](https://github.com/delta-io/delta-rs/issues/1881)
+- Check Invariants are respecting table features for write paths [\#1880](https://github.com/delta-io/delta-rs/issues/1880)
+- Organize docs with single lefthand sidebar [\#1873](https://github.com/delta-io/delta-rs/issues/1873)
+- Make sure invariants are handled properly throughout the codebase [\#1870](https://github.com/delta-io/delta-rs/issues/1870)
+- Unable to use deltalake `Schema` in `write_deltalake` [\#1862](https://github.com/delta-io/delta-rs/issues/1862)
+- Add a Rust-backed engine for write\_deltalake [\#1861](https://github.com/delta-io/delta-rs/issues/1861)
+- Run doctest in CI for Python API examples [\#1783](https://github.com/delta-io/delta-rs/issues/1783)
+- \[RFC\] Use arrow for checkpoint reading and state handling [\#1776](https://github.com/delta-io/delta-rs/issues/1776)
+- Expose Python exceptions in public module [\#1771](https://github.com/delta-io/delta-rs/issues/1771)
+- Expose cleanup\_metadata or create\_checkpoint\_from\_table\_uri\_and\_cleanup to the Python API [\#1768](https://github.com/delta-io/delta-rs/issues/1768)
+- Expose convert\_to\_delta to Python API [\#1767](https://github.com/delta-io/delta-rs/issues/1767)
+- Add high-level checking for append-only tables [\#1759](https://github.com/delta-io/delta-rs/issues/1759)
+
+**Fixed bugs:**
+
+- Row order no longer preserved after merge operation [\#2165](https://github.com/delta-io/delta-rs/issues/2165)
+- Error when reading delta table with IDENTITY column [\#2152](https://github.com/delta-io/delta-rs/issues/2152)
+- Merge on IS NULL condition doesn't work for empty table [\#2148](https://github.com/delta-io/delta-rs/issues/2148)
+- JsonWriter converts structured parsing error into plain string [\#2143](https://github.com/delta-io/delta-rs/issues/2143)
+- Pandas import error when merging tables [\#2112](https://github.com/delta-io/delta-rs/issues/2112)
+- test\_repair\_on\_update broken in main [\#2109](https://github.com/delta-io/delta-rs/issues/2109)
+- `WriteBuilder::with_input_execution_plan` does not apply the schema to the log's metadata fields [\#2105](https://github.com/delta-io/delta-rs/issues/2105)
+- MERGE logical plan vs execution plan schema mismatch [\#2104](https://github.com/delta-io/delta-rs/issues/2104)
+- Partitions not pushed down [\#2090](https://github.com/delta-io/delta-rs/issues/2090)
+- Cant create empty table with write\_deltalake [\#2086](https://github.com/delta-io/delta-rs/issues/2086)
+- Unexpected high costs on Google Cloud Storage [\#2085](https://github.com/delta-io/delta-rs/issues/2085)
+- Unable to read s3 table: `Unknown scheme: s3` [\#2065](https://github.com/delta-io/delta-rs/issues/2065)
+- write\_deltalake not respecting writer\_properties [\#2064](https://github.com/delta-io/delta-rs/issues/2064)
+- Unable to read/write tables with the "gs" schema in the table\_uri in 0.15.1 [\#2060](https://github.com/delta-io/delta-rs/issues/2060)
+- LockClient requiered error for S3 backend in 0.15.1 python [\#2057](https://github.com/delta-io/delta-rs/issues/2057)
+- Error while writing Pandas DataFrame to Delta Lake \(S3\) [\#2051](https://github.com/delta-io/delta-rs/issues/2051)
+- Error with dynamo locking provider on 0.15 [\#2034](https://github.com/delta-io/delta-rs/issues/2034)
+- Conda version 0.15.0 is missing files [\#2021](https://github.com/delta-io/delta-rs/issues/2021)
+- Rust panicking through Python library when a delete predicate uses a nullable field [\#2019](https://github.com/delta-io/delta-rs/issues/2019)
+- No snapshot or version 0 found, perhaps /Users/watsy0007/resources/test\_table/ is an empty dir? [\#2016](https://github.com/delta-io/delta-rs/issues/2016)
+- Generic DeltaTable error: type\_coercion in Struct column in merge operation [\#1998](https://github.com/delta-io/delta-rs/issues/1998)
+- Constraint expr not formatted during commit action [\#1971](https://github.com/delta-io/delta-rs/issues/1971)
+- .load\_with\_datetime\(\) is incorrectly rounding to nearest second [\#1967](https://github.com/delta-io/delta-rs/issues/1967)
+- vacuuming log files [\#1965](https://github.com/delta-io/delta-rs/issues/1965)
+- Unable to merge uppercase column names [\#1960](https://github.com/delta-io/delta-rs/issues/1960)
+- Schema error: Invalid data type for Delta Lake: Null [\#1946](https://github.com/delta-io/delta-rs/issues/1946)
+- Python v0.14 wheel files not up to date [\#1945](https://github.com/delta-io/delta-rs/issues/1945)
+- python Release 0.14 is missing Windows wheels [\#1942](https://github.com/delta-io/delta-rs/issues/1942)
+- CI integration test fails randomly: test\_restore\_by\_datetime [\#1925](https://github.com/delta-io/delta-rs/issues/1925)
+- Merge data freezes indefenetely [\#1920](https://github.com/delta-io/delta-rs/issues/1920)
+- Load DeltaTable from non-existing folder causing empty folder creation [\#1916](https://github.com/delta-io/delta-rs/issues/1916)
+- Reoptimizes merge bins with only 1 file, even though they have no effect. [\#1901](https://github.com/delta-io/delta-rs/issues/1901)
+- The Python Docs link in README.MD points to old docs [\#1898](https://github.com/delta-io/delta-rs/issues/1898)
+- optimize.compact\(\) fails with bad schema after updating to pyarrow 8.0 [\#1889](https://github.com/delta-io/delta-rs/issues/1889)
+- Python build is broken on main [\#1856](https://github.com/delta-io/delta-rs/issues/1856)
+- Checkpoint error with Azure Synapse [\#1847](https://github.com/delta-io/delta-rs/issues/1847)
+- merge very slow compared to delete + append on larger dataset [\#1846](https://github.com/delta-io/delta-rs/issues/1846)
+- get\_add\_actions fails with deltalake 0.13 [\#1835](https://github.com/delta-io/delta-rs/issues/1835)
+- Handle PyArrow CVE-2023-47248 [\#1834](https://github.com/delta-io/delta-rs/issues/1834)
+- Delta-rs writer hangs with to many file handles open \(Azure\) [\#1832](https://github.com/delta-io/delta-rs/issues/1832)
+- Encountering NotATable\("No snapshot or version 0 found, perhaps xxx is an empty dir?"\) [\#1831](https://github.com/delta-io/delta-rs/issues/1831)
+- write\_deltalake is not creating checkpoints [\#1815](https://github.com/delta-io/delta-rs/issues/1815)
+- Problem writing tables in directory named with char `~` [\#1806](https://github.com/delta-io/delta-rs/issues/1806)
+- DeltaTable Merge throws in merging if there are uppercase in Schema. [\#1797](https://github.com/delta-io/delta-rs/issues/1797)
+- rust merge error - datafusion panics [\#1790](https://github.com/delta-io/delta-rs/issues/1790)
+- expose use\_dictionary=False when writing Delta Table and running optimize [\#1772](https://github.com/delta-io/delta-rs/issues/1772)
+
+**Closed issues:**
+
+- Is this print necessary? Can we remove this. [\#2110](https://github.com/delta-io/delta-rs/issues/2110)
+- Azure concurrent writes [\#2069](https://github.com/delta-io/delta-rs/issues/2069)
+- Fix docs deployment [\#1867](https://github.com/delta-io/delta-rs/issues/1867)
+- Add a header in old docs and direct users to new docs [\#1865](https://github.com/delta-io/delta-rs/issues/1865)
+
+## [rust-v0.16.5](https://github.com/delta-io/delta-rs/tree/rust-v0.16.5) (2023-11-15)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.4...rust-v0.16.5)
+
+**Implemented enhancements:**
+
+- When will upgrade object\_store to 0.8? [\#1858](https://github.com/delta-io/delta-rs/issues/1858)
+- No Official Help [\#1849](https://github.com/delta-io/delta-rs/issues/1849)
+- Auto assign GitHub issues with a "take" message [\#1791](https://github.com/delta-io/delta-rs/issues/1791)
+
+**Fixed bugs:**
+
+- cargo clippy fails on core in main [\#1843](https://github.com/delta-io/delta-rs/issues/1843)
+
+## [rust-v0.16.4](https://github.com/delta-io/delta-rs/tree/rust-v0.16.4) (2023-11-12)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.3...rust-v0.16.4)
+
+**Implemented enhancements:**
+
+- Unable to add deltalake git dependency to cargo.toml [\#1821](https://github.com/delta-io/delta-rs/issues/1821)
+
+## [rust-v0.16.3](https://github.com/delta-io/delta-rs/tree/rust-v0.16.3) (2023-11-08)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.2...rust-v0.16.3)
+
+**Implemented enhancements:**
+
+- Docs: add release GitHub action [\#1799](https://github.com/delta-io/delta-rs/issues/1799)
+- Use bulk deletes where possible [\#1761](https://github.com/delta-io/delta-rs/issues/1761)
+
+**Fixed bugs:**
+
+- Code Owners no longer valid [\#1794](https://github.com/delta-io/delta-rs/issues/1794)
+- `MERGE` works incorrectly with partitioned table if the data column order is not same as table column order [\#1787](https://github.com/delta-io/delta-rs/issues/1787)
+- errors when using pyarrow dataset as a source [\#1779](https://github.com/delta-io/delta-rs/issues/1779)
+- Write to Microsoft OneLake failed. [\#1764](https://github.com/delta-io/delta-rs/issues/1764)
+
+## [rust-v0.16.2](https://github.com/delta-io/delta-rs/tree/rust-v0.16.2) (2023-10-21)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.1...rust-v0.16.2)
+
+## [rust-v0.16.1](https://github.com/delta-io/delta-rs/tree/rust-v0.16.1) (2023-10-21)
+
+[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.0...rust-v0.16.1)
+
## [rust-v0.16.0](https://github.com/delta-io/delta-rs/tree/rust-v0.16.0) (2023-09-27)
[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.15.0...rust-v0.16.0)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ee258a3ce8..f681aa3948 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
# Contributing to delta-rs
-Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports.
+Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports.
Please take note of our [code of conduct](CODE_OF_CONDUCT.md).
@@ -17,34 +17,40 @@ If you want to claim an issue to work on, you can write the word `take` as a com
- Install Rust, e.g. as described [here](https://doc.rust-lang.org/cargo/getting-started/installation.html)
- Have a compatible Python version installed (check `python/pyproject.toml` for current requirement)
- Create a Python virtual environment (required for development builds), e.g. as described [here](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/)
+ ```sh
+ python -m venv .venv
+ ```
+
- Build the project for development (this requires an active virtual environment and will also install `deltalake` in that virtual environment)
-```
-cd python
-make develop
-```
+ ```sh
+ cd python
+ make develop
+ ```
- Run some Python code, e.g. to run a specific test
-```
-python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema"
-```
+ ```sh
+ python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema"
+ ```
- Run some Rust code, e.g. run an example
-```
-cd crates/deltalake
-cargo run --examples basic_operations
-```
+ ```sh
+ cd crates/deltalake
+ cargo run --example basic_operations --features="datafusion"
+ ```
## Run the docs locally
-*This serves your local contens of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstings*
-```
+*This serves your local contents of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstrings*
+
+```sh
(cd python; make develop)
pip install -r docs/requirements.txt
mkdocs serve
```
## To make a pull request (PR)
-- Make sure all the following steps run/pass locally before submitting a PR
-```
+Make sure all the following steps run/pass locally before submitting a PR
+
+```sh
cargo fmt -- --check
cd python
make check-rust
@@ -62,7 +68,7 @@ make build-docs
- For debugging Rust code, install [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb). The extension should even create Debug launch configurations for the project if you allow it, an easy way to get started. Just set a breakpoint and run the relevant configuration.
- For debugging from Python into Rust, follow this procedure:
1. Add this to `.vscode/launch.json`
-```
+```json
{
"type": "lldb",
"request": "attach",
diff --git a/Cargo.toml b/Cargo.toml
index cfcb4eaf3c..0892b0f12b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,12 +1,20 @@
[workspace]
-members = [
- "crates/*",
- "delta-inspect",
- "python",
-]
+members = ["crates/*", "delta-inspect", "python"]
exclude = ["proofs"]
resolver = "2"
+[workspace.package]
+authors = ["Qingping Hou "]
+rust-version = "1.75"
+keywords = ["deltalake", "delta", "datalake"]
+readme = "README.md"
+edition = "2021"
+description = "Native Delta Lake implementation in Rust"
+homepage = "https://github.com/delta-io/delta-rs"
+license = "Apache-2.0"
+documentation = "https://docs.rs/deltalake"
+repository = "https://github.com/delta-io/delta-rs"
+
[profile.release-with-debug]
inherits = "release"
debug = true
@@ -18,28 +26,33 @@ debug = true
debug = "line-tables-only"
[workspace.dependencies]
+delta_kernel = { version = "0.3.0" }
+# delta_kernel = { path = "../delta-kernel-rs/kernel" }
+
# arrow
-arrow = { version = "50" }
-arrow-arith = { version = "50" }
-arrow-array = { version = "50" }
-arrow-buffer = { version = "50" }
-arrow-cast = { version = "50" }
-arrow-ipc = { version = "50" }
-arrow-json = { version = "50" }
-arrow-ord = { version = "50" }
-arrow-row = { version = "50" }
-arrow-schema = { version = "50" }
-arrow-select = { version = "50" }
-object_store = { version = "0.9" }
-parquet = { version = "50" }
+arrow = { version = "52" }
+arrow-arith = { version = "52" }
+arrow-array = { version = "52", features = ["chrono-tz"] }
+arrow-buffer = { version = "52" }
+arrow-cast = { version = "52" }
+arrow-ipc = { version = "52" }
+arrow-json = { version = "52" }
+arrow-ord = { version = "52" }
+arrow-row = { version = "52" }
+arrow-schema = { version = "52" }
+arrow-select = { version = "52" }
+object_store = { version = "0.10.1" }
+parquet = { version = "52" }
# datafusion
-datafusion = { version = "35" }
-datafusion-expr = { version = "35" }
-datafusion-common = { version = "35" }
-datafusion-proto = { version = "35" }
-datafusion-sql = { version = "35" }
-datafusion-physical-expr = { version = "35" }
+datafusion = { version = "40" }
+datafusion-expr = { version = "40" }
+datafusion-common = { version = "40" }
+datafusion-proto = { version = "40" }
+datafusion-sql = { version = "40" }
+datafusion-physical-expr = { version = "40" }
+datafusion-functions = { version = "40" }
+datafusion-functions-array = { version = "40" }
# serde
serde = { version = "1.0.194", features = ["derive"] }
@@ -47,11 +60,12 @@ serde_json = "1"
# "stdlib"
bytes = { version = "1" }
-chrono = { version = "0.4.31", default-features = false, features = ["clock"] }
+chrono = { version = ">0.4.34", default-features = false, features = ["clock"] }
tracing = { version = "0.1", features = ["log"] }
regex = { version = "1" }
thiserror = { version = "1" }
url = { version = "2" }
+urlencoding = "2.1.3"
uuid = { version = "1" }
# runtime / async
diff --git a/README.md b/README.md
index 927b68ee63..b00026b8d8 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@
+Delta Lake is an open-source storage format that runs on top of existing data lakes. Delta Lake is compatible with processing engines like Apache Spark and provides benefits such as ACID transaction guarantees, schema enforcement, and scalable data handling.
The Delta Lake project aims to unlock the power of the Deltalake for as many users and projects as possible
by providing native low-level APIs aimed at developers and integrators, as well as a high-level operations
@@ -77,7 +78,7 @@ write_deltalake("./data/delta", df)
dt = DeltaTable("./data/delta")
df2 = dt.to_pandas()
-assert df == df2
+assert df.equals(df2)
```
The same table can also be loaded using the core Rust crate:
@@ -91,7 +92,7 @@ async fn main() -> Result<(), DeltaTableError> {
let table = open_table("./data/delta").await?;
// show all active files in the table
- let files = table.get_files();
+ let files: Vec<_> = table.get_file_uris()?.collect();
println!("{:?}", files);
Ok(())
@@ -116,6 +117,7 @@ Libraries and frameworks that interoperate with delta-rs - in alphabetical order
- [AWS SDK for Pandas](https://github.com/aws/aws-sdk-pandas)
- [ballista][ballista]
- [datafusion][datafusion]
+- [Daft](https://www.getdaft.io/)
- [Dask](https://github.com/dask-contrib/dask-deltatable)
- [datahub](https://datahubproject.io/)
- [DuckDB](https://duckdb.org/)
@@ -130,45 +132,46 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc
### Cloud Integrations
-| Storage | Rust | Python | Comment |
-| -------------------- | :-----: | :-----: | ----------------------------------- |
-| Local | ![done] | ![done] | |
-| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
-| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
-| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes |
-| Azure Blob | ![done] | ![done] | |
-| Azure ADLS Gen2 | ![done] | ![done] | |
-| Microsoft OneLake | ![done] | ![done] | |
-| Google Cloud Storage | ![done] | ![done] | |
+| Storage | Rust | Python | Comment |
+| -------------------- | :-----: | :-----: | ---------------------------------------------------------------- |
+| Local | ![done] | ![done] | |
+| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
+| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
+| S3 - R2 | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::CopyIfNotExists` |
+| Azure Blob | ![done] | ![done] | |
+| Azure ADLS Gen2 | ![done] | ![done] | |
+| Microsoft OneLake | ![done] | ![done] | |
+| Google Cloud Storage | ![done] | ![done] | |
+| HDFS | ![done] | ![done] | |
### Supported Operations
-| Operation | Rust | Python | Description |
-| --------------------- | :----------------------: | :----------------------: | ------------------------------------------- |
-| Create | ![done] | ![done] | Create a new table |
-| Read | ![done] | ![done] | Read data from a table |
-| Vacuum | ![done] | ![done] | Remove unused files and log entries |
-| Delete - partitions | | ![done] | Delete a table partition |
-| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
-| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file |
-| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
-| Merge | ![done] | ![done] | Merge a target Delta table with source data |
-| FS check | ![done] | ![done] | Remove corrupted files from table |
+| Operation | Rust | Python | Description |
+| --------------------- | :-----: | :-----: | ------------------------------------------- |
+| Create | ![done] | ![done] | Create a new table |
+| Read | ![done] | ![done] | Read data from a table |
+| Vacuum | ![done] | ![done] | Remove unused files and log entries |
+| Delete - partitions | | ![done] | Delete a table partition |
+| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
+| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file |
+| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
+| Merge | ![done] | ![done] | Merge a target Delta table with source data |
+| FS check | ![done] | ![done] | Remove corrupted files from table |
### Protocol Support Level
-| Writer Version | Requirement | Status |
-| -------------- | --------------------------------------------- | :------------------: |
-| Version 2 | Append Only Tables | ![done] |
-| Version 2 | Column Invariants | ![done] |
-| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] |
-| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] |
+| Writer Version | Requirement | Status |
+| -------------- | --------------------------------------------- | :-------------------------------: |
+| Version 2 | Append Only Tables | ![done] |
+| Version 2 | Column Invariants | ![done] |
+| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] |
+| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] |
| Version 3 | CHECK constraints | [![semi-done]][check-constraints] |
-| Version 4 | Change Data Feed | |
-| Version 4 | Generated Columns | |
-| Version 5 | Column Mapping | |
-| Version 6 | Identity Columns | |
-| Version 7 | Table Features | |
+| Version 4 | Change Data Feed | |
+| Version 4 | Generated Columns | |
+| Version 5 | Column Mapping | |
+| Version 6 | Identity Columns | |
+| Version 7 | Table Features | |
| Reader Version | Requirement | Status |
| -------------- | ----------------------------------- | ------ |
diff --git a/crates/aws/Cargo.toml b/crates/aws/Cargo.toml
index b18729e262..e6913a2162 100644
--- a/crates/aws/Cargo.toml
+++ b/crates/aws/Cargo.toml
@@ -1,14 +1,24 @@
[package]
name = "deltalake-aws"
-version = "0.1.0"
-edition = "2021"
+version = "0.1.2"
+authors.workspace = true
+keywords.workspace = true
+readme.workspace = true
+edition.workspace = true
+homepage.workspace = true
+description.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
[dependencies]
-deltalake-core = { version = "0.17.0", path = "../core" }
-rusoto_core = { version = "0.47", default-features = false, optional = true }
-rusoto_credential = { version = "0.47" }
-rusoto_sts = { version = "0.47", default-features = false, optional = true }
-rusoto_dynamodb = { version = "0.47", default-features = false, optional = true }
+deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" }
+aws-smithy-runtime-api = { version="1.1.7" }
+aws-smithy-runtime = { version="1.1.7", optional = true}
+aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]}
+aws-config = { version = "1.1.6", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] }
+aws-sdk-dynamodb = {version = "1.15.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
+aws-sdk-sts = {version = "1.1.6", default-features = false, features = ["behavior-version-latest", "rt-tokio"] }
lazy_static = "1"
maplit = "1"
@@ -24,13 +34,14 @@ regex = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }
backoff = { version = "0.4", features = [ "tokio" ] }
+hyper-tls = { version = "0.5", optional = true }
[dev-dependencies]
deltalake-core = { path = "../core", features = ["datafusion"] }
chrono = { workspace = true }
serial_test = "3"
deltalake-test = { path = "../test" }
-pretty_env_logger = "*"
+pretty_env_logger = "0.5.0"
rand = "0.8"
serde_json = { workspace = true }
@@ -38,12 +49,13 @@ serde_json = { workspace = true }
default = ["rustls"]
integration_test = []
native-tls = [
- "rusoto_core/native-tls",
- "rusoto_sts/native-tls",
- "rusoto_dynamodb/native-tls",
+ "aws-config/client-hyper",
+ "aws-smithy-runtime/connector-hyper-0-14-x",
+ "hyper-tls"
]
rustls = [
- "rusoto_core/rustls",
- "rusoto_sts/rustls",
- "rusoto_dynamodb/rustls",
+ "aws-config/client-hyper",
+ "aws-config/rustls",
+ "aws-sdk-dynamodb/rustls",
+ "aws-sdk-sts/rustls",
]
diff --git a/crates/aws/src/credentials.rs b/crates/aws/src/credentials.rs
new file mode 100644
index 0000000000..9ddf19b74c
--- /dev/null
+++ b/crates/aws/src/credentials.rs
@@ -0,0 +1,118 @@
+use std::{sync::Arc, time::Duration};
+
+use aws_config::{
+ ecs::EcsCredentialsProvider,
+ environment::{EnvironmentVariableCredentialsProvider, EnvironmentVariableRegionProvider},
+ imds::credentials::ImdsCredentialsProvider,
+ meta::{credentials::CredentialsProviderChain, region::RegionProviderChain},
+ profile::ProfileFileCredentialsProvider,
+ provider_config::ProviderConfig,
+ web_identity_token::WebIdentityTokenCredentialsProvider,
+};
+use aws_credential_types::provider::{self, ProvideCredentials};
+use tracing::Instrument;
+
+const IMDS_PROVIDER_NAME: &str = "Ec2InstanceMetadata";
+
+#[derive(Debug)]
+pub struct ConfiguredCredentialChain {
+ provider_chain: CredentialsProviderChain,
+}
+
+#[derive(Debug)]
+pub struct NoOpCredentials {}
+
+pub fn new_region_provider(disable_imds: bool, imds_timeout: u64) -> RegionProviderChain {
+ let env_provider = EnvironmentVariableRegionProvider::new();
+ let profile_file = aws_config::profile::region::ProfileFileRegionProvider::default();
+ if disable_imds {
+ return RegionProviderChain::first_try(env_provider).or_else(profile_file);
+ }
+
+ RegionProviderChain::first_try(env_provider)
+ .or_else(profile_file)
+ .or_else(
+ aws_config::imds::region::Builder::default()
+ .imds_client(
+ aws_config::imds::Client::builder()
+ .connect_timeout(Duration::from_millis(imds_timeout))
+ .read_timeout(Duration::from_millis(imds_timeout))
+ .build(),
+ )
+ .build(),
+ )
+}
+
+impl ConfiguredCredentialChain {
+ pub fn new(disable_imds: bool, imds_timeout: u64, conf: &ProviderConfig) -> Self {
+ let imds_provider = Self::build_imds_provider(conf, disable_imds, imds_timeout);
+ let env_provider = EnvironmentVariableCredentialsProvider::default();
+ let profile_provider = ProfileFileCredentialsProvider::builder()
+ .configure(conf)
+ .with_custom_provider(IMDS_PROVIDER_NAME, imds_provider.clone())
+ .build();
+ let web_identity_token_provider = WebIdentityTokenCredentialsProvider::builder()
+ .configure(conf)
+ .build();
+
+ let ecs_provider = EcsCredentialsProvider::builder().configure(conf).build();
+
+ let provider_chain = CredentialsProviderChain::first_try("Environment", env_provider)
+ .or_else("Profile", profile_provider)
+ .or_else("WebIdentityToken", web_identity_token_provider)
+ .or_else("EcsContainer", ecs_provider)
+ .or_else(IMDS_PROVIDER_NAME, imds_provider);
+
+ Self { provider_chain }
+ }
+
+ async fn credentials(&self) -> provider::Result {
+ self.provider_chain
+ .provide_credentials()
+ .instrument(tracing::debug_span!("provide_credentials", provider = %"default_chain"))
+ .await
+ }
+
+ fn build_imds_provider(
+ conf: &ProviderConfig,
+ disable_imds: bool,
+ imds_timeout: u64,
+ ) -> Arc {
+ if disable_imds {
+ return Arc::new(NoOpCredentials {});
+ }
+
+ let imds_provider = ImdsCredentialsProvider::builder()
+ .configure(conf)
+ .imds_client(
+ aws_config::imds::Client::builder()
+ .connect_timeout(Duration::from_millis(imds_timeout))
+ .read_timeout(Duration::from_millis(imds_timeout))
+ .build(),
+ )
+ .build();
+ Arc::new(imds_provider)
+ }
+}
+
+impl ProvideCredentials for ConfiguredCredentialChain {
+ fn provide_credentials<'a>(
+ &'a self,
+ ) -> aws_credential_types::provider::future::ProvideCredentials<'a>
+ where
+ Self: 'a,
+ {
+ aws_credential_types::provider::future::ProvideCredentials::new(self.credentials())
+ }
+}
+
+impl ProvideCredentials for NoOpCredentials {
+ fn provide_credentials<'a>(&'a self) -> provider::future::ProvideCredentials<'a>
+ where
+ Self: 'a,
+ {
+ aws_credential_types::provider::future::ProvideCredentials::new(std::future::ready(Err(
+ provider::error::CredentialsError::not_loaded_no_source(),
+ )))
+ }
+}
diff --git a/crates/aws/src/errors.rs b/crates/aws/src/errors.rs
index bbce9dc426..55f2a2d013 100644
--- a/crates/aws/src/errors.rs
+++ b/crates/aws/src/errors.rs
@@ -2,27 +2,45 @@
use std::num::ParseIntError;
-use rusoto_core::RusotoError;
-use rusoto_dynamodb::{CreateTableError, GetItemError, PutItemError, QueryError, UpdateItemError};
-
-#[derive(thiserror::Error, Debug, PartialEq)]
-pub enum DynamoDbConfigError {
- /// Error raised creating http client
- #[error("Failed to create request dispatcher: {source}")]
- HttpClient {
- /// The underlying Rusoto TlsError
- #[from]
- source: rusoto_core::request::TlsError,
+use aws_credential_types::provider::error::CredentialsError;
+use aws_sdk_dynamodb::{
+ error::SdkError,
+ operation::{
+ create_table::CreateTableError, delete_item::DeleteItemError, get_item::GetItemError,
+ put_item::PutItemError, query::QueryError, update_item::UpdateItemError,
},
+};
+use aws_smithy_runtime_api::client::result::ServiceError;
+
+macro_rules! impl_from_service_error {
+ ($error_type:ty) => {
+ impl From> for LockClientError
+ where
+ R: Send + Sync + std::fmt::Debug + 'static,
+ {
+ fn from(err: SdkError<$error_type, R>) -> Self {
+ match err {
+ SdkError::ServiceError(e) => e.into(),
+ _ => LockClientError::GenericDynamoDb {
+ source: Box::new(err),
+ },
+ }
+ }
+ }
- /// Error raised getting credentials
- #[error("Failed to retrieve AWS credentials: {source}")]
- Credentials {
- /// The underlying Rusoto CredentialsError
- #[from]
- source: rusoto_credential::CredentialsError,
- },
+ impl From> for LockClientError
+ where
+ R: Send + Sync + std::fmt::Debug + 'static,
+ {
+ fn from(value: ServiceError<$error_type, R>) -> Self {
+ value.into_err().into()
+ }
+ }
+ };
+}
+#[derive(thiserror::Error, Debug)]
+pub enum DynamoDbConfigError {
/// Billing mode string invalid
#[error("Invalid billing mode : {0}, supported values : ['provided', 'pay_per_request']")]
InvalidBillingMode(String),
@@ -33,6 +51,9 @@ pub enum DynamoDbConfigError {
// config_value: String,
source: ParseIntError,
},
+ /// Cannot initialize DynamoDbConfiguration due to some sort of threading issue
+ #[error("Cannot initialize dynamodb lock configuration")]
+ InitializationError,
}
/// Errors produced by `DynamoDbLockClient`
@@ -44,7 +65,7 @@ pub enum LockClientError {
#[error("Lock table '{name}': creation failed: {source}")]
LockTableCreateFailure {
name: String,
- source: RusotoError,
+ source: Box,
},
#[error("Log entry for table '{table_path}' and version '{version}' already exists")]
@@ -60,29 +81,30 @@ pub enum LockClientError {
GenericDynamoDb {
source: Box,
},
-
#[error("configuration error: {source}")]
- Credentials {
- source: rusoto_credential::CredentialsError,
- },
-
+ Credentials { source: CredentialsError },
#[error(
"Atomic rename requires a LockClient for S3 backends. \
Either configure the LockClient, or set AWS_S3_ALLOW_UNSAFE_RENAME=true \
to opt out of support for concurrent writers."
)]
LockClientRequired,
+
+ #[error("Log entry for table '{table_path}' and version '{version}' is already complete")]
+ VersionAlreadyCompleted { table_path: String, version: i64 },
}
impl From for LockClientError {
fn from(err: GetItemError) -> Self {
match err {
- GetItemError::InternalServerError(_) => err.into(),
- GetItemError::ProvisionedThroughputExceeded(_) => {
+ GetItemError::ProvisionedThroughputExceededException(_) => {
LockClientError::ProvisionedThroughputExceeded
}
GetItemError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded,
- GetItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound,
+ GetItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound,
+ _ => LockClientError::GenericDynamoDb {
+ source: Box::new(err),
+ },
}
}
}
@@ -90,12 +112,14 @@ impl From for LockClientError {
impl From for LockClientError {
fn from(err: QueryError) -> Self {
match err {
- QueryError::InternalServerError(_) => err.into(),
- QueryError::ProvisionedThroughputExceeded(_) => {
+ QueryError::ProvisionedThroughputExceededException(_) => {
LockClientError::ProvisionedThroughputExceeded
}
QueryError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded,
- QueryError::ResourceNotFound(_) => LockClientError::LockTableNotFound,
+ QueryError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound,
+ _ => LockClientError::GenericDynamoDb {
+ source: Box::new(err),
+ },
}
}
}
@@ -103,17 +127,19 @@ impl From for LockClientError {
impl From for LockClientError {
fn from(err: PutItemError) -> Self {
match err {
- PutItemError::ConditionalCheckFailed(_) => {
+ PutItemError::ConditionalCheckFailedException(_) => {
unreachable!("error must be handled explicitely")
}
- PutItemError::InternalServerError(_) => err.into(),
- PutItemError::ProvisionedThroughputExceeded(_) => {
+ PutItemError::ProvisionedThroughputExceededException(_) => {
LockClientError::ProvisionedThroughputExceeded
}
PutItemError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded,
- PutItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound,
- PutItemError::ItemCollectionSizeLimitExceeded(_) => err.into(),
- PutItemError::TransactionConflict(_) => err.into(),
+ PutItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound,
+ // Remaining variants are wrapped by the catch-all arm below; calling `err.into()` for
+ // them would re-enter this very `From` impl and recurse until the stack overflows.
+ _ => LockClientError::GenericDynamoDb {
+ source: Box::new(err),
+ },
}
}
}
@@ -121,34 +147,51 @@ impl From for LockClientError {
impl From for LockClientError {
fn from(err: UpdateItemError) -> Self {
match err {
- UpdateItemError::ConditionalCheckFailed(_) => {
+ UpdateItemError::ConditionalCheckFailedException(_) => {
unreachable!("condition check failure in update is not an error")
}
- UpdateItemError::InternalServerError(_) => err.into(),
- UpdateItemError::ProvisionedThroughputExceeded(_) => {
+ UpdateItemError::ProvisionedThroughputExceededException(_) => {
LockClientError::ProvisionedThroughputExceeded
}
UpdateItemError::RequestLimitExceeded(_) => {
LockClientError::ProvisionedThroughputExceeded
}
- UpdateItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound,
- UpdateItemError::ItemCollectionSizeLimitExceeded(_) => err.into(),
- UpdateItemError::TransactionConflict(_) => err.into(),
+ UpdateItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound,
+ // Variants without a dedicated arm (internal server error, item collection size limit,
+ // transaction conflict, ...) are wrapped by the catch-all arm below; calling `err.into()`
+ // for them would re-enter this very `From` impl and recurse until the stack overflows.
+ _ => LockClientError::GenericDynamoDb {
+ source: Box::new(err),
+ },
}
}
}
-impl From> for LockClientError
-where
- E: Into + std::error::Error + Send + Sync + 'static,
-{
- fn from(err: RusotoError) -> Self {
+impl From for LockClientError {
+ fn from(err: DeleteItemError) -> Self {
match err {
- RusotoError::Service(e) => e.into(),
- RusotoError::Credentials(e) => LockClientError::Credentials { source: e },
+ DeleteItemError::ConditionalCheckFailedException(_) => {
+ unreachable!("error must be handled explicitly")
+ }
+ DeleteItemError::ProvisionedThroughputExceededException(_) => {
+ LockClientError::ProvisionedThroughputExceeded
+ }
+ DeleteItemError::RequestLimitExceeded(_) => {
+ LockClientError::ProvisionedThroughputExceeded
+ }
+ DeleteItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound,
+ // Variants without a dedicated arm (internal server error, item collection size limit,
+ // transaction conflict, ...) are wrapped by the catch-all arm below; calling `err.into()`
+ // for them would re-enter this very `From` impl and recurse until the stack overflows.
_ => LockClientError::GenericDynamoDb {
source: Box::new(err),
},
}
}
}
+
+impl_from_service_error!(GetItemError);
+impl_from_service_error!(PutItemError);
+impl_from_service_error!(QueryError);
+impl_from_service_error!(UpdateItemError);
+impl_from_service_error!(DeleteItemError);
diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs
index 2630f80512..a0a99c01f0 100644
--- a/crates/aws/src/lib.rs
+++ b/crates/aws/src/lib.rs
@@ -1,9 +1,23 @@
//! Lock client implementation based on DynamoDb.
+mod credentials;
pub mod errors;
pub mod logstore;
+#[cfg(feature = "native-tls")]
+mod native;
pub mod storage;
-
+use aws_config::SdkConfig;
+use aws_sdk_dynamodb::{
+ operation::{
+ create_table::CreateTableError, delete_item::DeleteItemError, get_item::GetItemError,
+ put_item::PutItemError, query::QueryError, update_item::UpdateItemError,
+ },
+ types::{
+ AttributeDefinition, AttributeValue, BillingMode, KeySchemaElement, KeyType,
+ ScalarAttributeType,
+ },
+ Client,
+};
use lazy_static::lazy_static;
use object_store::aws::AmazonS3ConfigKey;
use regex::Regex;
@@ -18,21 +32,13 @@ use tracing::debug;
use deltalake_core::logstore::{logstores, LogStore, LogStoreFactory};
use deltalake_core::storage::{factories, url_prefix_handler, ObjectStoreRef, StorageOptions};
use deltalake_core::{DeltaResult, Path};
-use rusoto_core::{HttpClient, Region, RusotoError};
-use rusoto_credential::AutoRefreshingProvider;
-use rusoto_dynamodb::{
- AttributeDefinition, AttributeValue, CreateTableError, CreateTableInput, DynamoDb,
- DynamoDbClient, GetItemError, GetItemInput, KeySchemaElement, PutItemError, PutItemInput,
- QueryError, QueryInput, UpdateItemError, UpdateItemInput,
-};
-use rusoto_sts::WebIdentityProvider;
use url::Url;
use errors::{DynamoDbConfigError, LockClientError};
use storage::{S3ObjectStoreFactory, S3StorageOptions};
#[derive(Clone, Debug, Default)]
-struct S3LogStoreFactory {}
+pub struct S3LogStoreFactory {}
impl LogStoreFactory for S3LogStoreFactory {
fn with_options(
@@ -41,7 +47,7 @@ impl LogStoreFactory for S3LogStoreFactory {
location: &Url,
options: &StorageOptions,
) -> DeltaResult> {
- let store = url_prefix_handler(store, Path::parse(location.path())?)?;
+ let store = url_prefix_handler(store, Path::parse(location.path())?);
if options
.0
@@ -53,7 +59,7 @@ impl LogStoreFactory for S3LogStoreFactory {
));
}
- let s3_options = S3StorageOptions::from_map(&options.0);
+ let s3_options = S3StorageOptions::from_map(&options.0)?;
if s3_options.locking_provider.as_deref() != Some("dynamodb") {
debug!("S3LogStoreFactory has been asked to create a LogStore without the dynamodb locking provider");
@@ -117,7 +123,7 @@ impl CommitEntry {
/// Lock client backed by DynamoDb.
pub struct DynamoDbLockClient {
/// DynamoDb client
- dynamodb_client: DynamoDbClient,
+ dynamodb_client: Client,
/// configuration of the
config: DynamoDbConfig,
}
@@ -131,24 +137,30 @@ impl std::fmt::Debug for DynamoDbLockClient {
impl DynamoDbLockClient {
/// Creates a new DynamoDbLockClient from the supplied storage options.
pub fn try_new(
+ sdk_config: &SdkConfig,
lock_table_name: Option,
billing_mode: Option,
max_elapsed_request_time: Option,
- region: Region,
- use_web_identity: bool,
+ dynamodb_override_endpoint: Option,
) -> Result {
- let dynamodb_client = create_dynamodb_client(region.clone(), use_web_identity)?;
+ let dynamodb_sdk_config =
+ Self::create_dynamodb_sdk_config(sdk_config, dynamodb_override_endpoint);
+
+ let dynamodb_client = aws_sdk_dynamodb::Client::new(&dynamodb_sdk_config);
let lock_table_name = lock_table_name
.or_else(|| std::env::var(constants::LOCK_TABLE_KEY_NAME).ok())
.unwrap_or(constants::DEFAULT_LOCK_TABLE_NAME.to_owned());
- let billing_mode = billing_mode
+ let billing_mode = if let Some(bm) = billing_mode
.or_else(|| std::env::var(constants::BILLING_MODE_KEY_NAME).ok())
- .map_or_else(
- || Ok(BillingMode::PayPerRequest),
- |bm| BillingMode::from_str(&bm),
- )?;
+ .as_ref()
+ {
+ BillingMode::try_parse(bm.to_ascii_uppercase().as_str())
+ .map_err(|_| DynamoDbConfigError::InvalidBillingMode(bm.to_owned()))?
+ } else {
+ BillingMode::PayPerRequest
+ };
let max_elapsed_request_time = max_elapsed_request_time
.or_else(|| std::env::var(constants::MAX_ELAPSED_REQUEST_TIME_KEY_NAME).ok())
@@ -162,14 +174,31 @@ impl DynamoDbLockClient {
billing_mode,
lock_table_name,
max_elapsed_request_time,
- use_web_identity,
- region,
+ sdk_config: sdk_config.clone(),
};
Ok(Self {
dynamodb_client,
config,
})
}
+ fn create_dynamodb_sdk_config(
+ sdk_config: &SdkConfig,
+ dynamodb_override_endpoint: Option,
+ ) -> SdkConfig {
+ /*
+ If `dynamodb_override_endpoint` is set (i.e. the user specified AWS_ENDPOINT_URL_DYNAMODB),
+ the DynamoDB client is built against that endpoint; otherwise `sdk_config` is used unchanged.
+ */
+
+ match dynamodb_override_endpoint {
+ Some(dynamodb_endpoint_url) => sdk_config
+ .to_owned()
+ .to_builder()
+ .endpoint_url(dynamodb_endpoint_url)
+ .build(),
+ None => sdk_config.to_owned(),
+ }
+ }
/// Create the lock table where DynamoDb stores the commit information for all delta tables.
///
@@ -179,40 +208,50 @@ impl DynamoDbLockClient {
/// `active`, so transient failures might occurr when immediately using the lock client.
pub async fn try_create_lock_table(&self) -> Result {
let attribute_definitions = vec![
- AttributeDefinition {
- attribute_name: constants::ATTR_TABLE_PATH.to_owned(),
- attribute_type: constants::STRING_TYPE.to_owned(),
- },
- AttributeDefinition {
- attribute_name: constants::ATTR_FILE_NAME.to_owned(),
- attribute_type: constants::STRING_TYPE.to_owned(),
- },
+ AttributeDefinition::builder()
+ .attribute_name(constants::ATTR_TABLE_PATH)
+ .attribute_type(ScalarAttributeType::S)
+ .build()
+ .unwrap(),
+ AttributeDefinition::builder()
+ .attribute_name(constants::ATTR_FILE_NAME)
+ .attribute_type(ScalarAttributeType::S)
+ .build()
+ .unwrap(),
];
- let input = CreateTableInput {
- attribute_definitions,
- key_schema: vec![
- KeySchemaElement {
- attribute_name: constants::ATTR_TABLE_PATH.to_owned(),
- key_type: constants::KEY_TYPE_HASH.to_owned(),
- },
- KeySchemaElement {
- attribute_name: constants::ATTR_FILE_NAME.to_owned(),
- key_type: constants::KEY_TYPE_RANGE.to_owned(),
- },
- ],
- billing_mode: Some(self.config.billing_mode.to_str()),
- table_name: self.config.lock_table_name.clone(),
- ..Default::default()
- };
- match self.dynamodb_client.create_table(input).await {
+ let request = self
+ .dynamodb_client
+ .create_table()
+ .set_attribute_definitions(Some(attribute_definitions))
+ .set_key_schema(Some(vec![
+ KeySchemaElement::builder()
+ .attribute_name(constants::ATTR_TABLE_PATH.to_owned())
+ .key_type(KeyType::Hash)
+ .build()
+ .unwrap(),
+ KeySchemaElement::builder()
+ .attribute_name(constants::ATTR_FILE_NAME.to_owned())
+ .key_type(KeyType::Range)
+ .build()
+ .unwrap(),
+ ]))
+ .billing_mode(self.config.billing_mode.clone())
+ .table_name(&self.config.lock_table_name)
+ .send();
+ match request.await {
Ok(_) => Ok(CreateLockTableResult::TableCreated),
- Err(RusotoError::Service(CreateTableError::ResourceInUse(_))) => {
- Ok(CreateLockTableResult::TableAlreadyExists)
- }
- Err(reason) => Err(LockClientError::LockTableCreateFailure {
- name: self.config.lock_table_name.clone(),
- source: reason,
- }),
+ Err(sdk_err) => match sdk_err.as_service_error() {
+ Some(CreateTableError::ResourceInUseException(_)) => {
+ Ok(CreateLockTableResult::TableAlreadyExists)
+ }
+ Some(_) => Err(LockClientError::LockTableCreateFailure {
+ name: self.config.lock_table_name.clone(),
+ source: Box::new(sdk_err.into_service_error()),
+ }),
+ _ => Err(LockClientError::GenericDynamoDb {
+ source: Box::new(sdk_err),
+ }),
+ },
}
}
@@ -238,22 +277,26 @@ impl DynamoDbLockClient {
table_path: &str,
version: i64,
) -> Result