diff --git a/.cargo/config b/.cargo/config.toml similarity index 100% rename from .cargo/config rename to .cargo/config.toml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b99809d1f6..736703c551 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1,4 @@ -crates/ @wjones127 @roeap @rtyler +crates/ @wjones127 @roeap @rtyler @hntd187 @ion-elgreco delta-inspect/ @wjones127 @rtyler proofs/ @houqp python/ @wjones127 @fvaleye @roeap @ion-elgreco diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml new file mode 100644 index 0000000000..7875107ddd --- /dev/null +++ b/.github/actions/setup-env/action.yml @@ -0,0 +1,34 @@ +name: "Setup Python and Rust Environment" +description: "Set up Python, virtual environment, and Rust toolchain" + +inputs: + + python-version: + description: "The Python version to set up" + required: true + default: "3.10" + + rust-toolchain: + description: "The Rust toolchain to set up" + required: true + default: "stable" + +runs: + using: "composite" + + steps: + + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: default + toolchain: ${{ inputs.rust-toolchain }} + override: true + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 220c5b21d9..93b3cbdc3e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,6 +5,7 @@ on: branches: [main, "rust-v*"] pull_request: branches: [main, "rust-v*"] + merge_group: jobs: format: @@ -28,7 +29,6 @@ jobs: matrix: os: - ubuntu-latest - - macos-11 - windows-latest runs-on: ${{ matrix.os }} @@ -42,16 +42,14 @@ jobs: toolchain: stable override: true - - uses: Swatinem/rust-cache@v2 - - name: build and lint with clippy - run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests + run: cargo clippy --features azure,datafusion,s3,gcs,glue,hdfs --tests - name: Spot-check build for native-tls features run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue --tests - name: Check docs - run: cargo doc --features azure,datafusion,s3,gcs,glue + run: cargo doc --features azure,datafusion,s3,gcs,glue,hdfs - name: Check no default features (except rustls) run: cargo check --no-default-features --features rustls @@ -62,7 +60,6 @@ jobs: matrix: os: - ubuntu-latest - - macos-11 - windows-latest runs-on: ${{ matrix.os }} env: @@ -82,8 +79,6 @@ jobs: toolchain: "stable" override: true - - uses: Swatinem/rust-cache@v2 - - name: Run tests run: cargo test --verbose --features datafusion,azure @@ -118,28 +113,24 @@ jobs: toolchain: stable override: true - # - uses: actions/setup-java@v3 - # with: - # distribution: "zulu" - # java-version: "17" - - # - uses: beyondstorage/setup-hdfs@master - # with: - # hdfs-version: "3.3.2" - - # - name: Set Hadoop env - # run: | - # echo "CLASSPATH=$CLASSPATH:`hadoop classpath --glob`" >> $GITHUB_ENV - # echo "LD_LIBRARY_PATH=$JAVA_HOME/lib/server" >> $GITHUB_ENV + # Install Java and Hadoop for HDFS integration tests + - uses: actions/setup-java@v4 + with: + distribution: "temurin" + java-version: "17" - - uses: Swatinem/rust-cache@v2 + - name: Download Hadoop + run: | + wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz + tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE + echo 
"$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH - name: Start emulated services - run: docker-compose up -d + run: docker compose up -d - name: Run tests with rustls (default) run: | - cargo test --features integration_test,azure,s3,gcs,datafusion + cargo test --features integration_test,azure,s3,gcs,datafusion,hdfs - name: Run tests with native-tls run: | diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 6b3d5a7ddb..121e0b8882 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -2,6 +2,7 @@ name: dev_pr # Trigger whenever a PR is changed (title as well as new / changed commits) on: + merge_group: pull_request_target: types: - opened diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 079cd66fcc..5729b87624 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,7 @@ name: Build (and maybe release) the documentation on: + merge_group: pull_request: paths: - python/** @@ -31,9 +32,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: psf/black@stable - with: - src: docs/src/python + - run: | + cd docs + make check build-deploy: needs: @@ -47,25 +48,13 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.10' + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Build and install deltalake run: | cd python - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make ${{ env.BUILD_ARGS }} diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index bc2f20cc9a..ce2a7e0bfd 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -1,6 +1,7 @@ name: python_build on: + merge_group: push: branches: [main] pull_request: @@ -15,23 +16,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.8 + + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Check Python run: | - pip install ruff black mypy types-dataclasses typing-extensions + python -m venv venv + source venv/bin/activate + pip install ruff==0.5.2 mypy==1.10.1 types-dataclasses typing-extensions make check-python - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - name: Check Rust run: make check-rust @@ -45,24 +40,14 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v2 + - name: Setup Environment + uses: ./.github/actions/setup-env with: python-version: 3.8 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make setup # Install minimum PyArrow version @@ -89,26 +74,15 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v3 - with: - 
python-version: "3.10" + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Start emulated services - run: docker-compose up -d + run: docker compose up -d - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make develop @@ -137,23 +111,12 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v4 - with: - python-version: "3.10" + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Build deltalake in release mode run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate MATURIN_EXTRA_ARGS=--release make develop @@ -187,18 +150,8 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v3 - with: - python-version: "3.10" + - name: Setup Environment + uses: ./.github/actions/setup-env - uses: actions/setup-java@v2 with: @@ -207,8 +160,7 @@ jobs: - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make develop-pyspark @@ -231,15 +183,14 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - name: Setup Environment + uses: ./.github/actions/setup-env with: python-version: ${{ matrix.python-version }} - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make setup maturin develop diff --git a/.github/workflows/python_release.yml b/.github/workflows/python_release.yml index 48611bacb4..46b4230af1 100644 --- a/.github/workflows/python_release.yml +++ b/.github/workflows/python_release.yml @@ -35,7 +35,7 @@ jobs: fail-fast: false matrix: target: [x86_64-apple-darwin, aarch64-apple-darwin] - runs-on: macos-12 + runs-on: macos-14 steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index ca0576b47c..18dcc39f69 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ tlaplus/*.toolbox/*/[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*/ /.idea .vscode .env +.venv +venv **/.DS_Store **/.python-version .coverage @@ -20,6 +22,7 @@ __blobstorage__ .githubchangeloggenerator.cache.log .githubchangeloggenerator.cache/ .githubchangeloggenerator* +data # Add all Cargo.lock files except for those in binary crates Cargo.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 922a49f47e..9161a320c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,689 @@ # Changelog +## [rust-v0.18.2](https://github.com/delta-io/delta-rs/tree/rust-v0.18.2) (2024-08-07) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.18.1...rust-v0.18.2) + +**Implemented enhancements:** + +- Choose which columns to store min/max values for [\#2709](https://github.com/delta-io/delta-rs/issues/2709) +- Projection pushdown for load\_cdf [\#2681](https://github.com/delta-io/delta-rs/issues/2681) +- Way to check if Delta table exists at specified path [\#2662](https://github.com/delta-io/delta-rs/issues/2662) +- Support HDFS via hdfs-native package [\#2611](https://github.com/delta-io/delta-rs/issues/2611) +- Deletion `_change_type` does 
not appear in change data feed [\#2579](https://github.com/delta-io/delta-rs/issues/2579) +- Could you please explain in the README what "Deltalake" is for the uninitiated? [\#2523](https://github.com/delta-io/delta-rs/issues/2523) +- Discuss: Allow protocol change during write actions [\#2444](https://github.com/delta-io/delta-rs/issues/2444) +- Support for Arrow PyCapsule interface [\#2376](https://github.com/delta-io/delta-rs/issues/2376) + +**Fixed bugs:** + +- Slow add\_actions.to\_pydict for tables with large number of columns, impacting read performance [\#2733](https://github.com/delta-io/delta-rs/issues/2733) +- append is deleting records [\#2716](https://github.com/delta-io/delta-rs/issues/2716) +- segmentation fault - Python 3.10 on Mac M3 [\#2706](https://github.com/delta-io/delta-rs/issues/2706) +- Failure to delete dir and files [\#2703](https://github.com/delta-io/delta-rs/issues/2703) +- DeltaTable.from\_data\_catalog not working [\#2699](https://github.com/delta-io/delta-rs/issues/2699) +- Project should use the same version of `ruff` in the `lint` stage of `python_build.yml` as in `pyproject.toml` [\#2678](https://github.com/delta-io/delta-rs/issues/2678) +- un-tracked columns are giving json error when pyarrow schema have field with nullable=False and create\_checkpoint is triggered [\#2675](https://github.com/delta-io/delta-rs/issues/2675) +- \[BUG\]write\_delta\({'custom\_metadata':str}\) cannot be converted. str to pyDict error \(0.18.2\_DeltaPython/Windows10\) [\#2697](https://github.com/delta-io/delta-rs/issues/2697) +- Pyarrow engine not supporting schema overwrite with Append mode [\#2654](https://github.com/delta-io/delta-rs/issues/2654) +- `deltalake-core` version re-exported by `deltalake` different than versions used by `deltalake-azure` and `deltalake-gcp` [\#2647](https://github.com/delta-io/delta-rs/issues/2647) +- i32 limit in JSON stats [\#2646](https://github.com/delta-io/delta-rs/issues/2646) +- Rust writer not encoding correct URL for partitions in delta table [\#2634](https://github.com/delta-io/delta-rs/issues/2634) +- Large Types breaks merge predicate pruning [\#2632](https://github.com/delta-io/delta-rs/issues/2632) +- Getting error when converting a partitioned parquet table to delta table [\#2626](https://github.com/delta-io/delta-rs/issues/2626) +- Arrow: Parquet does not support writing empty structs when creating checkpoint [\#2622](https://github.com/delta-io/delta-rs/issues/2622) +- InvalidTableLocation\("Unknown scheme: gs"\) on 0.18.0 [\#2610](https://github.com/delta-io/delta-rs/issues/2610) +- Unable to read delta table created using Uniform [\#2578](https://github.com/delta-io/delta-rs/issues/2578) +- schema merging doesn't work when overwriting with a predicate [\#2567](https://github.com/delta-io/delta-rs/issues/2567) +- Not working in AWS Lambda \(0.16.2 - 0.17.4\) OSError: Generic S3 error [\#2511](https://github.com/delta-io/delta-rs/issues/2511) +- DataFusion filter on partition column doesn't work. \(when the physical schema ordering is different to logical one\) [\#2494](https://github.com/delta-io/delta-rs/issues/2494) +- Creating checkpoints for tables with missing column stats results in Err [\#2493](https://github.com/delta-io/delta-rs/issues/2493) +- Cannot merge to a table with a timestamp column after upgrading delta-rs [\#2478](https://github.com/delta-io/delta-rs/issues/2478) +- Azure AD Auth fails on ARM64 [\#2475](https://github.com/delta-io/delta-rs/issues/2475) +- Generic S3 error: Error after 0 retries ... 
Broken pipe \(os error 32\) [\#2403](https://github.com/delta-io/delta-rs/issues/2403) +- write\_deltalake identifies large\_string as datatype even though string is set in schema [\#2374](https://github.com/delta-io/delta-rs/issues/2374) +- Inconsistent arrow timestamp type breaks datafusion query [\#2341](https://github.com/delta-io/delta-rs/issues/2341) + +**Closed issues:** + +- Unable to write new partitions with type timestamp on tables created with delta-rs 0.10.0 [\#2631](https://github.com/delta-io/delta-rs/issues/2631) + +**Merged pull requests:** + +- fix: schema adapter doesn't map partial batches correctly [\#2735](https://github.com/delta-io/delta-rs/pull/2735) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- perf: grab file size in rust [\#2734](https://github.com/delta-io/delta-rs/pull/2734) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: use logical plan in update, refactor/simplify CDCTracker [\#2727](https://github.com/delta-io/delta-rs/pull/2727) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: use logical plan in delete, delta planner refactoring [\#2725](https://github.com/delta-io/delta-rs/pull/2725) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: try an alternative docke compose invocation syntax [\#2724](https://github.com/delta-io/delta-rs/pull/2724) ([rtyler](https://github.com/rtyler)) +- fix\(python, rust\): use input schema to get correct schema in cdf reads [\#2723](https://github.com/delta-io/delta-rs/pull/2723) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): cdc write-support for `overwrite` and `replacewhere` writes [\#2722](https://github.com/delta-io/delta-rs/pull/2722) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): cdc write-support for `delete` operation [\#2721](https://github.com/delta-io/delta-rs/pull/2721) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: enabling actions for merge groups [\#2718](https://github.com/delta-io/delta-rs/pull/2718) ([rtyler](https://github.com/rtyler)) +- perf: apply projection when reading checkpoint parquet [\#2717](https://github.com/delta-io/delta-rs/pull/2717) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- feat\(python\): add DeltaTable.is\_deltatable static method \(\#2662\) [\#2715](https://github.com/delta-io/delta-rs/pull/2715) ([omkar-foss](https://github.com/omkar-foss)) +- chore: prepare python release 0.18.3 [\#2707](https://github.com/delta-io/delta-rs/pull/2707) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): use url encoder when encoding partition values [\#2705](https://github.com/delta-io/delta-rs/pull/2705) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): add projection in CDF reads [\#2704](https://github.com/delta-io/delta-rs/pull/2704) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: ensure DataFusion SessionState Parquet options are applied to DeltaScan [\#2702](https://github.com/delta-io/delta-rs/pull/2702) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- chore: refactor `write_deltalake` in `writer.py` [\#2695](https://github.com/delta-io/delta-rs/pull/2695) ([fpgmaas](https://github.com/fpgmaas)) +- fix\(python\): empty dataset fix for "pyarrow" engine [\#2689](https://github.com/delta-io/delta-rs/pull/2689) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: add test coverage command to `Makefile` [\#2688](https://github.com/delta-io/delta-rs/pull/2688) ([fpgmaas](https://github.com/fpgmaas)) +- 
chore: create separate action to setup python and rust in the cicd pipeline [\#2687](https://github.com/delta-io/delta-rs/pull/2687) ([fpgmaas](https://github.com/fpgmaas)) +- fix: update delta kernel version [\#2685](https://github.com/delta-io/delta-rs/pull/2685) ([jeppe742](https://github.com/jeppe742)) +- chore: update README.md [\#2684](https://github.com/delta-io/delta-rs/pull/2684) ([veronewra](https://github.com/veronewra)) +- fix\(rust,python\): checkpoint with column nullable false [\#2680](https://github.com/delta-io/delta-rs/pull/2680) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: pin `ruff` and `mypy` versions in the `lint` stage in the CI pipeline [\#2679](https://github.com/delta-io/delta-rs/pull/2679) ([fpgmaas](https://github.com/fpgmaas)) +- chore: enable `RUF` ruleset for `ruff` [\#2677](https://github.com/delta-io/delta-rs/pull/2677) ([fpgmaas](https://github.com/fpgmaas)) +- chore: remove stale code for conditional import of `Literal` [\#2676](https://github.com/delta-io/delta-rs/pull/2676) ([fpgmaas](https://github.com/fpgmaas)) +- chore: remove references to black from the project [\#2674](https://github.com/delta-io/delta-rs/pull/2674) ([fpgmaas](https://github.com/fpgmaas)) +- chore: bump ruff to 0.5.2 [\#2673](https://github.com/delta-io/delta-rs/pull/2673) ([fpgmaas](https://github.com/fpgmaas)) +- chore: improve contributing.md [\#2672](https://github.com/delta-io/delta-rs/pull/2672) ([fpgmaas](https://github.com/fpgmaas)) +- feat: support userMetadata in CommitInfo [\#2670](https://github.com/delta-io/delta-rs/pull/2670) ([jkylling](https://github.com/jkylling)) +- chore: upgrade to datafusion 40 [\#2661](https://github.com/delta-io/delta-rs/pull/2661) ([rtyler](https://github.com/rtyler)) +- docs: improve navigation fixes [\#2660](https://github.com/delta-io/delta-rs/pull/2660) ([avriiil](https://github.com/avriiil)) +- docs: add integration docs for s3 backend [\#2658](https://github.com/delta-io/delta-rs/pull/2658) ([avriiil](https://github.com/avriiil)) +- docs: fix bullets on hdfs docs [\#2653](https://github.com/delta-io/delta-rs/pull/2653) ([Kimahriman](https://github.com/Kimahriman)) +- ci: update CODEOWNERS [\#2650](https://github.com/delta-io/delta-rs/pull/2650) ([hntd187](https://github.com/hntd187)) +- feat\(rust\): fix size\_in\_bytes in last\_checkpoint\_ to i64 [\#2649](https://github.com/delta-io/delta-rs/pull/2649) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: increase subcrate versions [\#2648](https://github.com/delta-io/delta-rs/pull/2648) ([rtyler](https://github.com/rtyler)) +- chore: missed one macos runner reference in actions [\#2645](https://github.com/delta-io/delta-rs/pull/2645) ([rtyler](https://github.com/rtyler)) +- chore: add a reproduction case for merge failures with struct\ [\#2644](https://github.com/delta-io/delta-rs/pull/2644) ([rtyler](https://github.com/rtyler)) +- chore: remove macos builders from pull request flow [\#2638](https://github.com/delta-io/delta-rs/pull/2638) ([rtyler](https://github.com/rtyler)) +- fix: enable parquet pushdown for DeltaScan via TableProvider impl for DeltaTable \(rebase\) [\#2637](https://github.com/delta-io/delta-rs/pull/2637) ([rtyler](https://github.com/rtyler)) +- chore: fix documentation generation with a pin of griffe [\#2636](https://github.com/delta-io/delta-rs/pull/2636) ([rtyler](https://github.com/rtyler)) +- fix\(python\): fixed large\_dtype to schema convert [\#2635](https://github.com/delta-io/delta-rs/pull/2635) 
([sherlockbeard](https://github.com/sherlockbeard)) +- fix\(rust, python\): fix writing empty structs when creating checkpoint [\#2627](https://github.com/delta-io/delta-rs/pull/2627) ([sherlockbeard](https://github.com/sherlockbeard)) +- fix\(rust, python\): fix merge schema with overwrite [\#2623](https://github.com/delta-io/delta-rs/pull/2623) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: bump python 0.18.2 [\#2621](https://github.com/delta-io/delta-rs/pull/2621) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: report DataFusion metrics for DeltaScan [\#2617](https://github.com/delta-io/delta-rs/pull/2617) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- feat\(rust,python\): cast each parquet file to delta schema [\#2615](https://github.com/delta-io/delta-rs/pull/2615) ([HawaiianSpork](https://github.com/HawaiianSpork)) +- fix\(rust\): inconsistent order of partitioning columns \(\#2494\) [\#2614](https://github.com/delta-io/delta-rs/pull/2614) ([aditanase](https://github.com/aditanase)) +- docs: add Daft writer [\#2594](https://github.com/delta-io/delta-rs/pull/2594) ([avriiil](https://github.com/avriiil)) +- feat\(python, rust\): `add column` operation [\#2562](https://github.com/delta-io/delta-rs/pull/2562) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: change arrow map root name to follow with parquet root name [\#2538](https://github.com/delta-io/delta-rs/pull/2538) ([sclmn](https://github.com/sclmn)) +- feat\(python\): handle PyCapsule interface objects in write\_deltalake [\#2534](https://github.com/delta-io/delta-rs/pull/2534) ([kylebarron](https://github.com/kylebarron)) +- feat: improve merge performance by using predicate non-partition columns min/max for prefiltering [\#2513](https://github.com/delta-io/delta-rs/pull/2513) ([JonasDev1](https://github.com/JonasDev1)) +- feat\(python, rust\): cleanup expired logs post-commit hook [\#2459](https://github.com/delta-io/delta-rs/pull/2459) ([ion-elgreco](https://github.com/ion-elgreco)) + +## [rust-v0.18.0](https://github.com/delta-io/delta-rs/tree/rust-v0.18.0) (2024-06-12) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.3...rust-v0.18.0) + +**Implemented enhancements:** + +- documentation: concurrent writes for non-S3 backends [\#2556](https://github.com/delta-io/delta-rs/issues/2556) +- pyarrow options for `write_delta` [\#2515](https://github.com/delta-io/delta-rs/issues/2515) +- \[deltalake\_aws\] Allow configuring separate endpoints for S3 and DynamoDB clients. 
[\#2498](https://github.com/delta-io/delta-rs/issues/2498) +- Include file stats when converting a parquet directory to a Delta table [\#2490](https://github.com/delta-io/delta-rs/issues/2490) +- Adopt the delta kernel types [\#2489](https://github.com/delta-io/delta-rs/issues/2489) + +**Fixed bugs:** + +- `raise_if_not_exists` for properties not configurable on CreateBuilder [\#2564](https://github.com/delta-io/delta-rs/issues/2564) +- write\_deltalake with rust engine fails when mode is append and overwrite schema is enabled [\#2553](https://github.com/delta-io/delta-rs/issues/2553) +- Running the basic\_operations examples fails with `Error: Transaction { source: WriterFeaturesRequired(TimestampWithoutTimezone) `} [\#2552](https://github.com/delta-io/delta-rs/issues/2552) +- invalid peer certificate: BadSignature when connecting to s3 from arm64/aarch64 [\#2551](https://github.com/delta-io/delta-rs/issues/2551) +- load\_cdf\(\) issue : Generic S3 error: request or response body error: operation timed out [\#2549](https://github.com/delta-io/delta-rs/issues/2549) +- write\_deltalake fails on Databricks volume [\#2540](https://github.com/delta-io/delta-rs/issues/2540) +- Getting "Microsoft Azure Error: Operation timed out" when trying to retrieve big files [\#2537](https://github.com/delta-io/delta-rs/issues/2537) +- Impossible to append to a DeltaTable with float data type on RHEL [\#2520](https://github.com/delta-io/delta-rs/issues/2520) +- Creating DeltaTable object slow [\#2518](https://github.com/delta-io/delta-rs/issues/2518) +- `write_deltalake` throws parser error when using `rust` engine and big decimals [\#2510](https://github.com/delta-io/delta-rs/issues/2510) +- TypeError: Object of type int64 is not JSON serializable when writing using a Pandas dataframe [\#2501](https://github.com/delta-io/delta-rs/issues/2501) +- unable to read delta table when table contains both null and non-null add stats [\#2477](https://github.com/delta-io/delta-rs/issues/2477) +- Commits on WriteMode::MergeSchema cause table metadata corruption [\#2468](https://github.com/delta-io/delta-rs/issues/2468) +- S3 object store always returns IMDS warnings [\#2460](https://github.com/delta-io/delta-rs/issues/2460) +- File skipping according to documentation [\#2427](https://github.com/delta-io/delta-rs/issues/2427) +- LockClientError [\#2379](https://github.com/delta-io/delta-rs/issues/2379) +- get\_app\_transaction\_version\(\) returns wrong result [\#2340](https://github.com/delta-io/delta-rs/issues/2340) +- Property setting in `create` is not handled correctly [\#2247](https://github.com/delta-io/delta-rs/issues/2247) +- Handling of decimals in scientific notation [\#2221](https://github.com/delta-io/delta-rs/issues/2221) +- Unable to append to delta table without datafusion feature [\#2204](https://github.com/delta-io/delta-rs/issues/2204) +- Decimal Column with Value 0 Causes Failure in Python Binding [\#2193](https://github.com/delta-io/delta-rs/issues/2193) + +**Merged pull requests:** + +- docs: improve S3 access docs [\#2589](https://github.com/delta-io/delta-rs/pull/2589) ([avriiil](https://github.com/avriiil)) +- chore: bump macOS runners, maybe resolve import error [\#2588](https://github.com/delta-io/delta-rs/pull/2588) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump to datafusion 39, arrow 52, pyo3 0.21 [\#2581](https://github.com/delta-io/delta-rs/pull/2581) ([abhiaagarwal](https://github.com/abhiaagarwal)) +- feat: add custom dynamodb endpoint configuration 
[\#2575](https://github.com/delta-io/delta-rs/pull/2575) ([hnaoto](https://github.com/hnaoto)) +- fix: consistently use raise\_if\_key\_not\_exists in CreateBuilder [\#2569](https://github.com/delta-io/delta-rs/pull/2569) ([vegarsti](https://github.com/vegarsti)) +- fix: add raise\_if\_key\_not\_exists to CreateBuilder [\#2565](https://github.com/delta-io/delta-rs/pull/2565) ([vegarsti](https://github.com/vegarsti)) +- docs: dt.delete add context + api docs link [\#2560](https://github.com/delta-io/delta-rs/pull/2560) ([avriiil](https://github.com/avriiil)) +- fix: update deltalake crate examples for crate layout and TimestampNtz [\#2559](https://github.com/delta-io/delta-rs/pull/2559) ([jhoekx](https://github.com/jhoekx)) +- docs: clarify locking mechanism requirement for S3 [\#2558](https://github.com/delta-io/delta-rs/pull/2558) ([inigohidalgo](https://github.com/inigohidalgo)) +- fix: remove deprecated overwrite\_schema configuration which has incorrect behavior [\#2554](https://github.com/delta-io/delta-rs/pull/2554) ([rtyler](https://github.com/rtyler)) +- fix: clippy warnings [\#2548](https://github.com/delta-io/delta-rs/pull/2548) ([imor](https://github.com/imor)) +- docs: dask write syntax fix [\#2543](https://github.com/delta-io/delta-rs/pull/2543) ([avriiil](https://github.com/avriiil)) +- fix: cast support fields nested in lists and maps [\#2541](https://github.com/delta-io/delta-rs/pull/2541) ([HawaiianSpork](https://github.com/HawaiianSpork)) +- feat: implement transaction identifiers - continued [\#2539](https://github.com/delta-io/delta-rs/pull/2539) ([roeap](https://github.com/roeap)) +- docs: pull delta from conda not pip [\#2535](https://github.com/delta-io/delta-rs/pull/2535) ([avriiil](https://github.com/avriiil)) +- chore: expose `files_by_partition` to public api [\#2533](https://github.com/delta-io/delta-rs/pull/2533) ([edmondop](https://github.com/edmondop)) +- chore: bump python 0.17.5 [\#2531](https://github.com/delta-io/delta-rs/pull/2531) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(rust\): make PartitionWriter public [\#2525](https://github.com/delta-io/delta-rs/pull/2525) ([adriangb](https://github.com/adriangb)) +- fix: msrv in workspace [\#2524](https://github.com/delta-io/delta-rs/pull/2524) ([roeap](https://github.com/roeap)) +- chore: fixing some clips [\#2521](https://github.com/delta-io/delta-rs/pull/2521) ([rtyler](https://github.com/rtyler)) +- fix: enable field\_with\_name to support nested fields with '.' 
delimiter [\#2519](https://github.com/delta-io/delta-rs/pull/2519) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- chore: tidying up builds without datafusion feature and clippy [\#2516](https://github.com/delta-io/delta-rs/pull/2516) ([rtyler](https://github.com/rtyler)) +- fix\(python\): release GIL on most operations [\#2512](https://github.com/delta-io/delta-rs/pull/2512) ([adriangb](https://github.com/adriangb)) +- docs: fix typo [\#2508](https://github.com/delta-io/delta-rs/pull/2508) ([avriiil](https://github.com/avriiil)) +- fix\(rust, python\): fixed differences in storage options between log and object stores [\#2500](https://github.com/delta-io/delta-rs/pull/2500) ([mightyshazam](https://github.com/mightyshazam)) +- docs: improve daft integration docs [\#2496](https://github.com/delta-io/delta-rs/pull/2496) ([avriiil](https://github.com/avriiil)) +- feat: adopt kernel schema types [\#2495](https://github.com/delta-io/delta-rs/pull/2495) ([roeap](https://github.com/roeap)) +- feat: add stats to convert-to-delta operation [\#2491](https://github.com/delta-io/delta-rs/pull/2491) ([gruuya](https://github.com/gruuya)) +- fix\(python, rust\): region lookup wasn't working correctly for dynamo [\#2488](https://github.com/delta-io/delta-rs/pull/2488) ([mightyshazam](https://github.com/mightyshazam)) +- feat: introduce CDC write-side support for the Update operations [\#2486](https://github.com/delta-io/delta-rs/pull/2486) ([rtyler](https://github.com/rtyler)) +- fix\(python\): reuse state in `to_pyarrow_dataset` [\#2485](https://github.com/delta-io/delta-rs/pull/2485) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: check to see if the file exists before attempting to rename [\#2482](https://github.com/delta-io/delta-rs/pull/2482) ([rtyler](https://github.com/rtyler)) +- fix\(python, rust\): use new schema for stats parsing instead of old [\#2480](https://github.com/delta-io/delta-rs/pull/2480) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): unable to read delta table when table contains both null and non-null add stats [\#2476](https://github.com/delta-io/delta-rs/pull/2476) ([yjshen](https://github.com/yjshen)) +- chore: update the changelog to include rust-v0.17.3 [\#2473](https://github.com/delta-io/delta-rs/pull/2473) ([rtyler](https://github.com/rtyler)) +- chore: a bunch of tweaks to get releases out the door [\#2472](https://github.com/delta-io/delta-rs/pull/2472) ([rtyler](https://github.com/rtyler)) +- chore: bump the core crate for its next release [\#2470](https://github.com/delta-io/delta-rs/pull/2470) ([rtyler](https://github.com/rtyler)) +- fix: return unsupported error for merging schemas in the presence of partition columns [\#2469](https://github.com/delta-io/delta-rs/pull/2469) ([emcake](https://github.com/emcake)) +- feat\(python\): add parameter to DeltaTable.to\_pyarrow\_dataset\(\) [\#2465](https://github.com/delta-io/delta-rs/pull/2465) ([adriangb](https://github.com/adriangb)) +- feat\(python, rust\): add OBJECT\_STORE\_CONCURRENCY\_LIMIT setting for ObjectStoreFactory [\#2458](https://github.com/delta-io/delta-rs/pull/2458) ([vigimite](https://github.com/vigimite)) +- fix\(rust\): handle 429 from GCS [\#2454](https://github.com/delta-io/delta-rs/pull/2454) ([adriangb](https://github.com/adriangb)) +- fix\(python\): reuse table state in write engine [\#2453](https://github.com/delta-io/delta-rs/pull/2453) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): implement abort commit for S3DynamoDBLogStore 
[\#2452](https://github.com/delta-io/delta-rs/pull/2452) ([PeterKeDer](https://github.com/PeterKeDer)) +- fix\(python, rust\): check timestamp\_ntz in nested fields, add check\_can\_write in pyarrow writer [\#2443](https://github.com/delta-io/delta-rs/pull/2443) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): remove imds calls from profile auth and region [\#2442](https://github.com/delta-io/delta-rs/pull/2442) ([mightyshazam](https://github.com/mightyshazam)) +- fix\(python, rust\): use from\_name during column projection creation [\#2441](https://github.com/delta-io/delta-rs/pull/2441) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump python for 0.17 release [\#2439](https://github.com/delta-io/delta-rs/pull/2439) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python,rust\): missing remove actions during `create_or_replace` [\#2437](https://github.com/delta-io/delta-rs/pull/2437) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: introduce the Operation trait to enforce consistency between operations [\#2435](https://github.com/delta-io/delta-rs/pull/2435) ([rtyler](https://github.com/rtyler)) +- fix\(python\): load\_as\_version with datetime object with no timezone specified [\#2429](https://github.com/delta-io/delta-rs/pull/2429) ([t1g0rz](https://github.com/t1g0rz)) +- feat\(python, rust\): respect column stats collection configurations [\#2428](https://github.com/delta-io/delta-rs/pull/2428) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: lazy static runtime in python [\#2424](https://github.com/delta-io/delta-rs/pull/2424) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: implement repartitioned for DeltaScan [\#2421](https://github.com/delta-io/delta-rs/pull/2421) ([jkylling](https://github.com/jkylling)) +- fix: return error when checkpoints and metadata get out of sync [\#2406](https://github.com/delta-io/delta-rs/pull/2406) ([esarili](https://github.com/esarili)) +- fix\(rust\): stats\_parsed has different number of records with stats [\#2405](https://github.com/delta-io/delta-rs/pull/2405) ([yjshen](https://github.com/yjshen)) +- docs: add Daft integration [\#2402](https://github.com/delta-io/delta-rs/pull/2402) ([avriiil](https://github.com/avriiil)) +- feat\(rust\): advance state in post commit [\#2396](https://github.com/delta-io/delta-rs/pull/2396) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore\(rust\): bump arrow v51 and datafusion v37.1 [\#2395](https://github.com/delta-io/delta-rs/pull/2395) ([lasantosr](https://github.com/lasantosr)) +- docs: document required aws permissions [\#2393](https://github.com/delta-io/delta-rs/pull/2393) ([ale-rinaldi](https://github.com/ale-rinaldi)) +- feat\(rust\): post commit hook \(v2\), create checkpoint hook [\#2391](https://github.com/delta-io/delta-rs/pull/2391) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: time travel when checkpointed and logs removed [\#2389](https://github.com/delta-io/delta-rs/pull/2389) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): remove flush after writing every batch [\#2387](https://github.com/delta-io/delta-rs/pull/2387) ([PeterKeDer](https://github.com/PeterKeDer)) +- feat: added configuration variables to handle EC2 metadata service [\#2385](https://github.com/delta-io/delta-rs/pull/2385) ([mightyshazam](https://github.com/mightyshazam)) +- fix\(rust\): timestamp deserialization format, missing type [\#2383](https://github.com/delta-io/delta-rs/pull/2383) 
([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump chrono [\#2372](https://github.com/delta-io/delta-rs/pull/2372) ([universalmind303](https://github.com/universalmind303)) +- chore: bump python 0.16.4 [\#2371](https://github.com/delta-io/delta-rs/pull/2371) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add snappy compression on checkpoint files [\#2365](https://github.com/delta-io/delta-rs/pull/2365) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add config for parquet pushdown on delta scan [\#2364](https://github.com/delta-io/delta-rs/pull/2364) ([Blajda](https://github.com/Blajda)) +- fix\(python,rust\): optimize compact on schema evolved table [\#2358](https://github.com/delta-io/delta-rs/pull/2358) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): expr parsing date/timestamp [\#2357](https://github.com/delta-io/delta-rs/pull/2357) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: remove tmp files in cleanup\_metadata [\#2356](https://github.com/delta-io/delta-rs/pull/2356) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: make struct fields nullable in stats schema [\#2346](https://github.com/delta-io/delta-rs/pull/2346) ([qinix](https://github.com/qinix)) +- fix\(rust\): adhere to protocol for Decimal [\#2332](https://github.com/delta-io/delta-rs/pull/2332) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): raise schema mismatch when decimal is not subset [\#2330](https://github.com/delta-io/delta-rs/pull/2330) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(rust\): derive Copy on some public enums [\#2329](https://github.com/delta-io/delta-rs/pull/2329) ([lasantosr](https://github.com/lasantosr)) +- fix: merge pushdown handling [\#2326](https://github.com/delta-io/delta-rs/pull/2326) ([Blajda](https://github.com/Blajda)) +- fix: merge concurrency control [\#2324](https://github.com/delta-io/delta-rs/pull/2324) ([ion-elgreco](https://github.com/ion-elgreco)) +- Revert 2291 merge predicate fix [\#2323](https://github.com/delta-io/delta-rs/pull/2323) ([Blajda](https://github.com/Blajda)) +- fix: try to fix timeouts [\#2318](https://github.com/delta-io/delta-rs/pull/2318) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): serialize MetricDetails from compaction runs to a string [\#2317](https://github.com/delta-io/delta-rs/pull/2317) ([liamphmurphy](https://github.com/liamphmurphy)) +- docs: add example in to\_pyarrow\_dataset [\#2315](https://github.com/delta-io/delta-rs/pull/2315) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python\): wrong batch size [\#2314](https://github.com/delta-io/delta-rs/pull/2314) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: object store 0.9.1 [\#2311](https://github.com/delta-io/delta-rs/pull/2311) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: checkpoint features format below v3,7 [\#2307](https://github.com/delta-io/delta-rs/pull/2307) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: schema evolution not coercing with large arrow types [\#2305](https://github.com/delta-io/delta-rs/pull/2305) ([aersam](https://github.com/aersam)) +- fix: clean up some non-datafusion builds [\#2303](https://github.com/delta-io/delta-rs/pull/2303) ([rtyler](https://github.com/rtyler)) +- docs: fix typo [\#2300](https://github.com/delta-io/delta-rs/pull/2300) ([LauH1987](https://github.com/LauH1987)) +- docs: make replaceWhere example compile [\#2299](https://github.com/delta-io/delta-rs/pull/2299) 
([LauH1987](https://github.com/LauH1987)) +- fix\(rust\): add missing chrono-tz feature [\#2295](https://github.com/delta-io/delta-rs/pull/2295) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore\(python\): bump to v0.16.1 [\#2294](https://github.com/delta-io/delta-rs/pull/2294) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): features not maintained in protocol after checkpoint [\#2293](https://github.com/delta-io/delta-rs/pull/2293) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: merge predicate for concurrent writes [\#2291](https://github.com/delta-io/delta-rs/pull/2291) ([JonasDev1](https://github.com/JonasDev1)) +- fix: replace assert and AssertionError with appropriate exceptions [\#2286](https://github.com/delta-io/delta-rs/pull/2286) ([joe-sharman](https://github.com/joe-sharman)) +- docs: fix typo in delta-lake-polars.md [\#2285](https://github.com/delta-io/delta-rs/pull/2285) ([vladdoster](https://github.com/vladdoster)) +- fix\(python, rust\): prevent table scan returning large arrow dtypes [\#2274](https://github.com/delta-io/delta-rs/pull/2274) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python\): always encapsulate column names in backticks in \_all functions [\#2271](https://github.com/delta-io/delta-rs/pull/2271) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): read only checkpoints that match \_last\_checkpoint version [\#2270](https://github.com/delta-io/delta-rs/pull/2270) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add .venv to .gitignore [\#2268](https://github.com/delta-io/delta-rs/pull/2268) ([gacharya](https://github.com/gacharya)) +- feat\(python, rust\): add `set table properties` operation [\#2264](https://github.com/delta-io/delta-rs/pull/2264) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: use dagster deltalake polars library [\#2263](https://github.com/delta-io/delta-rs/pull/2263) ([avriiil](https://github.com/avriiil)) +- docs: update comment about r2 requiring locks [\#2261](https://github.com/delta-io/delta-rs/pull/2261) ([cmackenzie1](https://github.com/cmackenzie1)) +- fix\(\#2256\): use consistent units of time [\#2260](https://github.com/delta-io/delta-rs/pull/2260) ([cmackenzie1](https://github.com/cmackenzie1)) +- chore: update the changelog for rust-v0.17.1 [\#2259](https://github.com/delta-io/delta-rs/pull/2259) ([rtyler](https://github.com/rtyler)) +- feat\(python\): release GIL in the write\_deltalake function [\#2257](https://github.com/delta-io/delta-rs/pull/2257) ([franz101](https://github.com/franz101)) +- chore\(rust\): bump datafusion to 36 [\#2249](https://github.com/delta-io/delta-rs/pull/2249) ([universalmind303](https://github.com/universalmind303)) +- chore!: replace rusoto with AWS SDK [\#2243](https://github.com/delta-io/delta-rs/pull/2243) ([mightyshazam](https://github.com/mightyshazam)) +- fix: handle conflict checking in optimize correctly [\#2208](https://github.com/delta-io/delta-rs/pull/2208) ([emcake](https://github.com/emcake)) +- feat: logical Node for find files [\#2194](https://github.com/delta-io/delta-rs/pull/2194) ([hntd187](https://github.com/hntd187)) + +## [rust-v0.17.3](https://github.com/delta-io/delta-rs/tree/rust-v0.17.3) (2024-05-01) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.1...rust-v0.17.3) + +**Implemented enhancements:** + +- Limit concurrent ObjectStore access to avoid resource limitations in constrained environments [\#2457](https://github.com/delta-io/delta-rs/issues/2457) +- How to get a 
DataFrame in Rust? [\#2404](https://github.com/delta-io/delta-rs/issues/2404) +- Allow checkpoint creation when partition column is "timestampNtz " [\#2381](https://github.com/delta-io/delta-rs/issues/2381) +- is there a way to make writing timestamp\_ntz optional [\#2339](https://github.com/delta-io/delta-rs/issues/2339) +- Update arrow dependency [\#2328](https://github.com/delta-io/delta-rs/issues/2328) +- Release GIL in deltalake.write\_deltalake [\#2234](https://github.com/delta-io/delta-rs/issues/2234) +- Unable to retrieve custom metadata from tables in rust [\#2153](https://github.com/delta-io/delta-rs/issues/2153) +- Refactor commit interface to be a Builder [\#2131](https://github.com/delta-io/delta-rs/issues/2131) + +**Fixed bugs:** + +- Handle rate limiting during write contention [\#2451](https://github.com/delta-io/delta-rs/issues/2451) +- regression : delta.logRetentionDuration don't seems to be respected [\#2447](https://github.com/delta-io/delta-rs/issues/2447) +- Issue writing to mounted storage in AKS using delta-rs library [\#2445](https://github.com/delta-io/delta-rs/issues/2445) +- TableMerger - when\_matched\_delete\(\) fails when Column names contain special characters [\#2438](https://github.com/delta-io/delta-rs/issues/2438) +- Generic DeltaTable error: External error: Arrow error: Invalid argument error: arguments need to have the same data type - while merge data into delta table [\#2423](https://github.com/delta-io/delta-rs/issues/2423) +- Merge on predicate throw error on date column: Unable to convert expression to string [\#2420](https://github.com/delta-io/delta-rs/issues/2420) +- Writing Tables with Append mode errors if the schema metadata is different [\#2419](https://github.com/delta-io/delta-rs/issues/2419) +- Logstore issues on AWS Lambda [\#2410](https://github.com/delta-io/delta-rs/issues/2410) +- Datafusion timestamp type doesn't respect delta lake schema [\#2408](https://github.com/delta-io/delta-rs/issues/2408) +- Compacting produces smaller row groups than expected [\#2386](https://github.com/delta-io/delta-rs/issues/2386) +- ValueError: Partition value cannot be parsed from string. 
[\#2380](https://github.com/delta-io/delta-rs/issues/2380) +- Very slow s3 connection after 0.16.1 [\#2377](https://github.com/delta-io/delta-rs/issues/2377) +- Merge update+insert truncates a delta table if the table is big enough [\#2362](https://github.com/delta-io/delta-rs/issues/2362) +- Do not add readerFeatures or writerFeatures keys under checkpoint files if minReaderVersion or minWriterVersion do not satisfy the requirements [\#2360](https://github.com/delta-io/delta-rs/issues/2360) +- Create empty table failed on rust engine [\#2354](https://github.com/delta-io/delta-rs/issues/2354) +- Getting error message when running in lambda: message: "Too many open files" [\#2353](https://github.com/delta-io/delta-rs/issues/2353) +- Temporary files filling up \_delta\_log folder - increasing table load time [\#2351](https://github.com/delta-io/delta-rs/issues/2351) +- compact fails with merged schemas [\#2347](https://github.com/delta-io/delta-rs/issues/2347) +- Cannot merge into table partitioned by date type column on 0.16.3 [\#2344](https://github.com/delta-io/delta-rs/issues/2344) +- Merge breaks using logical datatype decimal128 [\#2343](https://github.com/delta-io/delta-rs/issues/2343) +- Decimal types are not checked against max precision/scale at table creation [\#2331](https://github.com/delta-io/delta-rs/issues/2331) +- Merge update+insert truncates a delta table [\#2320](https://github.com/delta-io/delta-rs/issues/2320) +- Extract `add.stats_parsed` with wrong type [\#2312](https://github.com/delta-io/delta-rs/issues/2312) +- Process fails without error message when executing merge [\#2310](https://github.com/delta-io/delta-rs/issues/2310) +- delta\_rs don't seems to respect the row group size [\#2309](https://github.com/delta-io/delta-rs/issues/2309) +- Auth error when running inside VS Code [\#2306](https://github.com/delta-io/delta-rs/issues/2306) +- Unable to read deltatables with binary columns: Binary is not supported by JSON [\#2302](https://github.com/delta-io/delta-rs/issues/2302) +- Schema evolution not coercing with Large arrow types [\#2298](https://github.com/delta-io/delta-rs/issues/2298) +- Panic in `deltalake_core::kernel::snapshot::log_segment::list_log_files_with_checkpoint::{{closure}}` [\#2290](https://github.com/delta-io/delta-rs/issues/2290) +- Checkpoint does not preserve reader and writer features for the table protocol. 
[\#2288](https://github.com/delta-io/delta-rs/issues/2288) +- Z-Order with larger dataset resulting in memory error [\#2284](https://github.com/delta-io/delta-rs/issues/2284) +- Successful writes return error when using concurrent writers [\#2279](https://github.com/delta-io/delta-rs/issues/2279) +- Rust writer should raise when decimal types are incompatible \(currently writers and puts table in invalid state\) [\#2275](https://github.com/delta-io/delta-rs/issues/2275) +- Generic DeltaTable error: Version mismatch with new schema merge functionality in AWS S3 [\#2262](https://github.com/delta-io/delta-rs/issues/2262) +- DeltaTable is not resilient to corrupted checkpoint state [\#2258](https://github.com/delta-io/delta-rs/issues/2258) +- Inconsistent units of time [\#2256](https://github.com/delta-io/delta-rs/issues/2256) +- Partition column comparison is an assertion rather than if block with raise exception [\#2242](https://github.com/delta-io/delta-rs/issues/2242) +- Unable to merge column names starting from numbers [\#2230](https://github.com/delta-io/delta-rs/issues/2230) +- Merging to a table with multiple distinct partitions in parallel fails [\#2227](https://github.com/delta-io/delta-rs/issues/2227) +- cleanup\_metadata not respecting custom `logRetentionDuration` [\#2180](https://github.com/delta-io/delta-rs/issues/2180) +- Merge predicate fails with a field with a space [\#2167](https://github.com/delta-io/delta-rs/issues/2167) +- When\_matched\_update causes records to be lost with explicit predicate [\#2158](https://github.com/delta-io/delta-rs/issues/2158) +- Merge execution time grows exponentially with the number of columns [\#2107](https://github.com/delta-io/delta-rs/issues/2107) +- \_internal.DeltaError when merging [\#2084](https://github.com/delta-io/delta-rs/issues/2084) + +## [rust-v0.17.1](https://github.com/delta-io/delta-rs/tree/rust-v0.17.1) (2024-03-06) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.0...rust-v0.17.1) + +**Implemented enhancements:** + +- Get statistics metadata [\#2233](https://github.com/delta-io/delta-rs/issues/2233) +- add option to append only a subset of columns [\#2212](https://github.com/delta-io/delta-rs/issues/2212) +- add documentation how to configure delta.logRetentionDuration [\#2072](https://github.com/delta-io/delta-rs/issues/2072) +- Add `drop constraint` [\#2070](https://github.com/delta-io/delta-rs/issues/2070) +- Add 0.16 deprecation warnings for DynamoDB lock [\#2049](https://github.com/delta-io/delta-rs/issues/2049) + +**Fixed bugs:** + +- cleanup\_metadata not respecting custom `logRetentionDuration` [\#2180](https://github.com/delta-io/delta-rs/issues/2180) +- Rust writer panics on empty record batches [\#2253](https://github.com/delta-io/delta-rs/issues/2253) +- DeltaLake executed Rust: write method not found in `DeltaOps` [\#2244](https://github.com/delta-io/delta-rs/issues/2244) +- DELTA\_FILE\_PATTERN regex is incorrectly matching tmp commit files [\#2201](https://github.com/delta-io/delta-rs/issues/2201) +- Failed to create checkpoint with "Parquet does not support writing empty structs" [\#2189](https://github.com/delta-io/delta-rs/issues/2189) +- Error when parsing delete expressions [\#2187](https://github.com/delta-io/delta-rs/issues/2187) +- terminate called without an active exception [\#2184](https://github.com/delta-io/delta-rs/issues/2184) +- Now conda-installable on M1 [\#2178](https://github.com/delta-io/delta-rs/issues/2178) +- Add error message for partition\_by check 
[\#2177](https://github.com/delta-io/delta-rs/issues/2177) +- deltalake 0.15.2 prints partitions\_values and paths which is not desired [\#2176](https://github.com/delta-io/delta-rs/issues/2176) +- cleanup\_metadata can potentially delete most recent checkpoint, corrupting table [\#2174](https://github.com/delta-io/delta-rs/issues/2174) +- Broken filter for newly created delta table [\#2169](https://github.com/delta-io/delta-rs/issues/2169) +- Hash for StructField should consider more than the name [\#2045](https://github.com/delta-io/delta-rs/issues/2045) +- Schema comparaison in writer [\#1853](https://github.com/delta-io/delta-rs/issues/1853) +- fix\(python\): sort before schema comparison [\#2209](https://github.com/delta-io/delta-rs/pull/2209) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: prevent writing checkpoints with a version that does not exist in table state [\#1863](https://github.com/delta-io/delta-rs/pull/1863) ([rtyler](https://github.com/rtyler)) + +**Closed issues:** + +- Bug/Question: arrow's`FixedSizeList` is not roundtrippable [\#2162](https://github.com/delta-io/delta-rs/issues/2162) + +**Merged pull requests:** + +- fix: fixes panic on empty write [\#2254](https://github.com/delta-io/delta-rs/pull/2254) ([aersam](https://github.com/aersam)) +- fix\(rust\): typo deletionvectors [\#2251](https://github.com/delta-io/delta-rs/pull/2251) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): make interval parsing compatible with plural form [\#2250](https://github.com/delta-io/delta-rs/pull/2250) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump to 0.16 [\#2248](https://github.com/delta-io/delta-rs/pull/2248) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: merge schema support for the write operation and Python [\#2246](https://github.com/delta-io/delta-rs/pull/2246) ([rtyler](https://github.com/rtyler)) +- fix: object\_store 0.9.0 since 0.9.1 causes CI failure [\#2245](https://github.com/delta-io/delta-rs/pull/2245) ([aersam](https://github.com/aersam)) +- chore\(python\): bump version [\#2241](https://github.com/delta-io/delta-rs/pull/2241) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: fix ruff and mypy version and do formatting [\#2240](https://github.com/delta-io/delta-rs/pull/2240) ([aersam](https://github.com/aersam)) +- feat\(python, rust\): timestampNtz support [\#2236](https://github.com/delta-io/delta-rs/pull/2236) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: clean up some compilation failures and un-ignore some tests [\#2231](https://github.com/delta-io/delta-rs/pull/2231) ([rtyler](https://github.com/rtyler)) +- docs: fixing example in CONTRIBUTING.md [\#2224](https://github.com/delta-io/delta-rs/pull/2224) ([gacharya](https://github.com/gacharya)) +- perf: directly create projection instead of using DataFrame::with\_column [\#2222](https://github.com/delta-io/delta-rs/pull/2222) ([emcake](https://github.com/emcake)) +- chore: remove caches from github actions [\#2215](https://github.com/delta-io/delta-rs/pull/2215) ([rtyler](https://github.com/rtyler)) +- fix: `is_commit_file` should only catch commit jsons [\#2213](https://github.com/delta-io/delta-rs/pull/2213) ([emcake](https://github.com/emcake)) +- chore: fix the Cargo.tomls to publish information properly on docs.rs [\#2211](https://github.com/delta-io/delta-rs/pull/2211) ([rtyler](https://github.com/rtyler)) +- fix\(writer\): retry storage.put on temporary network errors [\#2207](https://github.com/delta-io/delta-rs/pull/2207) 
([qinix](https://github.com/qinix)) +- fix: canonicalize config keys [\#2206](https://github.com/delta-io/delta-rs/pull/2206) ([emcake](https://github.com/emcake)) +- docs: update README code samples for newer versions [\#2202](https://github.com/delta-io/delta-rs/pull/2202) ([jhoekx](https://github.com/jhoekx)) +- docs: dask integration fix formatting typo [\#2196](https://github.com/delta-io/delta-rs/pull/2196) ([avriiil](https://github.com/avriiil)) +- fix: add data\_type and nullable to StructField hash \(\#2045\) [\#2190](https://github.com/delta-io/delta-rs/pull/2190) ([sonhmai](https://github.com/sonhmai)) +- fix: removed panic in method [\#2185](https://github.com/delta-io/delta-rs/pull/2185) ([mightyshazam](https://github.com/mightyshazam)) +- feat: implement string representation for PartitionFilter [\#2183](https://github.com/delta-io/delta-rs/pull/2183) ([sonhmai](https://github.com/sonhmai)) +- fix: correct map field names [\#2182](https://github.com/delta-io/delta-rs/pull/2182) ([emcake](https://github.com/emcake)) +- feat: add comment to explain why assert has failed and show state [\#2179](https://github.com/delta-io/delta-rs/pull/2179) ([braaannigan](https://github.com/braaannigan)) +- docs: include the 0.17.0 changelog [\#2173](https://github.com/delta-io/delta-rs/pull/2173) ([rtyler](https://github.com/rtyler)) +- fix\(python\): skip empty row groups during stats gathering [\#2172](https://github.com/delta-io/delta-rs/pull/2172) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: 0.17.0 publish changes [\#2171](https://github.com/delta-io/delta-rs/pull/2171) ([rtyler](https://github.com/rtyler)) +- chore\(python\): bump version [\#2170](https://github.com/delta-io/delta-rs/pull/2170) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: update all the package metadata for publication to crates.io [\#2168](https://github.com/delta-io/delta-rs/pull/2168) ([rtyler](https://github.com/rtyler)) +- fix: rm println in python lib [\#2166](https://github.com/delta-io/delta-rs/pull/2166) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: cleanup minor clippies and other warns [\#2161](https://github.com/delta-io/delta-rs/pull/2161) ([rtyler](https://github.com/rtyler)) +- feat: implement clone for DeltaTable struct [\#2160](https://github.com/delta-io/delta-rs/pull/2160) ([mightyshazam](https://github.com/mightyshazam)) +- fix: allow loading of tables with identity columns [\#2155](https://github.com/delta-io/delta-rs/pull/2155) ([rtyler](https://github.com/rtyler)) +- fix: replace BTreeMap with IndexMap to preserve insertion order [\#2150](https://github.com/delta-io/delta-rs/pull/2150) ([roeap](https://github.com/roeap)) +- fix: made generalize\_filter less permissive, also added more cases [\#2149](https://github.com/delta-io/delta-rs/pull/2149) ([emcake](https://github.com/emcake)) +- docs: add delta lake best practices [\#2147](https://github.com/delta-io/delta-rs/pull/2147) ([MrPowers](https://github.com/MrPowers)) +- chore: shorten up the crate folder names in the tree [\#2145](https://github.com/delta-io/delta-rs/pull/2145) ([rtyler](https://github.com/rtyler)) +- fix\(\#2143\): keep specific error type when writing fails [\#2144](https://github.com/delta-io/delta-rs/pull/2144) ([abaerptc](https://github.com/abaerptc)) +- refactor\(python\): drop custom filesystem in write\_deltalake [\#2137](https://github.com/delta-io/delta-rs/pull/2137) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: use transparent logo in README 
[\#2132](https://github.com/delta-io/delta-rs/pull/2132) ([roeap](https://github.com/roeap)) +- fix: order logical schema to match physical schema [\#2129](https://github.com/delta-io/delta-rs/pull/2129) ([Blajda](https://github.com/Blajda)) +- feat: expose stats schema on Snapshot [\#2128](https://github.com/delta-io/delta-rs/pull/2128) ([roeap](https://github.com/roeap)) +- feat: update table config to contain new config keys [\#2127](https://github.com/delta-io/delta-rs/pull/2127) ([roeap](https://github.com/roeap)) +- fix: clean-up paths created during tests [\#2126](https://github.com/delta-io/delta-rs/pull/2126) ([roeap](https://github.com/roeap)) +- fix: prevent empty stats struct during parquet write [\#2125](https://github.com/delta-io/delta-rs/pull/2125) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- fix: temporarily skip s3 roundtrip test [\#2124](https://github.com/delta-io/delta-rs/pull/2124) ([roeap](https://github.com/roeap)) +- fix: do not write empty parquet file/add on writer close; accurately … [\#2123](https://github.com/delta-io/delta-rs/pull/2123) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- docs: add dask page to integration docs [\#2122](https://github.com/delta-io/delta-rs/pull/2122) ([avriiil](https://github.com/avriiil)) +- chore: upgrade to DataFusion 35.0 [\#2121](https://github.com/delta-io/delta-rs/pull/2121) ([philippemnoel](https://github.com/philippemnoel)) +- fix\(s3\): restore working test for DynamoDb log store repair log on read [\#2120](https://github.com/delta-io/delta-rs/pull/2120) ([dispanser](https://github.com/dispanser)) +- fix: set partition values for added files when building compaction plan [\#2119](https://github.com/delta-io/delta-rs/pull/2119) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- fix: add missing pandas import [\#2116](https://github.com/delta-io/delta-rs/pull/2116) ([Tim-Haarman](https://github.com/Tim-Haarman)) +- chore: temporarily ignore the repair on update test [\#2114](https://github.com/delta-io/delta-rs/pull/2114) ([rtyler](https://github.com/rtyler)) +- docs: delta lake is great for small data [\#2113](https://github.com/delta-io/delta-rs/pull/2113) ([MrPowers](https://github.com/MrPowers)) +- chore: removed unnecessary print statement from update method [\#2111](https://github.com/delta-io/delta-rs/pull/2111) ([LilMonk](https://github.com/LilMonk)) +- fix: schema issue within writebuilder [\#2106](https://github.com/delta-io/delta-rs/pull/2106) ([universalmind303](https://github.com/universalmind303)) +- docs: fix arg indent [\#2103](https://github.com/delta-io/delta-rs/pull/2103) ([wchatx](https://github.com/wchatx)) +- docs: delta lake file skipping [\#2096](https://github.com/delta-io/delta-rs/pull/2096) ([MrPowers](https://github.com/MrPowers)) +- docs: move dynamo docs into new docs page [\#2093](https://github.com/delta-io/delta-rs/pull/2093) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump python [\#2092](https://github.com/delta-io/delta-rs/pull/2092) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: allow merge\_execute to release the GIL [\#2091](https://github.com/delta-io/delta-rs/pull/2091) ([emcake](https://github.com/emcake)) +- docs: how delta lake transactions work [\#2089](https://github.com/delta-io/delta-rs/pull/2089) ([MrPowers](https://github.com/MrPowers)) +- fix: reinstate copy-if-not-exists passthrough [\#2083](https://github.com/delta-io/delta-rs/pull/2083) ([emcake](https://github.com/emcake)) +- docs: make an overview 
tab visible in docs [\#2080](https://github.com/delta-io/delta-rs/pull/2080) ([r3stl355](https://github.com/r3stl355)) +- docs: add usage guide for check constraints [\#2079](https://github.com/delta-io/delta-rs/pull/2079) ([hntd187](https://github.com/hntd187)) +- docs: update docs for rust print statement [\#2077](https://github.com/delta-io/delta-rs/pull/2077) ([skariyania](https://github.com/skariyania)) +- docs: add page on why to use delta lake [\#2076](https://github.com/delta-io/delta-rs/pull/2076) ([MrPowers](https://github.com/MrPowers)) +- feat\(rust, python\): add `drop constraint` operation [\#2071](https://github.com/delta-io/delta-rs/pull/2071) ([ion-elgreco](https://github.com/ion-elgreco)) +- refactor: add deltalake-gcp crate [\#2061](https://github.com/delta-io/delta-rs/pull/2061) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: allow checkpoints to contain metadata actions without a createdTime value [\#2059](https://github.com/delta-io/delta-rs/pull/2059) ([rtyler](https://github.com/rtyler)) +- chore: bump version python [\#2047](https://github.com/delta-io/delta-rs/pull/2047) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: ensure metadata cleanup do not corrupt tables without checkpoints [\#2044](https://github.com/delta-io/delta-rs/pull/2044) ([Blajda](https://github.com/Blajda)) +- docs: update docs for merge [\#2042](https://github.com/delta-io/delta-rs/pull/2042) ([Blajda](https://github.com/Blajda)) +- chore: update documentation for S3 / DynamoDb log store configuration [\#2041](https://github.com/delta-io/delta-rs/pull/2041) ([dispanser](https://github.com/dispanser)) +- feat: arrow backed log replay and table state [\#2037](https://github.com/delta-io/delta-rs/pull/2037) ([roeap](https://github.com/roeap)) +- fix: properly deserialize percent-encoded file paths of Remove actions, to make sure tombstone and file paths match [\#2035](https://github.com/delta-io/delta-rs/pull/2035) ([sigorbor](https://github.com/sigorbor)) +- fix: remove casts of structs to record batch [\#2033](https://github.com/delta-io/delta-rs/pull/2033) ([Blajda](https://github.com/Blajda)) +- feat\(python, rust\): expose custom\_metadata for all operations [\#2032](https://github.com/delta-io/delta-rs/pull/2032) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: refactor WriterProperties class [\#2030](https://github.com/delta-io/delta-rs/pull/2030) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: update datafusion [\#2029](https://github.com/delta-io/delta-rs/pull/2029) ([roeap](https://github.com/roeap)) +- refactor: increase metadata action usage [\#2027](https://github.com/delta-io/delta-rs/pull/2027) ([roeap](https://github.com/roeap)) +- fix: github actions for releasing docs [\#2026](https://github.com/delta-io/delta-rs/pull/2026) ([r3stl355](https://github.com/r3stl355)) +- feat: introduce schema evolution on RecordBatchWriter [\#2024](https://github.com/delta-io/delta-rs/pull/2024) ([rtyler](https://github.com/rtyler)) +- refactor: move azure integration to dedicated crate [\#2023](https://github.com/delta-io/delta-rs/pull/2023) ([roeap](https://github.com/roeap)) +- fix: use temporary table names during the constraint checks [\#2017](https://github.com/delta-io/delta-rs/pull/2017) ([r3stl355](https://github.com/r3stl355)) +- docs: add alterer [\#2014](https://github.com/delta-io/delta-rs/pull/2014) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: version bump python release [\#2011](https://github.com/delta-io/delta-rs/pull/2011) 
([ion-elgreco](https://github.com/ion-elgreco)) +- fix: fix the test\_restore\_by\_datetime test [\#2010](https://github.com/delta-io/delta-rs/pull/2010) ([r3stl355](https://github.com/r3stl355)) +- feat\(rust\): add more commit info to most operations [\#2009](https://github.com/delta-io/delta-rs/pull/2009) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python\): add schema conversion of FixedSizeBinaryArray and FixedSizeListType [\#2005](https://github.com/delta-io/delta-rs/pull/2005) ([balbok0](https://github.com/balbok0)) +- feat\(python\): expose large\_dtype param in `merge` [\#2003](https://github.com/delta-io/delta-rs/pull/2003) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: add writer properties to docs [\#2002](https://github.com/delta-io/delta-rs/pull/2002) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: fix CI breaking lint issues [\#1999](https://github.com/delta-io/delta-rs/pull/1999) ([r3stl355](https://github.com/r3stl355)) +- feat: implementation for replaceWhere [\#1996](https://github.com/delta-io/delta-rs/pull/1996) ([r3stl355](https://github.com/r3stl355)) +- chore: refactoring AWS code out of the core crate [\#1995](https://github.com/delta-io/delta-rs/pull/1995) ([rtyler](https://github.com/rtyler)) +- feat\(python\): expose custom metadata to writers [\#1994](https://github.com/delta-io/delta-rs/pull/1994) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: datafusion integration [\#1993](https://github.com/delta-io/delta-rs/pull/1993) ([MrPowers](https://github.com/MrPowers)) +- fix: flakey gcs test [\#1987](https://github.com/delta-io/delta-rs/pull/1987) ([roeap](https://github.com/roeap)) +- fix: implement consistent formatting for constraint expressions [\#1985](https://github.com/delta-io/delta-rs/pull/1985) ([Blajda](https://github.com/Blajda)) +- fix: case sensitivity for z-order [\#1982](https://github.com/delta-io/delta-rs/pull/1982) ([Blajda](https://github.com/Blajda)) +- feat\(python\): add writer\_properties to all operations [\#1980](https://github.com/delta-io/delta-rs/pull/1980) ([ion-elgreco](https://github.com/ion-elgreco)) +- refactor: trigger metadata retrieval only during `DeltaTable.metadata` [\#1979](https://github.com/delta-io/delta-rs/pull/1979) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: retry with exponential backoff for DynamoDb interaction [\#1975](https://github.com/delta-io/delta-rs/pull/1975) ([dispanser](https://github.com/dispanser)) +- feat\(python\): expose `add constraint` operation [\#1973](https://github.com/delta-io/delta-rs/pull/1973) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: properly decode percent-encoded file paths coming from parquet checkpoints [\#1970](https://github.com/delta-io/delta-rs/pull/1970) ([sigorbor](https://github.com/sigorbor)) +- feat: omit unmodified files during merge write [\#1969](https://github.com/delta-io/delta-rs/pull/1969) ([Blajda](https://github.com/Blajda)) +- feat\(python\): combine load\_version/load\_with\_datetime into `load_as_version` [\#1968](https://github.com/delta-io/delta-rs/pull/1968) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: enable S3 integration tests to be configured via environment vars [\#1966](https://github.com/delta-io/delta-rs/pull/1966) ([dispanser](https://github.com/dispanser)) +- fix: handle empty table response in unity api [\#1963](https://github.com/delta-io/delta-rs/pull/1963) ([JonasDev1](https://github.com/JonasDev1)) +- docs: add auto-release when docs are merged to main 
[\#1962](https://github.com/delta-io/delta-rs/pull/1962) ([r3stl355](https://github.com/r3stl355)) +- feat: cast list items to default before write with different item names [\#1959](https://github.com/delta-io/delta-rs/pull/1959) ([JonasDev1](https://github.com/JonasDev1)) +- feat: merge using partition filters [\#1958](https://github.com/delta-io/delta-rs/pull/1958) ([emcake](https://github.com/emcake)) +- chore: relocate cast\_record\_batch into its own module to shed the datafusion dependency [\#1955](https://github.com/delta-io/delta-rs/pull/1955) ([rtyler](https://github.com/rtyler)) +- fix: respect case sensitivity on operations [\#1954](https://github.com/delta-io/delta-rs/pull/1954) ([Blajda](https://github.com/Blajda)) +- docs: add better installation instructions [\#1951](https://github.com/delta-io/delta-rs/pull/1951) ([MrPowers](https://github.com/MrPowers)) +- docs: add polars integration [\#1949](https://github.com/delta-io/delta-rs/pull/1949) ([MrPowers](https://github.com/MrPowers)) +- fix: add arrow page back [\#1944](https://github.com/delta-io/delta-rs/pull/1944) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: remove the get\_data\_catalog\(\) function [\#1941](https://github.com/delta-io/delta-rs/pull/1941) ([rtyler](https://github.com/rtyler)) +- chore: update runs-on value in python\_release.yml [\#1940](https://github.com/delta-io/delta-rs/pull/1940) ([wjones127](https://github.com/wjones127)) +- docs: start how delta lake works [\#1938](https://github.com/delta-io/delta-rs/pull/1938) ([MrPowers](https://github.com/MrPowers)) +- docs: add logo, dark mode, boost search [\#1936](https://github.com/delta-io/delta-rs/pull/1936) ([ion-elgreco](https://github.com/ion-elgreco)) +- refactor: prefer usage of metadata and protocol fields [\#1935](https://github.com/delta-io/delta-rs/pull/1935) ([roeap](https://github.com/roeap)) +- chore: update python version [\#1934](https://github.com/delta-io/delta-rs/pull/1934) ([wjones127](https://github.com/wjones127)) +- feat\(python\): expose create to DeltaTable class [\#1932](https://github.com/delta-io/delta-rs/pull/1932) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: fix all examples and change overall structure [\#1931](https://github.com/delta-io/delta-rs/pull/1931) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: update to include pyarrow-hotfix [\#1930](https://github.com/delta-io/delta-rs/pull/1930) ([dennyglee](https://github.com/dennyglee)) +- fix: get rid of panic in during table [\#1928](https://github.com/delta-io/delta-rs/pull/1928) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov)) +- fix\(rust/python\): `optimize.compact` not working with tables with mixed large/normal arrow [\#1926](https://github.com/delta-io/delta-rs/pull/1926) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: extend write\_deltalake to accept Deltalake schema [\#1922](https://github.com/delta-io/delta-rs/pull/1922) ([r3stl355](https://github.com/r3stl355)) +- fix: fail fast for opening non-existent path [\#1917](https://github.com/delta-io/delta-rs/pull/1917) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov)) +- feat: check constraints [\#1915](https://github.com/delta-io/delta-rs/pull/1915) ([hntd187](https://github.com/hntd187)) +- docs: delta lake arrow integration page [\#1914](https://github.com/delta-io/delta-rs/pull/1914) ([MrPowers](https://github.com/MrPowers)) +- feat: add more info for contributors [\#1913](https://github.com/delta-io/delta-rs/pull/1913) 
([r3stl355](https://github.com/r3stl355)) +- fix: add buffer flushing to filesystem writes [\#1911](https://github.com/delta-io/delta-rs/pull/1911) ([r3stl355](https://github.com/r3stl355)) +- docs: update docs home page and add pandas integration [\#1905](https://github.com/delta-io/delta-rs/pull/1905) ([MrPowers](https://github.com/MrPowers)) +- feat: implement S3 log store with transactions backed by DynamoDb [\#1904](https://github.com/delta-io/delta-rs/pull/1904) ([dispanser](https://github.com/dispanser)) +- fix: prune each merge bin with only 1 file [\#1902](https://github.com/delta-io/delta-rs/pull/1902) ([haruband](https://github.com/haruband)) +- docs: update python docs link in readme.md [\#1899](https://github.com/delta-io/delta-rs/pull/1899) ([thomasfrederikhoeck](https://github.com/thomasfrederikhoeck)) +- docs: on append, overwrite, delete and z-ordering [\#1897](https://github.com/delta-io/delta-rs/pull/1897) ([MrPowers](https://github.com/MrPowers)) +- feat: compare timestamp partition values as timestamps instead of strings [\#1895](https://github.com/delta-io/delta-rs/pull/1895) ([sigorbor](https://github.com/sigorbor)) +- feat\(python\): expose rust writer as additional engine v2 [\#1891](https://github.com/delta-io/delta-rs/pull/1891) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: add high-level checking for append-only tables [\#1887](https://github.com/delta-io/delta-rs/pull/1887) ([junjunjd](https://github.com/junjunjd)) +- test: loading version 0 Delta table [\#1885](https://github.com/delta-io/delta-rs/pull/1885) ([dimonchik-suvorov](https://github.com/dimonchik-suvorov)) +- fix: improve catalog failure error message, add missing Glue native-tls feature dependency [\#1883](https://github.com/delta-io/delta-rs/pull/1883) ([r3stl355](https://github.com/r3stl355)) +- refactor: simplify `DeltaTableState` [\#1877](https://github.com/delta-io/delta-rs/pull/1877) ([roeap](https://github.com/roeap)) +- refactor: express log schema in delta types [\#1876](https://github.com/delta-io/delta-rs/pull/1876) ([roeap](https://github.com/roeap)) +- docs: add Rust installation instructions [\#1875](https://github.com/delta-io/delta-rs/pull/1875) ([MrPowers](https://github.com/MrPowers)) +- chore: clippy [\#1871](https://github.com/delta-io/delta-rs/pull/1871) ([roeap](https://github.com/roeap)) +- fix: docs deployment action [\#1869](https://github.com/delta-io/delta-rs/pull/1869) ([r3stl355](https://github.com/r3stl355)) +- docs: tell how to claim an issue [\#1866](https://github.com/delta-io/delta-rs/pull/1866) ([wjones127](https://github.com/wjones127)) +- feat: drop python 3.7 and adopt 3.12 [\#1859](https://github.com/delta-io/delta-rs/pull/1859) ([roeap](https://github.com/roeap)) +- feat: create benchmarks for merge [\#1857](https://github.com/delta-io/delta-rs/pull/1857) ([Blajda](https://github.com/Blajda)) +- chore: add @ion-elgreco to python/ [\#1855](https://github.com/delta-io/delta-rs/pull/1855) ([rtyler](https://github.com/rtyler)) +- fix: compile error with lifetime issues on optimize \(\#1843\) [\#1852](https://github.com/delta-io/delta-rs/pull/1852) ([dispanser](https://github.com/dispanser)) +- feat: implement issue auto-assign on `take` comment [\#1851](https://github.com/delta-io/delta-rs/pull/1851) ([r3stl355](https://github.com/r3stl355)) +- docs: add docs on small file compaction with optimize [\#1850](https://github.com/delta-io/delta-rs/pull/1850) ([MrPowers](https://github.com/MrPowers)) +- fix: checkpoint error with Azure Synapse 
[\#1848](https://github.com/delta-io/delta-rs/pull/1848) ([PierreDubrulle](https://github.com/PierreDubrulle)) +- feat\(python\): expose `convert_to_deltalake` [\#1842](https://github.com/delta-io/delta-rs/pull/1842) ([ion-elgreco](https://github.com/ion-elgreco)) +- ci: adopt `ruff format` for formatting [\#1841](https://github.com/delta-io/delta-rs/pull/1841) ([roeap](https://github.com/roeap)) + +## [rust-v0.17.0](https://github.com/delta-io/delta-rs/tree/rust-v0.17.0) (2024-02-06) + +:warning: The release of 0.17.0 **removes** the legacy dynamodb lock functionality; AWS users must read these release notes! :warning: + +### File handlers + +The 0.17.0 release moves storage implementations into their own crates, such as +`deltalake-aws`. A consequence of that refactoring is that custom storage and +file scheme handlers must be registered/initialized at runtime. Storage +subcrates conventionally define a `register_handlers` function which performs +that task. Users may see errors such as: +``` +thread 'main' panicked at /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/deltalake-core-0.17.0/src/table/builder.rs:189:48: +The specified table_uri is not valid: InvalidTableLocation("Unknown scheme: s3") +``` + +* Users of the meta-crate (`deltalake`) can call the storage crate via: `deltalake::aws::register_handlers(None);` at the entrypoint for their code. +* Users who adopt `core` and storage crates independently (e.g. `deltalake-aws`) can register via `deltalake_aws::register_handlers(None);`. + +The AWS, Azure, and GCP crates must all have their custom file schemes registered in this fashion; a minimal registration and locking-configuration sketch is included after the upgrade steps below. + + +### dynamodblock to S3DynamoDbLogStore + +The locking mechanism is fundamentally different between `deltalake` v0.16.x and v0.17.0. Starting with this release, the `deltalake` and `deltalake-aws` crates rely on the same [protocol for concurrent writes on AWS](https://docs.delta.io/latest/delta-storage.html#setup-configuration-s3-multi-cluster) as the Delta Lake/Spark implementation. + +Fundamentally, the DynamoDB table structure changes, [which is documented here](https://docs.delta.io/latest/delta-storage.html#setup-configuration-s3-multi-cluster). The configuration of a Rust process should continue to use the `AWS_S3_LOCKING_PROVIDER` environment value of `dynamodb`. The new table must be specified with the `DELTA_DYNAMO_TABLE_NAME` environment or configuration variable, and that should name the _new_ `S3DynamoDbLogStore`-compatible DynamoDB table. + +Because locking is required to ensure safe, consistent writes, **there is no incremental migration**; 0.16 and 0.17 writers **cannot** safely coexist. The following steps should be taken when upgrading: + +1. Stop all 0.16.x writers +2. Ensure all writes are completed and the lock table is empty. +3. Deploy 0.17.0 writers
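+
+A rough illustration of the registration call and locking configuration described above (a sketch only, not taken from the codebase): it assumes the `deltalake` meta-crate built with the `s3` feature, a `tokio` runtime, and a placeholder bucket URI.
+
+```rust
+use deltalake::{open_table, DeltaTableError};
+
+#[tokio::main]
+async fn main() -> Result<(), DeltaTableError> {
+    // Register the AWS handlers once at process start; without this call,
+    // opening an `s3://` URI fails with InvalidTableLocation("Unknown scheme: s3").
+    deltalake::aws::register_handlers(None);
+
+    // Safe concurrent writes additionally need the DynamoDb-backed log store,
+    // configured through the environment as described above, e.g.
+    //   AWS_S3_LOCKING_PROVIDER=dynamodb
+    //   DELTA_DYNAMO_TABLE_NAME=<name of the new S3DynamoDbLogStore table>
+    let table = open_table("s3://my-bucket/my-table").await?; // placeholder URI
+    println!("loaded table at version {}", table.version());
+    Ok(())
+}
+```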
+ + + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.5...rust-v0.17.0) + +**Implemented enhancements:** + +- Expose the ability to compile DataFusion with SIMD [\#2118](https://github.com/delta-io/delta-rs/issues/2118) +- Updating Table log retention configuration with `write_deltalake` silently changes nothing [\#2108](https://github.com/delta-io/delta-rs/issues/2108) +- ALTER table, ALTER Column, Add/Modify Comment, Add/remove/rename partitions, Set Tags, Set location, Set TBLProperties [\#2088](https://github.com/delta-io/delta-rs/issues/2088) +- Docs: Update docs for check constraints [\#2063](https://github.com/delta-io/delta-rs/issues/2063) +- Don't `ensure_table_uri` when creating a table `with_log_store` [\#2036](https://github.com/delta-io/delta-rs/issues/2036) +- Exposing custom\_metadata in merge operation [\#2031](https://github.com/delta-io/delta-rs/issues/2031) +- Support custom table properties via TableAlterer and write/merge [\#2022](https://github.com/delta-io/delta-rs/issues/2022) +- Remove parquet2 crate support [\#2004](https://github.com/delta-io/delta-rs/issues/2004) +- Merge operation that only touches necessary partitions [\#1991](https://github.com/delta-io/delta-rs/issues/1991) +- store userMetadata on write operations [\#1990](https://github.com/delta-io/delta-rs/issues/1990) +- Create Dask integration page [\#1956](https://github.com/delta-io/delta-rs/issues/1956) +- Merge: Filtering on partitions [\#1918](https://github.com/delta-io/delta-rs/issues/1918) +- Rethink the load\_version and load\_with\_datetime interfaces [\#1910](https://github.com/delta-io/delta-rs/issues/1910) +- docs: Delta Lake + Arrow Integration [\#1908](https://github.com/delta-io/delta-rs/issues/1908) +- docs: Delta Lake + Polars integration [\#1906](https://github.com/delta-io/delta-rs/issues/1906) +- Rethink decision to expose the public interface in namespaces [\#1900](https://github.com/delta-io/delta-rs/issues/1900) +- Add documentation on how to build and run documentation locally [\#1893](https://github.com/delta-io/delta-rs/issues/1893) +- Add API to create an empty Delta Lake table [\#1892](https://github.com/delta-io/delta-rs/issues/1892) +- Implementing CHECK constraints [\#1881](https://github.com/delta-io/delta-rs/issues/1881) +- Check Invariants are respecting table features for write paths [\#1880](https://github.com/delta-io/delta-rs/issues/1880) +- Organize docs with single lefthand sidebar [\#1873](https://github.com/delta-io/delta-rs/issues/1873) +- Make sure invariants are handled properly throughout the codebase [\#1870](https://github.com/delta-io/delta-rs/issues/1870) +- Unable to use deltalake `Schema` in `write_deltalake` [\#1862](https://github.com/delta-io/delta-rs/issues/1862) +- Add a Rust-backed engine for write\_deltalake [\#1861](https://github.com/delta-io/delta-rs/issues/1861) +- Run doctest in CI for Python API examples [\#1783](https://github.com/delta-io/delta-rs/issues/1783) +- \[RFC\] Use arrow for checkpoint reading and state handling [\#1776](https://github.com/delta-io/delta-rs/issues/1776) +- Expose Python exceptions in public module [\#1771](https://github.com/delta-io/delta-rs/issues/1771) +- Expose cleanup\_metadata or create\_checkpoint\_from\_table\_uri\_and\_cleanup to the Python API [\#1768](https://github.com/delta-io/delta-rs/issues/1768) +- Expose convert\_to\_delta to Python API [\#1767](https://github.com/delta-io/delta-rs/issues/1767) +- Add high-level checking for append-only tables
[\#1759](https://github.com/delta-io/delta-rs/issues/1759) + +**Fixed bugs:** + +- Row order no longer preserved after merge operation [\#2165](https://github.com/delta-io/delta-rs/issues/2165) +- Error when reading delta table with IDENTITY column [\#2152](https://github.com/delta-io/delta-rs/issues/2152) +- Merge on IS NULL condition doesn't work for empty table [\#2148](https://github.com/delta-io/delta-rs/issues/2148) +- JsonWriter converts structured parsing error into plain string [\#2143](https://github.com/delta-io/delta-rs/issues/2143) +- Pandas import error when merging tables [\#2112](https://github.com/delta-io/delta-rs/issues/2112) +- test\_repair\_on\_update broken in main [\#2109](https://github.com/delta-io/delta-rs/issues/2109) +- `WriteBuilder::with_input_execution_plan` does not apply the schema to the log's metadata fields [\#2105](https://github.com/delta-io/delta-rs/issues/2105) +- MERGE logical plan vs execution plan schema mismatch [\#2104](https://github.com/delta-io/delta-rs/issues/2104) +- Partitions not pushed down [\#2090](https://github.com/delta-io/delta-rs/issues/2090) +- Cant create empty table with write\_deltalake [\#2086](https://github.com/delta-io/delta-rs/issues/2086) +- Unexpected high costs on Google Cloud Storage [\#2085](https://github.com/delta-io/delta-rs/issues/2085) +- Unable to read s3 table: `Unknown scheme: s3` [\#2065](https://github.com/delta-io/delta-rs/issues/2065) +- write\_deltalake not respecting writer\_properties [\#2064](https://github.com/delta-io/delta-rs/issues/2064) +- Unable to read/write tables with the "gs" schema in the table\_uri in 0.15.1 [\#2060](https://github.com/delta-io/delta-rs/issues/2060) +- LockClient requiered error for S3 backend in 0.15.1 python [\#2057](https://github.com/delta-io/delta-rs/issues/2057) +- Error while writing Pandas DataFrame to Delta Lake \(S3\) [\#2051](https://github.com/delta-io/delta-rs/issues/2051) +- Error with dynamo locking provider on 0.15 [\#2034](https://github.com/delta-io/delta-rs/issues/2034) +- Conda version 0.15.0 is missing files [\#2021](https://github.com/delta-io/delta-rs/issues/2021) +- Rust panicking through Python library when a delete predicate uses a nullable field [\#2019](https://github.com/delta-io/delta-rs/issues/2019) +- No snapshot or version 0 found, perhaps /Users/watsy0007/resources/test\_table/ is an empty dir? 
[\#2016](https://github.com/delta-io/delta-rs/issues/2016) +- Generic DeltaTable error: type\_coercion in Struct column in merge operation [\#1998](https://github.com/delta-io/delta-rs/issues/1998) +- Constraint expr not formatted during commit action [\#1971](https://github.com/delta-io/delta-rs/issues/1971) +- .load\_with\_datetime\(\) is incorrectly rounding to nearest second [\#1967](https://github.com/delta-io/delta-rs/issues/1967) +- vacuuming log files [\#1965](https://github.com/delta-io/delta-rs/issues/1965) +- Unable to merge uppercase column names [\#1960](https://github.com/delta-io/delta-rs/issues/1960) +- Schema error: Invalid data type for Delta Lake: Null [\#1946](https://github.com/delta-io/delta-rs/issues/1946) +- Python v0.14 wheel files not up to date [\#1945](https://github.com/delta-io/delta-rs/issues/1945) +- python Release 0.14 is missing Windows wheels [\#1942](https://github.com/delta-io/delta-rs/issues/1942) +- CI integration test fails randomly: test\_restore\_by\_datetime [\#1925](https://github.com/delta-io/delta-rs/issues/1925) +- Merge data freezes indefenetely [\#1920](https://github.com/delta-io/delta-rs/issues/1920) +- Load DeltaTable from non-existing folder causing empty folder creation [\#1916](https://github.com/delta-io/delta-rs/issues/1916) +- Reoptimizes merge bins with only 1 file, even though they have no effect. [\#1901](https://github.com/delta-io/delta-rs/issues/1901) +- The Python Docs link in README.MD points to old docs [\#1898](https://github.com/delta-io/delta-rs/issues/1898) +- optimize.compact\(\) fails with bad schema after updating to pyarrow 8.0 [\#1889](https://github.com/delta-io/delta-rs/issues/1889) +- Python build is broken on main [\#1856](https://github.com/delta-io/delta-rs/issues/1856) +- Checkpoint error with Azure Synapse [\#1847](https://github.com/delta-io/delta-rs/issues/1847) +- merge very slow compared to delete + append on larger dataset [\#1846](https://github.com/delta-io/delta-rs/issues/1846) +- get\_add\_actions fails with deltalake 0.13 [\#1835](https://github.com/delta-io/delta-rs/issues/1835) +- Handle PyArrow CVE-2023-47248 [\#1834](https://github.com/delta-io/delta-rs/issues/1834) +- Delta-rs writer hangs with to many file handles open \(Azure\) [\#1832](https://github.com/delta-io/delta-rs/issues/1832) +- Encountering NotATable\("No snapshot or version 0 found, perhaps xxx is an empty dir?"\) [\#1831](https://github.com/delta-io/delta-rs/issues/1831) +- write\_deltalake is not creating checkpoints [\#1815](https://github.com/delta-io/delta-rs/issues/1815) +- Problem writing tables in directory named with char `~` [\#1806](https://github.com/delta-io/delta-rs/issues/1806) +- DeltaTable Merge throws in merging if there are uppercase in Schema. [\#1797](https://github.com/delta-io/delta-rs/issues/1797) +- rust merge error - datafusion panics [\#1790](https://github.com/delta-io/delta-rs/issues/1790) +- expose use\_dictionary=False when writing Delta Table and running optimize [\#1772](https://github.com/delta-io/delta-rs/issues/1772) + +**Closed issues:** + +- Is this print necessary? Can we remove this. 
[\#2110](https://github.com/delta-io/delta-rs/issues/2110) +- Azure concurrent writes [\#2069](https://github.com/delta-io/delta-rs/issues/2069) +- Fix docs deployment [\#1867](https://github.com/delta-io/delta-rs/issues/1867) +- Add a header in old docs and direct users to new docs [\#1865](https://github.com/delta-io/delta-rs/issues/1865) + +## [rust-v0.16.5](https://github.com/delta-io/delta-rs/tree/rust-v0.16.5) (2023-11-15) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.4...rust-v0.16.5) + +**Implemented enhancements:** + +- When will upgrade object\_store to 0.8? [\#1858](https://github.com/delta-io/delta-rs/issues/1858) +- No Official Help [\#1849](https://github.com/delta-io/delta-rs/issues/1849) +- Auto assign GitHub issues with a "take" message [\#1791](https://github.com/delta-io/delta-rs/issues/1791) + +**Fixed bugs:** + +- cargo clippy fails on core in main [\#1843](https://github.com/delta-io/delta-rs/issues/1843) + +## [rust-v0.16.4](https://github.com/delta-io/delta-rs/tree/rust-v0.16.4) (2023-11-12) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.3...rust-v0.16.4) + +**Implemented enhancements:** + +- Unable to add deltalake git dependency to cargo.toml [\#1821](https://github.com/delta-io/delta-rs/issues/1821) + +## [rust-v0.16.3](https://github.com/delta-io/delta-rs/tree/rust-v0.16.3) (2023-11-08) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.2...rust-v0.16.3) + +**Implemented enhancements:** + +- Docs: add release GitHub action [\#1799](https://github.com/delta-io/delta-rs/issues/1799) +- Use bulk deletes where possible [\#1761](https://github.com/delta-io/delta-rs/issues/1761) + +**Fixed bugs:** + +- Code Owners no longer valid [\#1794](https://github.com/delta-io/delta-rs/issues/1794) +- `MERGE` works incorrectly with partitioned table if the data column order is not same as table column order [\#1787](https://github.com/delta-io/delta-rs/issues/1787) +- errors when using pyarrow dataset as a source [\#1779](https://github.com/delta-io/delta-rs/issues/1779) +- Write to Microsoft OneLake failed. [\#1764](https://github.com/delta-io/delta-rs/issues/1764) + +## [rust-v0.16.2](https://github.com/delta-io/delta-rs/tree/rust-v0.16.2) (2023-10-21) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.1...rust-v0.16.2) + +## [rust-v0.16.1](https://github.com/delta-io/delta-rs/tree/rust-v0.16.1) (2023-10-21) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.16.0...rust-v0.16.1) + ## [rust-v0.16.0](https://github.com/delta-io/delta-rs/tree/rust-v0.16.0) (2023-09-27) [Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.15.0...rust-v0.16.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ee258a3ce8..f681aa3948 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing to delta-rs -Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports. +Development on this project is mostly driven by volunteer contributors. We welcome new contributors, including not only those who develop new features, but also those who are able to help with documentation and provide detailed bug reports. Please take note of our [code of conduct](CODE_OF_CONDUCT.md). 
@@ -17,34 +17,40 @@ If you want to claim an issue to work on, you can write the word `take` as a com - Install Rust, e.g. as described [here](https://doc.rust-lang.org/cargo/getting-started/installation.html) - Have a compatible Python version installed (check `python/pyproject.toml` for current requirement) - Create a Python virtual environment (required for development builds), e.g. as described [here](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/) + ```sh + python -m venv .venv + ``` + - Build the project for development (this requires an active virtual environment and will also install `deltalake` in that virtual environment) -``` -cd python -make develop -``` + ```sh + cd python + make develop + ``` - Run some Python code, e.g. to run a specific test -``` -python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema" -``` + ```sh + python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema" + ``` - Run some Rust code, e.g. run an example -``` -cd crates/deltalake -cargo run --examples basic_operations -``` + ```sh + cd crates/deltalake + cargo run --example basic_operations --features="datafusion" + ``` ## Run the docs locally -*This serves your local contens of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstings* +*This serves your local contents of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstrings* + +```sh (cd python; make develop) pip install -r docs/requirements.txt mkdocs serve ``` ## To make a pull request (PR) -- Make sure all the following steps run/pass locally before submitting a PR -``` +Make sure all the following steps run/pass locally before submitting a PR + +```sh cargo fmt -- --check cd python make check-rust @@ -62,7 +68,7 @@ make build-docs - For debugging Rust code, install [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb). The extension should even create Debug launch configurations for the project if you allow it, an easy way to get started. Just set a breakpoint and run the relevant configuration. - For debugging from Python into Rust, follow this procedure: 1.
Add this to `.vscode/launch.json` -``` +```json { "type": "lldb", "request": "attach", diff --git a/Cargo.toml b/Cargo.toml index cfcb4eaf3c..0892b0f12b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,20 @@ [workspace] -members = [ - "crates/*", - "delta-inspect", - "python", -] +members = ["crates/*", "delta-inspect", "python"] exclude = ["proofs"] resolver = "2" +[workspace.package] +authors = ["Qingping Hou "] +rust-version = "1.75" +keywords = ["deltalake", "delta", "datalake"] +readme = "README.md" +edition = "2021" +description = "Native Delta Lake implementation in Rust" +homepage = "https://github.com/delta-io/delta.rs" +license = "Apache-2.0" +documentation = "https://docs.rs/deltalake" +repository = "https://github.com/delta-io/delta.rs" + [profile.release-with-debug] inherits = "release" debug = true @@ -18,28 +26,33 @@ debug = true debug = "line-tables-only" [workspace.dependencies] +delta_kernel = { version = "0.3.0" } +# delta_kernel = { path = "../delta-kernel-rs/kernel" } + # arrow -arrow = { version = "50" } -arrow-arith = { version = "50" } -arrow-array = { version = "50" } -arrow-buffer = { version = "50" } -arrow-cast = { version = "50" } -arrow-ipc = { version = "50" } -arrow-json = { version = "50" } -arrow-ord = { version = "50" } -arrow-row = { version = "50" } -arrow-schema = { version = "50" } -arrow-select = { version = "50" } -object_store = { version = "0.9" } -parquet = { version = "50" } +arrow = { version = "52" } +arrow-arith = { version = "52" } +arrow-array = { version = "52", features = ["chrono-tz"] } +arrow-buffer = { version = "52" } +arrow-cast = { version = "52" } +arrow-ipc = { version = "52" } +arrow-json = { version = "52" } +arrow-ord = { version = "52" } +arrow-row = { version = "52" } +arrow-schema = { version = "52" } +arrow-select = { version = "52" } +object_store = { version = "0.10.1" } +parquet = { version = "52" } # datafusion -datafusion = { version = "35" } -datafusion-expr = { version = "35" } -datafusion-common = { version = "35" } -datafusion-proto = { version = "35" } -datafusion-sql = { version = "35" } -datafusion-physical-expr = { version = "35" } +datafusion = { version = "40" } +datafusion-expr = { version = "40" } +datafusion-common = { version = "40" } +datafusion-proto = { version = "40" } +datafusion-sql = { version = "40" } +datafusion-physical-expr = { version = "40" } +datafusion-functions = { version = "40" } +datafusion-functions-array = { version = "40" } # serde serde = { version = "1.0.194", features = ["derive"] } @@ -47,11 +60,12 @@ serde_json = "1" # "stdlib" bytes = { version = "1" } -chrono = { version = "0.4.31", default-features = false, features = ["clock"] } +chrono = { version = ">0.4.34", default-features = false, features = ["clock"] } tracing = { version = "0.1", features = ["log"] } regex = { version = "1" } thiserror = { version = "1" } url = { version = "2" } +urlencoding = "2.1.3" uuid = { version = "1" } # runtime / async diff --git a/README.md b/README.md index 927b68ee63..b00026b8d8 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ #delta-rs in the Delta Lake Slack workspace

+Delta Lake is an open-source storage format that runs on top of existing data lakes. Delta Lake is compatible with processing engines like Apache Spark and provides benefits such as ACID transaction guarantees, schema enforcement, and scalable data handling. The Delta Lake project aims to unlock the power of the Deltalake for as many users and projects as possible by providing native low-level APIs aimed at developers and integrators, as well as a high-level operations @@ -77,7 +78,7 @@ write_deltalake("./data/delta", df) dt = DeltaTable("./data/delta") df2 = dt.to_pandas() -assert df == df2 +assert df.equals(df2) ``` The same table can also be loaded using the core Rust crate: @@ -91,7 +92,7 @@ async fn main() -> Result<(), DeltaTableError> { let table = open_table("./data/delta").await?; // show all active files in the table - let files = table.get_files(); + let files: Vec<_> = table.get_file_uris()?.collect(); println!("{:?}", files); Ok(()) @@ -116,6 +117,7 @@ Libraries and frameworks that interoperate with delta-rs - in alphabetical order - [AWS SDK for Pandas](https://github.com/aws/aws-sdk-pandas) - [ballista][ballista] - [datafusion][datafusion] +- [Daft](https://www.getdaft.io/) - [Dask](https://github.com/dask-contrib/dask-deltatable) - [datahub](https://datahubproject.io/) - [DuckDB](https://duckdb.org/) @@ -130,45 +132,46 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc ### Cloud Integrations -| Storage | Rust | Python | Comment | -| -------------------- | :-----: | :-----: | ----------------------------------- | -| Local | ![done] | ![done] | | -| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes | -| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes | -| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes | -| Azure Blob | ![done] | ![done] | | -| Azure ADLS Gen2 | ![done] | ![done] | | -| Microsoft OneLake | ![done] | ![done] | | -| Google Cloud Storage | ![done] | ![done] | | +| Storage | Rust | Python | Comment | +| -------------------- | :-----: | :-----: | ---------------------------------------------------------------- | +| Local | ![done] | ![done] | | +| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes | +| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes | +| S3 - R2 | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::CopyIfNotExists` | +| Azure Blob | ![done] | ![done] | | +| Azure ADLS Gen2 | ![done] | ![done] | | +| Microsoft OneLake | ![done] | ![done] | | +| Google Cloud Storage | ![done] | ![done] | | +| HDFS | ![done] | ![done] | | ### Supported Operations -| Operation | Rust | Python | Description | -| --------------------- | :----------------------: | :----------------------: | ------------------------------------------- | -| Create | ![done] | ![done] | Create a new table | -| Read | ![done] | ![done] | Read data from a table | -| Vacuum | ![done] | ![done] | Remove unused files and log entries | -| Delete - partitions | | ![done] | Delete a table partition | -| Delete - predicates | ![done] | ![done] | Delete data based on a predicate | -| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file | -| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file | -| Merge | ![done] | ![done] | Merge a target Delta table with source data | -| FS check | ![done] | ![done] | Remove corrupted files from table | +| Operation | Rust | Python | Description | +| --------------------- | 
:-----: | :-----: | ------------------------------------------- | +| Create | ![done] | ![done] | Create a new table | +| Read | ![done] | ![done] | Read data from a table | +| Vacuum | ![done] | ![done] | Remove unused files and log entries | +| Delete - partitions | | ![done] | Delete a table partition | +| Delete - predicates | ![done] | ![done] | Delete data based on a predicate | +| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file | +| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file | +| Merge | ![done] | ![done] | Merge a target Delta table with source data | +| FS check | ![done] | ![done] | Remove corrupted files from table | ### Protocol Support Level -| Writer Version | Requirement | Status | -| -------------- | --------------------------------------------- | :------------------: | -| Version 2 | Append Only Tables | ![done] | -| Version 2 | Column Invariants | ![done] | -| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] | -| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] | +| Writer Version | Requirement | Status | +| -------------- | --------------------------------------------- | :-------------------------------: | +| Version 2 | Append Only Tables | ![done] | +| Version 2 | Column Invariants | ![done] | +| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] | +| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] | | Version 3 | CHECK constraints | [![semi-done]][check-constraints] | -| Version 4 | Change Data Feed | | -| Version 4 | Generated Columns | | -| Version 5 | Column Mapping | | -| Version 6 | Identity Columns | | -| Version 7 | Table Features | | +| Version 4 | Change Data Feed | | +| Version 4 | Generated Columns | | +| Version 5 | Column Mapping | | +| Version 6 | Identity Columns | | +| Version 7 | Table Features | | | Reader Version | Requirement | Status | | -------------- | ----------------------------------- | ------ | diff --git a/crates/aws/Cargo.toml b/crates/aws/Cargo.toml index b18729e262..e6913a2162 100644 --- a/crates/aws/Cargo.toml +++ b/crates/aws/Cargo.toml @@ -1,14 +1,24 @@ [package] name = "deltalake-aws" -version = "0.1.0" -edition = "2021" +version = "0.1.2" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core" } -rusoto_core = { version = "0.47", default-features = false, optional = true } -rusoto_credential = { version = "0.47" } -rusoto_sts = { version = "0.47", default-features = false, optional = true } -rusoto_dynamodb = { version = "0.47", default-features = false, optional = true } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } +aws-smithy-runtime-api = { version="1.1.7" } +aws-smithy-runtime = { version="1.1.7", optional = true} +aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]} +aws-config = { version = "1.1.6", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] } +aws-sdk-dynamodb = {version = "1.15.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } +aws-sdk-sts = {version = "1.1.6", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } 
lazy_static = "1" maplit = "1" @@ -24,13 +34,14 @@ regex = { workspace = true } uuid = { workspace = true, features = ["serde", "v4"] } url = { workspace = true } backoff = { version = "0.4", features = [ "tokio" ] } +hyper-tls = { version = "0.5", optional = true } [dev-dependencies] deltalake-core = { path = "../core", features = ["datafusion"] } chrono = { workspace = true } serial_test = "3" deltalake-test = { path = "../test" } -pretty_env_logger = "*" +pretty_env_logger = "0.5.0" rand = "0.8" serde_json = { workspace = true } @@ -38,12 +49,13 @@ serde_json = { workspace = true } default = ["rustls"] integration_test = [] native-tls = [ - "rusoto_core/native-tls", - "rusoto_sts/native-tls", - "rusoto_dynamodb/native-tls", + "aws-config/client-hyper", + "aws-smithy-runtime/connector-hyper-0-14-x", + "hyper-tls" ] rustls = [ - "rusoto_core/rustls", - "rusoto_sts/rustls", - "rusoto_dynamodb/rustls", + "aws-config/client-hyper", + "aws-config/rustls", + "aws-sdk-dynamodb/rustls", + "aws-sdk-sts/rustls", ] diff --git a/crates/aws/src/credentials.rs b/crates/aws/src/credentials.rs new file mode 100644 index 0000000000..9ddf19b74c --- /dev/null +++ b/crates/aws/src/credentials.rs @@ -0,0 +1,118 @@ +use std::{sync::Arc, time::Duration}; + +use aws_config::{ + ecs::EcsCredentialsProvider, + environment::{EnvironmentVariableCredentialsProvider, EnvironmentVariableRegionProvider}, + imds::credentials::ImdsCredentialsProvider, + meta::{credentials::CredentialsProviderChain, region::RegionProviderChain}, + profile::ProfileFileCredentialsProvider, + provider_config::ProviderConfig, + web_identity_token::WebIdentityTokenCredentialsProvider, +}; +use aws_credential_types::provider::{self, ProvideCredentials}; +use tracing::Instrument; + +const IMDS_PROVIDER_NAME: &str = "Ec2InstanceMetadata"; + +#[derive(Debug)] +pub struct ConfiguredCredentialChain { + provider_chain: CredentialsProviderChain, +} + +#[derive(Debug)] +pub struct NoOpCredentials {} + +pub fn new_region_provider(disable_imds: bool, imds_timeout: u64) -> RegionProviderChain { + let env_provider = EnvironmentVariableRegionProvider::new(); + let profile_file = aws_config::profile::region::ProfileFileRegionProvider::default(); + if disable_imds { + return RegionProviderChain::first_try(env_provider).or_else(profile_file); + } + + RegionProviderChain::first_try(env_provider) + .or_else(profile_file) + .or_else( + aws_config::imds::region::Builder::default() + .imds_client( + aws_config::imds::Client::builder() + .connect_timeout(Duration::from_millis(imds_timeout)) + .read_timeout(Duration::from_millis(imds_timeout)) + .build(), + ) + .build(), + ) +} + +impl ConfiguredCredentialChain { + pub fn new(disable_imds: bool, imds_timeout: u64, conf: &ProviderConfig) -> Self { + let imds_provider = Self::build_imds_provider(conf, disable_imds, imds_timeout); + let env_provider = EnvironmentVariableCredentialsProvider::default(); + let profile_provider = ProfileFileCredentialsProvider::builder() + .configure(conf) + .with_custom_provider(IMDS_PROVIDER_NAME, imds_provider.clone()) + .build(); + let web_identity_token_provider = WebIdentityTokenCredentialsProvider::builder() + .configure(conf) + .build(); + + let ecs_provider = EcsCredentialsProvider::builder().configure(conf).build(); + + let provider_chain = CredentialsProviderChain::first_try("Environment", env_provider) + .or_else("Profile", profile_provider) + .or_else("WebIdentityToken", web_identity_token_provider) + .or_else("EcsContainer", ecs_provider) + .or_else(IMDS_PROVIDER_NAME, 
imds_provider); + + Self { provider_chain } + } + + async fn credentials(&self) -> provider::Result { + self.provider_chain + .provide_credentials() + .instrument(tracing::debug_span!("provide_credentials", provider = %"default_chain")) + .await + } + + fn build_imds_provider( + conf: &ProviderConfig, + disable_imds: bool, + imds_timeout: u64, + ) -> Arc { + if disable_imds { + return Arc::new(NoOpCredentials {}); + } + + let imds_provider = ImdsCredentialsProvider::builder() + .configure(conf) + .imds_client( + aws_config::imds::Client::builder() + .connect_timeout(Duration::from_millis(imds_timeout)) + .read_timeout(Duration::from_millis(imds_timeout)) + .build(), + ) + .build(); + Arc::new(imds_provider) + } +} + +impl ProvideCredentials for ConfiguredCredentialChain { + fn provide_credentials<'a>( + &'a self, + ) -> aws_credential_types::provider::future::ProvideCredentials<'a> + where + Self: 'a, + { + aws_credential_types::provider::future::ProvideCredentials::new(self.credentials()) + } +} + +impl ProvideCredentials for NoOpCredentials { + fn provide_credentials<'a>(&'a self) -> provider::future::ProvideCredentials<'a> + where + Self: 'a, + { + aws_credential_types::provider::future::ProvideCredentials::new(std::future::ready(Err( + provider::error::CredentialsError::not_loaded_no_source(), + ))) + } +} diff --git a/crates/aws/src/errors.rs b/crates/aws/src/errors.rs index bbce9dc426..55f2a2d013 100644 --- a/crates/aws/src/errors.rs +++ b/crates/aws/src/errors.rs @@ -2,27 +2,45 @@ use std::num::ParseIntError; -use rusoto_core::RusotoError; -use rusoto_dynamodb::{CreateTableError, GetItemError, PutItemError, QueryError, UpdateItemError}; - -#[derive(thiserror::Error, Debug, PartialEq)] -pub enum DynamoDbConfigError { - /// Error raised creating http client - #[error("Failed to create request dispatcher: {source}")] - HttpClient { - /// The underlying Rusoto TlsError - #[from] - source: rusoto_core::request::TlsError, +use aws_credential_types::provider::error::CredentialsError; +use aws_sdk_dynamodb::{ + error::SdkError, + operation::{ + create_table::CreateTableError, delete_item::DeleteItemError, get_item::GetItemError, + put_item::PutItemError, query::QueryError, update_item::UpdateItemError, }, +}; +use aws_smithy_runtime_api::client::result::ServiceError; + +macro_rules! 
impl_from_service_error { + ($error_type:ty) => { + impl From> for LockClientError + where + R: Send + Sync + std::fmt::Debug + 'static, + { + fn from(err: SdkError<$error_type, R>) -> Self { + match err { + SdkError::ServiceError(e) => e.into(), + _ => LockClientError::GenericDynamoDb { + source: Box::new(err), + }, + } + } + } - /// Error raised getting credentials - #[error("Failed to retrieve AWS credentials: {source}")] - Credentials { - /// The underlying Rusoto CredentialsError - #[from] - source: rusoto_credential::CredentialsError, - }, + impl From> for LockClientError + where + R: Send + Sync + std::fmt::Debug + 'static, + { + fn from(value: ServiceError<$error_type, R>) -> Self { + value.into_err().into() + } + } + }; +} +#[derive(thiserror::Error, Debug)] +pub enum DynamoDbConfigError { /// Billing mode string invalid #[error("Invalid billing mode : {0}, supported values : ['provided', 'pay_per_request']")] InvalidBillingMode(String), @@ -33,6 +51,9 @@ pub enum DynamoDbConfigError { // config_value: String, source: ParseIntError, }, + /// Cannot initialize DynamoDbConfiguration due to some sort of threading issue + #[error("Cannot initialize dynamodb lock configuration")] + InitializationError, } /// Errors produced by `DynamoDbLockClient` @@ -44,7 +65,7 @@ pub enum LockClientError { #[error("Lock table '{name}': creation failed: {source}")] LockTableCreateFailure { name: String, - source: RusotoError, + source: Box, }, #[error("Log entry for table '{table_path}' and version '{version}' already exists")] @@ -60,29 +81,30 @@ pub enum LockClientError { GenericDynamoDb { source: Box, }, - #[error("configuration error: {source}")] - Credentials { - source: rusoto_credential::CredentialsError, - }, - + Credentials { source: CredentialsError }, #[error( "Atomic rename requires a LockClient for S3 backends. \ Either configure the LockClient, or set AWS_S3_ALLOW_UNSAFE_RENAME=true \ to opt out of support for concurrent writers." 
)] LockClientRequired, + + #[error("Log entry for table '{table_path}' and version '{version}' is already complete")] + VersionAlreadyCompleted { table_path: String, version: i64 }, } impl From for LockClientError { fn from(err: GetItemError) -> Self { match err { - GetItemError::InternalServerError(_) => err.into(), - GetItemError::ProvisionedThroughputExceeded(_) => { + GetItemError::ProvisionedThroughputExceededException(_) => { LockClientError::ProvisionedThroughputExceeded } GetItemError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded, - GetItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound, + GetItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound, + _ => LockClientError::GenericDynamoDb { + source: Box::new(err), + }, } } } @@ -90,12 +112,14 @@ impl From for LockClientError { impl From for LockClientError { fn from(err: QueryError) -> Self { match err { - QueryError::InternalServerError(_) => err.into(), - QueryError::ProvisionedThroughputExceeded(_) => { + QueryError::ProvisionedThroughputExceededException(_) => { LockClientError::ProvisionedThroughputExceeded } QueryError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded, - QueryError::ResourceNotFound(_) => LockClientError::LockTableNotFound, + QueryError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound, + _ => LockClientError::GenericDynamoDb { + source: Box::new(err), + }, } } } @@ -103,17 +127,19 @@ impl From for LockClientError { impl From for LockClientError { fn from(err: PutItemError) -> Self { match err { - PutItemError::ConditionalCheckFailed(_) => { + PutItemError::ConditionalCheckFailedException(_) => { unreachable!("error must be handled explicitely") } - PutItemError::InternalServerError(_) => err.into(), - PutItemError::ProvisionedThroughputExceeded(_) => { + PutItemError::ProvisionedThroughputExceededException(_) => { LockClientError::ProvisionedThroughputExceeded } PutItemError::RequestLimitExceeded(_) => LockClientError::ProvisionedThroughputExceeded, - PutItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound, - PutItemError::ItemCollectionSizeLimitExceeded(_) => err.into(), - PutItemError::TransactionConflict(_) => err.into(), + PutItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound, + PutItemError::ItemCollectionSizeLimitExceededException(_) => err.into(), + PutItemError::TransactionConflictException(_) => err.into(), + _ => LockClientError::GenericDynamoDb { + source: Box::new(err), + }, } } } @@ -121,34 +147,51 @@ impl From for LockClientError { impl From for LockClientError { fn from(err: UpdateItemError) -> Self { match err { - UpdateItemError::ConditionalCheckFailed(_) => { + UpdateItemError::ConditionalCheckFailedException(_) => { unreachable!("condition check failure in update is not an error") } UpdateItemError::InternalServerError(_) => err.into(), - UpdateItemError::ProvisionedThroughputExceeded(_) => { + UpdateItemError::ProvisionedThroughputExceededException(_) => { LockClientError::ProvisionedThroughputExceeded } UpdateItemError::RequestLimitExceeded(_) => { LockClientError::ProvisionedThroughputExceeded } - UpdateItemError::ResourceNotFound(_) => LockClientError::LockTableNotFound, - UpdateItemError::ItemCollectionSizeLimitExceeded(_) => err.into(), - UpdateItemError::TransactionConflict(_) => err.into(), + UpdateItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound, + 
UpdateItemError::ItemCollectionSizeLimitExceededException(_) => err.into(), + UpdateItemError::TransactionConflictException(_) => err.into(), + _ => LockClientError::GenericDynamoDb { + source: Box::new(err), + }, } } } -impl From> for LockClientError -where - E: Into + std::error::Error + Send + Sync + 'static, -{ - fn from(err: RusotoError) -> Self { +impl From for LockClientError { + fn from(err: DeleteItemError) -> Self { match err { - RusotoError::Service(e) => e.into(), - RusotoError::Credentials(e) => LockClientError::Credentials { source: e }, + DeleteItemError::ConditionalCheckFailedException(_) => { + unreachable!("error must be handled explicitly") + } + DeleteItemError::InternalServerError(_) => err.into(), + DeleteItemError::ProvisionedThroughputExceededException(_) => { + LockClientError::ProvisionedThroughputExceeded + } + DeleteItemError::RequestLimitExceeded(_) => { + LockClientError::ProvisionedThroughputExceeded + } + DeleteItemError::ResourceNotFoundException(_) => LockClientError::LockTableNotFound, + DeleteItemError::ItemCollectionSizeLimitExceededException(_) => err.into(), + DeleteItemError::TransactionConflictException(_) => err.into(), _ => LockClientError::GenericDynamoDb { source: Box::new(err), }, } } } + +impl_from_service_error!(GetItemError); +impl_from_service_error!(PutItemError); +impl_from_service_error!(QueryError); +impl_from_service_error!(UpdateItemError); +impl_from_service_error!(DeleteItemError); diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index 2630f80512..a0a99c01f0 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -1,9 +1,23 @@ //! Lock client implementation based on DynamoDb. +mod credentials; pub mod errors; pub mod logstore; +#[cfg(feature = "native-tls")] +mod native; pub mod storage; - +use aws_config::SdkConfig; +use aws_sdk_dynamodb::{ + operation::{ + create_table::CreateTableError, delete_item::DeleteItemError, get_item::GetItemError, + put_item::PutItemError, query::QueryError, update_item::UpdateItemError, + }, + types::{ + AttributeDefinition, AttributeValue, BillingMode, KeySchemaElement, KeyType, + ScalarAttributeType, + }, + Client, +}; use lazy_static::lazy_static; use object_store::aws::AmazonS3ConfigKey; use regex::Regex; @@ -18,21 +32,13 @@ use tracing::debug; use deltalake_core::logstore::{logstores, LogStore, LogStoreFactory}; use deltalake_core::storage::{factories, url_prefix_handler, ObjectStoreRef, StorageOptions}; use deltalake_core::{DeltaResult, Path}; -use rusoto_core::{HttpClient, Region, RusotoError}; -use rusoto_credential::AutoRefreshingProvider; -use rusoto_dynamodb::{ - AttributeDefinition, AttributeValue, CreateTableError, CreateTableInput, DynamoDb, - DynamoDbClient, GetItemError, GetItemInput, KeySchemaElement, PutItemError, PutItemInput, - QueryError, QueryInput, UpdateItemError, UpdateItemInput, -}; -use rusoto_sts::WebIdentityProvider; use url::Url; use errors::{DynamoDbConfigError, LockClientError}; use storage::{S3ObjectStoreFactory, S3StorageOptions}; #[derive(Clone, Debug, Default)] -struct S3LogStoreFactory {} +pub struct S3LogStoreFactory {} impl LogStoreFactory for S3LogStoreFactory { fn with_options( @@ -41,7 +47,7 @@ impl LogStoreFactory for S3LogStoreFactory { location: &Url, options: &StorageOptions, ) -> DeltaResult> { - let store = url_prefix_handler(store, Path::parse(location.path())?)?; + let store = url_prefix_handler(store, Path::parse(location.path())?); if options .0 @@ -53,7 +59,7 @@ impl LogStoreFactory for S3LogStoreFactory { )); } - let s3_options = 
S3StorageOptions::from_map(&options.0); + let s3_options = S3StorageOptions::from_map(&options.0)?; if s3_options.locking_provider.as_deref() != Some("dynamodb") { debug!("S3LogStoreFactory has been asked to create a LogStore without the dynamodb locking provider"); @@ -117,7 +123,7 @@ impl CommitEntry { /// Lock client backed by DynamoDb. pub struct DynamoDbLockClient { /// DynamoDb client - dynamodb_client: DynamoDbClient, + dynamodb_client: Client, /// configuration of the config: DynamoDbConfig, } @@ -131,24 +137,30 @@ impl std::fmt::Debug for DynamoDbLockClient { impl DynamoDbLockClient { /// Creates a new DynamoDbLockClient from the supplied storage options. pub fn try_new( + sdk_config: &SdkConfig, lock_table_name: Option, billing_mode: Option, max_elapsed_request_time: Option, - region: Region, - use_web_identity: bool, + dynamodb_override_endpoint: Option, ) -> Result { - let dynamodb_client = create_dynamodb_client(region.clone(), use_web_identity)?; + let dynamodb_sdk_config = + Self::create_dynamodb_sdk_config(sdk_config, dynamodb_override_endpoint); + + let dynamodb_client = aws_sdk_dynamodb::Client::new(&dynamodb_sdk_config); let lock_table_name = lock_table_name .or_else(|| std::env::var(constants::LOCK_TABLE_KEY_NAME).ok()) .unwrap_or(constants::DEFAULT_LOCK_TABLE_NAME.to_owned()); - let billing_mode = billing_mode + let billing_mode = if let Some(bm) = billing_mode .or_else(|| std::env::var(constants::BILLING_MODE_KEY_NAME).ok()) - .map_or_else( - || Ok(BillingMode::PayPerRequest), - |bm| BillingMode::from_str(&bm), - )?; + .as_ref() + { + BillingMode::try_parse(bm.to_ascii_uppercase().as_str()) + .map_err(|_| DynamoDbConfigError::InvalidBillingMode(String::default()))? + } else { + BillingMode::PayPerRequest + }; let max_elapsed_request_time = max_elapsed_request_time .or_else(|| std::env::var(constants::MAX_ELAPSED_REQUEST_TIME_KEY_NAME).ok()) @@ -162,14 +174,31 @@ impl DynamoDbLockClient { billing_mode, lock_table_name, max_elapsed_request_time, - use_web_identity, - region, + sdk_config: sdk_config.clone(), }; Ok(Self { dynamodb_client, config, }) } + fn create_dynamodb_sdk_config( + sdk_config: &SdkConfig, + dynamodb_override_endpoint: Option, + ) -> SdkConfig { + /* + if dynamodb_override_endpoint exists/AWS_ENDPOINT_URL_DYNAMODB is specified by user + use dynamodb_override_endpoint to create dynamodb client + */ + + match dynamodb_override_endpoint { + Some(dynamodb_endpoint_url) => sdk_config + .to_owned() + .to_builder() + .endpoint_url(dynamodb_endpoint_url) + .build(), + None => sdk_config.to_owned(), + } + } /// Create the lock table where DynamoDb stores the commit information for all delta tables. /// @@ -179,40 +208,50 @@ impl DynamoDbLockClient { /// `active`, so transient failures might occurr when immediately using the lock client. 
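// Editor's illustrative sketch, not part of this patch: how a caller might construct the
// lock client against the new SDK-based `try_new` shown above and then ensure the lock
// table exists. The table name, billing mode string, and local DynamoDB endpoint below
// are placeholder values, not defaults taken from this crate.
async fn example_create_lock_client() -> Result<(), Box<dyn std::error::Error>> {
    let sdk_config = aws_config::load_from_env().await;
    let client = DynamoDbLockClient::try_new(
        &sdk_config,
        Some("delta_rs_lock_table".to_owned()),    // lock_table_name (placeholder)
        Some("pay_per_request".to_owned()),        // billing_mode
        None,                                      // max_elapsed_request_time
        Some("http://localhost:8000".to_owned()),  // dynamodb_override_endpoint (placeholder)
    )?;
    // Safe to call repeatedly: an existing table is reported as TableAlreadyExists.
    client.try_create_lock_table().await?;
    Ok(())
}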
pub async fn try_create_lock_table(&self) -> Result { let attribute_definitions = vec![ - AttributeDefinition { - attribute_name: constants::ATTR_TABLE_PATH.to_owned(), - attribute_type: constants::STRING_TYPE.to_owned(), - }, - AttributeDefinition { - attribute_name: constants::ATTR_FILE_NAME.to_owned(), - attribute_type: constants::STRING_TYPE.to_owned(), - }, + AttributeDefinition::builder() + .attribute_name(constants::ATTR_TABLE_PATH) + .attribute_type(ScalarAttributeType::S) + .build() + .unwrap(), + AttributeDefinition::builder() + .attribute_name(constants::ATTR_FILE_NAME) + .attribute_type(ScalarAttributeType::S) + .build() + .unwrap(), ]; - let input = CreateTableInput { - attribute_definitions, - key_schema: vec![ - KeySchemaElement { - attribute_name: constants::ATTR_TABLE_PATH.to_owned(), - key_type: constants::KEY_TYPE_HASH.to_owned(), - }, - KeySchemaElement { - attribute_name: constants::ATTR_FILE_NAME.to_owned(), - key_type: constants::KEY_TYPE_RANGE.to_owned(), - }, - ], - billing_mode: Some(self.config.billing_mode.to_str()), - table_name: self.config.lock_table_name.clone(), - ..Default::default() - }; - match self.dynamodb_client.create_table(input).await { + let request = self + .dynamodb_client + .create_table() + .set_attribute_definitions(Some(attribute_definitions)) + .set_key_schema(Some(vec![ + KeySchemaElement::builder() + .attribute_name(constants::ATTR_TABLE_PATH.to_owned()) + .key_type(KeyType::Hash) + .build() + .unwrap(), + KeySchemaElement::builder() + .attribute_name(constants::ATTR_FILE_NAME.to_owned()) + .key_type(KeyType::Range) + .build() + .unwrap(), + ])) + .billing_mode(self.config.billing_mode.clone()) + .table_name(&self.config.lock_table_name) + .send(); + match request.await { Ok(_) => Ok(CreateLockTableResult::TableCreated), - Err(RusotoError::Service(CreateTableError::ResourceInUse(_))) => { - Ok(CreateLockTableResult::TableAlreadyExists) - } - Err(reason) => Err(LockClientError::LockTableCreateFailure { - name: self.config.lock_table_name.clone(), - source: reason, - }), + Err(sdk_err) => match sdk_err.as_service_error() { + Some(CreateTableError::ResourceInUseException(_)) => { + Ok(CreateLockTableResult::TableAlreadyExists) + } + Some(_) => Err(LockClientError::LockTableCreateFailure { + name: self.config.lock_table_name.clone(), + source: Box::new(sdk_err.into_service_error()), + }), + _ => Err(LockClientError::GenericDynamoDb { + source: Box::new(sdk_err), + }), + }, } } @@ -238,22 +277,26 @@ impl DynamoDbLockClient { table_path: &str, version: i64, ) -> Result, LockClientError> { - let input = GetItemInput { - consistent_read: Some(true), - table_name: self.config.lock_table_name.clone(), - key: self.get_primary_key(version, table_path), - ..Default::default() - }; let item = self .retry(|| async { - match self.dynamodb_client.get_item(input.clone()).await { + match self + .dynamodb_client + .get_item() + .consistent_read(true) + .table_name(&self.config.lock_table_name) + .set_key(Some(self.get_primary_key(version, table_path))) + .send() + .await + { Ok(x) => Ok(x), - Err(RusotoError::Service(GetItemError::ProvisionedThroughputExceeded(_))) => { - Err(backoff::Error::transient( - LockClientError::ProvisionedThroughputExceeded, - )) - } - Err(err) => Err(backoff::Error::permanent(err.into())), + Err(sdk_err) => match sdk_err.as_service_error() { + Some(GetItemError::ProvisionedThroughputExceededException(_)) => { + Err(backoff::Error::transient( + LockClientError::ProvisionedThroughputExceeded, + )) + } + _ => 
Err(backoff::Error::permanent(sdk_err.into())), + }, } }) .await?; @@ -266,29 +309,33 @@ impl DynamoDbLockClient { table_path: &str, entry: &CommitEntry, ) -> Result<(), LockClientError> { - let item = create_value_map(entry, table_path); - let input = PutItemInput { - condition_expression: Some(constants::CONDITION_EXPR_CREATE.to_owned()), - table_name: self.get_lock_table_name(), - item, - ..Default::default() - }; self.retry(|| async { - match self.dynamodb_client.put_item(input.clone()).await { + let item = create_value_map(entry, table_path); + match self + .dynamodb_client + .put_item() + .condition_expression(constants::CONDITION_EXPR_CREATE.as_str()) + .table_name(self.get_lock_table_name()) + .set_item(Some(item)) + .send() + .await + { Ok(_) => Ok(()), - Err(RusotoError::Service(PutItemError::ProvisionedThroughputExceeded(_))) => Err( - backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), - ), - Err(RusotoError::Service(PutItemError::ConditionalCheckFailed(_))) => Err( - backoff::Error::permanent(LockClientError::VersionAlreadyExists { - table_path: table_path.to_owned(), - version: entry.version, - }), - ), - Err(RusotoError::Service(PutItemError::ResourceNotFound(_))) => Err( - backoff::Error::permanent(LockClientError::LockTableNotFound), - ), - Err(err) => Err(backoff::Error::permanent(err.into())), + Err(err) => match err.as_service_error() { + Some(PutItemError::ProvisionedThroughputExceededException(_)) => Err( + backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), + ), + Some(PutItemError::ConditionalCheckFailedException(_)) => Err( + backoff::Error::permanent(LockClientError::VersionAlreadyExists { + table_path: table_path.to_owned(), + version: entry.version, + }), + ), + Some(PutItemError::ResourceNotFoundException(_)) => Err( + backoff::Error::permanent(LockClientError::LockTableNotFound), + ), + _ => Err(backoff::Error::permanent(err.into())), + }, } }) .await @@ -312,25 +359,31 @@ impl DynamoDbLockClient { table_path: &str, limit: i64, ) -> Result, LockClientError> { - let input = QueryInput { - table_name: self.get_lock_table_name(), - consistent_read: Some(true), - limit: Some(limit), - scan_index_forward: Some(false), - key_condition_expression: Some(format!("{} = :tn", constants::ATTR_TABLE_PATH)), - expression_attribute_values: Some( - maplit::hashmap!(":tn".into() => string_attr(table_path)), - ), - ..Default::default() - }; let query_result = self .retry(|| async { - match self.dynamodb_client.query(input.clone()).await { + match self + .dynamodb_client + .query() + .table_name(self.get_lock_table_name()) + .consistent_read(true) + .limit(limit.try_into().unwrap_or(i32::MAX)) + .scan_index_forward(false) + .key_condition_expression(format!("{} = :tn", constants::ATTR_TABLE_PATH)) + .set_expression_attribute_values(Some( + maplit::hashmap!(":tn".into() => string_attr(table_path)), + )) + .send() + .await + { Ok(result) => Ok(result), - Err(RusotoError::Service(QueryError::ProvisionedThroughputExceeded(_))) => Err( - backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), - ), - Err(err) => Err(backoff::Error::permanent(err.into())), + Err(sdk_err) => match sdk_err.as_service_error() { + Some(QueryError::ProvisionedThroughputExceededException(_)) => { + Err(backoff::Error::transient( + LockClientError::ProvisionedThroughputExceeded, + )) + } + _ => Err(backoff::Error::permanent(sdk_err.into())), + }, } }) .await?; @@ -354,31 +407,69 @@ impl DynamoDbLockClient { 
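// Editor's sketch of the retry policy used by the DynamoDB calls in this impl block
// (not part of the patch): provisioned-throughput throttling is classified as
// `backoff::Error::transient` and retried with exponential backoff, while any other
// failure is `permanent` and returned immediately. The helper name is illustrative.
async fn retry_with_backoff<T, E, F, Fut>(
    max_elapsed: std::time::Duration,
    operation: F,
) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, backoff::Error<E>>>,
{
    // Give up once the configured max elapsed request time has passed.
    let policy = backoff::ExponentialBackoffBuilder::new()
        .with_max_elapsed_time(Some(max_elapsed))
        .build();
    backoff::future::retry(policy, operation).await
}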
.duration_since(SystemTime::UNIX_EPOCH) .unwrap() .as_secs(); - let input = UpdateItemInput { - table_name: self.get_lock_table_name(), - key: self.get_primary_key(version, table_path), - update_expression: Some("SET complete = :c, expireTime = :e".to_owned()), - expression_attribute_values: Some(maplit::hashmap! { - ":c".to_owned() => string_attr("true"), - ":e".to_owned() => num_attr(seconds_since_epoch), - ":f".into() => string_attr("false"), - }), - condition_expression: Some(constants::CONDITION_UPDATE_INCOMPLETE.to_owned()), - ..Default::default() - }; - self.retry(|| async { - match self.dynamodb_client.update_item(input.clone()).await { + match self + .dynamodb_client + .update_item() + .table_name(self.get_lock_table_name()) + .set_key(Some(self.get_primary_key(version, table_path))) + .update_expression("SET complete = :c, expireTime = :e".to_owned()) + .set_expression_attribute_values(Some(maplit::hashmap! { + ":c".to_owned() => string_attr("true"), + ":e".to_owned() => num_attr(seconds_since_epoch), + ":f".into() => string_attr("false"), + })) + .condition_expression(constants::CONDITION_UPDATE_INCOMPLETE) + .send() + .await + { Ok(_) => Ok(UpdateLogEntryResult::UpdatePerformed), - Err(RusotoError::Service(UpdateItemError::ConditionalCheckFailed(_))) => { - Ok(UpdateLogEntryResult::AlreadyCompleted) - } - Err(RusotoError::Service(UpdateItemError::ProvisionedThroughputExceeded(_))) => { - Err(backoff::Error::transient( - LockClientError::ProvisionedThroughputExceeded, - )) - } - Err(err) => Err(backoff::Error::permanent(err.into())), + Err(err) => match err.as_service_error() { + Some(UpdateItemError::ProvisionedThroughputExceededException(_)) => Err( + backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), + ), + Some(UpdateItemError::ConditionalCheckFailedException(_)) => { + Ok(UpdateLogEntryResult::AlreadyCompleted) + } + _ => Err(backoff::Error::permanent(err.into())), + }, + } + }) + .await + } + + /// Delete existing log entry if it is not already complete + pub async fn delete_commit_entry( + &self, + version: i64, + table_path: &str, + ) -> Result<(), LockClientError> { + self.retry(|| async { + match self + .dynamodb_client + .delete_item() + .table_name(self.get_lock_table_name()) + .set_key(Some(self.get_primary_key(version, table_path))) + .set_expression_attribute_values(Some(maplit::hashmap! 
{ + ":f".into() => string_attr("false"), + })) + .condition_expression(constants::CONDITION_DELETE_INCOMPLETE.as_str()) + .send() + .await + { + Ok(_) => Ok(()), + Err(err) => match err.as_service_error() { + Some(DeleteItemError::ProvisionedThroughputExceededException(_)) => Err( + backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), + ), + Some(DeleteItemError::ConditionalCheckFailedException(_)) => Err( + backoff::Error::permanent(LockClientError::VersionAlreadyCompleted { + table_path: table_path.to_owned(), + version, + }), + ), + _ => Err(backoff::Error::permanent(err.into())), + }, } }) .await @@ -467,40 +558,23 @@ fn create_value_map( value_map } -#[derive(Debug, PartialEq)] -pub enum BillingMode { - PayPerRequest, - Provisioned, -} - -impl BillingMode { - fn to_str(&self) -> String { - match self { - Self::PayPerRequest => "PAY_PER_REQUEST".to_owned(), - Self::Provisioned => "PROVISIONED".to_owned(), - } - } -} - -impl FromStr for BillingMode { - type Err = DynamoDbConfigError; - - fn from_str(s: &str) -> Result { - match s.to_ascii_lowercase().as_str() { - "provisioned" => Ok(BillingMode::Provisioned), - "pay_per_request" => Ok(BillingMode::PayPerRequest), - _ => Err(DynamoDbConfigError::InvalidBillingMode(s.to_owned())), - } - } -} - -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub struct DynamoDbConfig { pub billing_mode: BillingMode, pub lock_table_name: String, pub max_elapsed_request_time: Duration, - pub use_web_identity: bool, - pub region: Region, + pub sdk_config: SdkConfig, +} + +impl Eq for DynamoDbConfig {} +impl PartialEq for DynamoDbConfig { + fn eq(&self, other: &Self) -> bool { + self.billing_mode == other.billing_mode + && self.lock_table_name == other.lock_table_name + && self.max_elapsed_request_time == other.max_elapsed_request_time + && self.sdk_config.endpoint_url() == other.sdk_config.endpoint_url() + && self.sdk_config.region() == other.sdk_config.region() + } } /// Represents the possible, positive outcomes of calling `DynamoDbClient::try_create_lock_table()` @@ -538,6 +612,10 @@ pub mod constants { pub static ref CONDITION_EXPR_CREATE: String = format!( "attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME})" ); + + pub static ref CONDITION_DELETE_INCOMPLETE: String = format!( + "(complete = :f) or (attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME}))" + ); } pub const CONDITION_UPDATE_INCOMPLETE: &str = "complete = :f"; @@ -545,23 +623,6 @@ pub mod constants { pub const DEFAULT_COMMIT_ENTRY_EXPIRATION_DELAY: Duration = Duration::from_secs(86_400); } -fn create_dynamodb_client( - region: Region, - use_web_identity: bool, -) -> Result { - Ok(match use_web_identity { - true => { - let dispatcher = HttpClient::new()?; - rusoto_dynamodb::DynamoDbClient::new_with( - dispatcher, - get_web_identity_provider()?, - region, - ) - } - false => rusoto_dynamodb::DynamoDbClient::new(region), - }) -} - /// Extract a field from an item's attribute value map, producing a descriptive error /// of the various failure cases. fn extract_required_string_field<'a>( @@ -573,12 +634,11 @@ fn extract_required_string_field<'a>( .ok_or_else(|| LockClientError::InconsistentData { description: format!("mandatory string field '{field_name}' missing"), })? 
- .s - .as_ref() - .ok_or_else(|| LockClientError::InconsistentData { + .as_s() + .map_err(|v| LockClientError::InconsistentData { description: format!( "mandatory string field '{field_name}' exists, but is not a string: {:#?}", - fields.get(field_name) + v, ), }) .map(|s| s.as_str()) @@ -593,35 +653,21 @@ fn extract_optional_number_field<'a>( fields .get(field_name) .map(|attr| { - attr.n - .as_ref() - .ok_or_else(|| LockClientError::InconsistentData { - description: format!( - "field with name '{field_name}' exists, but is not of type number" - ), - }) + attr.as_n().map_err(|_| LockClientError::InconsistentData { + description: format!( + "field with name '{field_name}' exists, but is not of type number" + ), + }) }) .transpose() } fn string_attr(s: T) -> AttributeValue { - AttributeValue { - s: Some(s.to_string()), - ..Default::default() - } + AttributeValue::S(s.to_string()) } fn num_attr(n: T) -> AttributeValue { - AttributeValue { - n: Some(n.to_string()), - ..Default::default() - } -} - -fn get_web_identity_provider( -) -> Result, DynamoDbConfigError> { - let provider = WebIdentityProvider::from_k8s_env(); - Ok(AutoRefreshingProvider::new(provider)?) + AttributeValue::N(n.to_string()) } lazy_static! { @@ -639,6 +685,7 @@ fn extract_version_from_filename(name: &str) -> Option { #[cfg(test)] mod tests { use super::*; + use aws_config::Region; use object_store::memory::InMemory; use serial_test::serial; @@ -687,4 +734,31 @@ mod tests { .unwrap(); assert_eq!(logstore.name(), "DefaultLogStore"); } + + #[test] + #[serial] + fn test_create_dynamodb_sdk_config() { + let sdk_config = SdkConfig::builder() + .region(Region::from_static("eu-west-1")) + .endpoint_url("http://localhost:1234") + .build(); + let dynamodb_sdk_config = DynamoDbLockClient::create_dynamodb_sdk_config( + &sdk_config, + Some("http://localhost:2345".to_string()), + ); + assert_eq!( + dynamodb_sdk_config.endpoint_url(), + Some("http://localhost:2345"), + ); + assert_eq!( + dynamodb_sdk_config.region().unwrap().to_string(), + "eu-west-1".to_string(), + ); + let dynamodb_sdk_no_override_config = + DynamoDbLockClient::create_dynamodb_sdk_config(&sdk_config, None); + assert_eq!( + dynamodb_sdk_no_override_config.endpoint_url(), + Some("http://localhost:1234"), + ); + } } diff --git a/crates/aws/src/logstore.rs b/crates/aws/src/logstore.rs index 123aadd2d1..fe569256ee 100644 --- a/crates/aws/src/logstore.rs +++ b/crates/aws/src/logstore.rs @@ -45,6 +45,7 @@ impl S3DynamoDbLogStore { object_store: ObjectStoreRef, ) -> DeltaResult { let lock_client = DynamoDbLockClient::try_new( + &s3_options.sdk_config, s3_options .extra_opts .get(constants::LOCK_TABLE_KEY_NAME) @@ -57,13 +58,12 @@ impl S3DynamoDbLogStore { .extra_opts .get(constants::MAX_ELAPSED_REQUEST_TIME_KEY_NAME) .cloned(), - s3_options.region.clone(), - s3_options.use_web_identity, + s3_options.dynamodb_endpoint.clone(), ) .map_err(|err| DeltaTableError::ObjectStore { source: ObjectStoreError::Generic { store: STORE_NAME, - source: err.into(), + source: Box::new(err), }, })?; let table_path = to_uri(&location, &Path::from("")); @@ -240,6 +240,36 @@ impl LogStore for S3DynamoDbLogStore { Ok(()) } + /// Tries to abort an entry by first deleting the commit log entry, then deleting the temp commit file + async fn abort_commit_entry( + &self, + version: i64, + tmp_commit: &Path, + ) -> Result<(), TransactionError> { + self.lock_client + .delete_commit_entry(version, &self.table_path) + .await + .map_err(|err| match err { + LockClientError::ProvisionedThroughputExceeded => 
todo!( + "deltalake-aws does not yet handle DynamoDB provisioned throughput errors" + ), + LockClientError::VersionAlreadyCompleted { version, .. } => { + error!("Trying to abort a completed commit"); + TransactionError::LogStoreError { + msg: format!("trying to abort a completed log entry: {}", version), + source: Box::new(err), + } + } + err => TransactionError::LogStoreError { + msg: "dynamodb client failed to delete log entry".to_owned(), + source: Box::new(err), + }, + })?; + + abort_commit_entry(&self.storage, version, tmp_commit).await?; + Ok(()) + } + async fn get_latest_version(&self, current_version: i64) -> DeltaResult { debug!("Retrieving latest version of {self:?} at v{current_version}"); let entry = self diff --git a/crates/aws/src/native.rs b/crates/aws/src/native.rs new file mode 100644 index 0000000000..c647194eb7 --- /dev/null +++ b/crates/aws/src/native.rs @@ -0,0 +1,12 @@ +use aws_sdk_sts::config::SharedHttpClient; +use aws_smithy_runtime::client::http::hyper_014::HyperClientBuilder; + +pub fn use_native_tls_client(allow_http: bool) -> SharedHttpClient { + let mut tls_connector = hyper_tls::HttpsConnector::new(); + if allow_http { + tls_connector.https_only(false); + } + + let client = HyperClientBuilder::new().build(tls_connector); + client +} diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index 87d488b54f..4625bb6be9 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -1,23 +1,32 @@ //! AWS S3 storage backend. +use aws_config::meta::region::ProvideRegion; +use aws_config::provider_config::ProviderConfig; +use aws_config::{Region, SdkConfig}; use bytes::Bytes; use deltalake_core::storage::object_store::{ - aws::AmazonS3ConfigKey, parse_url_opts, GetOptions, GetResult, ListResult, MultipartId, - ObjectMeta, ObjectStore, PutOptions, PutResult, Result as ObjectStoreResult, + aws::AmazonS3ConfigKey, parse_url_opts, GetOptions, GetResult, ListResult, ObjectMeta, + ObjectStore, PutOptions, PutResult, Result as ObjectStoreResult, +}; +use deltalake_core::storage::{ + limit_store_handler, str_is_truthy, ObjectStoreFactory, ObjectStoreRef, StorageOptions, }; -use deltalake_core::storage::{str_is_truthy, ObjectStoreFactory, ObjectStoreRef, StorageOptions}; use deltalake_core::{DeltaResult, ObjectStoreError, Path}; use futures::stream::BoxStream; -use rusoto_core::Region; +use futures::Future; +use object_store::{MultipartUpload, PutMultipartOpts, PutPayload}; use std::collections::HashMap; use std::fmt::Debug; use std::ops::Range; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use tokio::io::AsyncWrite; use url::Url; +use crate::errors::DynamoDbConfigError; +#[cfg(feature = "native-tls")] +use crate::native; + const STORE_NAME: &str = "DeltaS3ObjectStore"; #[derive(Clone, Default, Debug)] @@ -25,7 +34,21 @@ pub struct S3ObjectStoreFactory {} impl S3ObjectStoreFactory { fn with_env_s3(&self, options: &StorageOptions) -> StorageOptions { - let mut options = options.clone(); + let mut options = StorageOptions( + options + .0 + .clone() + .into_iter() + .map(|(k, v)| { + if let Ok(config_key) = AmazonS3ConfigKey::from_str(&k.to_ascii_lowercase()) { + (config_key.as_ref().to_string(), v) + } else { + (k, v) + } + }) + .collect(), + ); + for (os_key, os_value) in std::env::vars_os() { if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { if let Ok(config_key) = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()) { @@ -45,10 +68,10 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { fn 
parse_url_opts( &self, url: &Url, - options: &StorageOptions, + storage_options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { - let options = self.with_env_s3(options); - let (store, prefix) = parse_url_opts( + let options = self.with_env_s3(storage_options); + let (inner, prefix) = parse_url_opts( url, options.0.iter().filter_map(|(key, value)| { let s3_key = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()).ok()?; @@ -56,58 +79,70 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { }), )?; + let store = limit_store_handler(inner, &options); + + // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. if options .0 .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) { - // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. - return Ok((Arc::from(store), prefix)); - } - - let options = S3StorageOptions::from_map(&options.0); + Ok((store, prefix)) + } else { + let s3_options = S3StorageOptions::from_map(&storage_options.0)?; - let store = S3StorageBackend::try_new( - store.into(), - Some("dynamodb") == options.locking_provider.as_deref() || options.allow_unsafe_rename, - )?; + let store = S3StorageBackend::try_new( + store, + Some("dynamodb") == s3_options.locking_provider.as_deref() + || s3_options.allow_unsafe_rename, + )?; - Ok((Arc::new(store), prefix)) + Ok((Arc::new(store), prefix)) + } } } /// Options used to configure the [S3StorageBackend]. /// /// Available options are described in [s3_constants]. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug)] #[allow(missing_docs)] pub struct S3StorageOptions { - pub endpoint_url: Option, - pub region: Region, - pub profile: Option, - pub aws_access_key_id: Option, - pub aws_secret_access_key: Option, - pub aws_session_token: Option, pub virtual_hosted_style_request: bool, pub locking_provider: Option, - pub assume_role_arn: Option, - pub assume_role_session_name: Option, - pub use_web_identity: bool, + pub dynamodb_endpoint: Option, pub s3_pool_idle_timeout: Duration, pub sts_pool_idle_timeout: Duration, pub s3_get_internal_server_error_retries: usize, pub allow_unsafe_rename: bool, pub extra_opts: HashMap, + pub sdk_config: SdkConfig, +} + +impl Eq for S3StorageOptions {} +impl PartialEq for S3StorageOptions { + fn eq(&self, other: &Self) -> bool { + self.virtual_hosted_style_request == other.virtual_hosted_style_request + && self.locking_provider == other.locking_provider + && self.dynamodb_endpoint == other.dynamodb_endpoint + && self.s3_pool_idle_timeout == other.s3_pool_idle_timeout + && self.sts_pool_idle_timeout == other.sts_pool_idle_timeout + && self.s3_get_internal_server_error_retries + == other.s3_get_internal_server_error_retries + && self.allow_unsafe_rename == other.allow_unsafe_rename + && self.extra_opts == other.extra_opts + && self.sdk_config.endpoint_url() == other.sdk_config.endpoint_url() + && self.sdk_config.region() == other.sdk_config.region() + } } impl S3StorageOptions { /// Creates an instance of S3StorageOptions from the given HashMap. 
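// Editor's illustrative call site, not part of this patch: `from_map` is now fallible
// because it resolves an `SdkConfig` eagerly, so callers receive a `DeltaResult`. The
// endpoint, region, and locking provider below are placeholders for a local setup.
fn example_storage_options() -> deltalake_core::DeltaResult<S3StorageOptions> {
    let mut raw = std::collections::HashMap::new();
    raw.insert("AWS_ENDPOINT_URL".to_owned(), "http://localhost:9000".to_owned());
    raw.insert("AWS_REGION".to_owned(), "us-east-1".to_owned());
    raw.insert("AWS_S3_LOCKING_PROVIDER".to_owned(), "dynamodb".to_owned());
    S3StorageOptions::from_map(&raw)
}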
- pub fn from_map(options: &HashMap) -> S3StorageOptions { + pub fn from_map(options: &HashMap) -> DeltaResult { let extra_opts = options .iter() .filter(|(k, _)| !s3_constants::S3_OPTS.contains(&k.as_str())) .map(|(k, v)| (k.to_owned(), v.to_owned())) .collect(); - // Copy web identity values provided in options but not the environment into the environment // to get picked up by the `from_k8s_env` call in `get_web_identity_provider`. Self::ensure_env_var(options, s3_constants::AWS_REGION); @@ -118,18 +153,6 @@ impl S3StorageOptions { Self::ensure_env_var(options, s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE); Self::ensure_env_var(options, s3_constants::AWS_ROLE_ARN); Self::ensure_env_var(options, s3_constants::AWS_ROLE_SESSION_NAME); - - let endpoint_url = str_option(options, s3_constants::AWS_ENDPOINT_URL); - let region = if let Some(endpoint_url) = endpoint_url.as_ref() { - Region::Custom { - name: Self::str_or_default(options, s3_constants::AWS_REGION, "custom".to_string()), - endpoint: endpoint_url.to_owned(), - } - } else { - Region::default() - }; - let profile = str_option(options, s3_constants::AWS_PROFILE); - let s3_pool_idle_timeout = Self::u64_or_default(options, s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, 15); let sts_pool_idle_timeout = @@ -149,31 +172,81 @@ impl S3StorageOptions { let allow_unsafe_rename = str_option(options, s3_constants::AWS_S3_ALLOW_UNSAFE_RENAME) .map(|val| str_is_truthy(&val)) .unwrap_or(false); + let disable_imds = str_option(options, s3_constants::AWS_EC2_METADATA_DISABLED) + .map(|val| str_is_truthy(&val)) + .unwrap_or(true); + let imds_timeout = + Self::u64_or_default(options, s3_constants::AWS_EC2_METADATA_TIMEOUT, 100); + let (loader, provider_config) = + if let Some(endpoint_url) = str_option(options, s3_constants::AWS_ENDPOINT_URL) { + let (region_provider, provider_config) = Self::create_provider_config( + str_option(options, s3_constants::AWS_REGION) + .or_else(|| std::env::var("AWS_DEFAULT_REGION").ok()) + .map_or(Region::from_static("custom"), Region::new), + )?; + let loader = aws_config::from_env() + .endpoint_url(endpoint_url) + .region(region_provider); + (loader, provider_config) + } else { + let (region_provider, provider_config) = Self::create_provider_config( + crate::credentials::new_region_provider(disable_imds, imds_timeout), + )?; + ( + aws_config::from_env().region(region_provider), + provider_config, + ) + }; + + let credentials_provider = crate::credentials::ConfiguredCredentialChain::new( + disable_imds, + imds_timeout, + &provider_config, + ); + #[cfg(feature = "native-tls")] + let sdk_config = execute_sdk_future( + loader + .http_client(native::use_native_tls_client( + str_option(options, s3_constants::AWS_ALLOW_HTTP) + .map(|val| str_is_truthy(&val)) + .unwrap_or(false), + )) + .credentials_provider(credentials_provider) + .load(), + )?; + #[cfg(feature = "rustls")] + let sdk_config = + execute_sdk_future(loader.credentials_provider(credentials_provider).load())?; - Self { - endpoint_url, - region, - profile, - aws_access_key_id: str_option(options, s3_constants::AWS_ACCESS_KEY_ID), - aws_secret_access_key: str_option(options, s3_constants::AWS_SECRET_ACCESS_KEY), - aws_session_token: str_option(options, s3_constants::AWS_SESSION_TOKEN), + Ok(Self { virtual_hosted_style_request, locking_provider: str_option(options, s3_constants::AWS_S3_LOCKING_PROVIDER), - assume_role_arn: str_option(options, s3_constants::AWS_S3_ASSUME_ROLE_ARN), - assume_role_session_name: str_option(options, s3_constants::AWS_S3_ROLE_SESSION_NAME), - 
use_web_identity: std::env::var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE).is_ok(), + dynamodb_endpoint: str_option(options, s3_constants::AWS_ENDPOINT_URL_DYNAMODB), s3_pool_idle_timeout: Duration::from_secs(s3_pool_idle_timeout), sts_pool_idle_timeout: Duration::from_secs(sts_pool_idle_timeout), s3_get_internal_server_error_retries, allow_unsafe_rename, extra_opts, - } + sdk_config, + }) + } + + pub fn endpoint_url(&self) -> Option<&str> { + self.sdk_config.endpoint_url() } - fn str_or_default(map: &HashMap, key: &str, default: String) -> String { - map.get(key) - .map(|v| v.to_owned()) - .unwrap_or_else(|| std::env::var(key).unwrap_or(default)) + pub fn region(&self) -> Option<&Region> { + self.sdk_config.region() + } + + fn create_provider_config( + region_provider: T, + ) -> DeltaResult<(T, ProviderConfig)> { + let region = execute_sdk_future(region_provider.region())?; + Ok(( + region_provider, + ProviderConfig::default().with_region(region), + )) } fn u64_or_default(map: &HashMap, key: &str, default: u64) -> u64 { @@ -187,15 +260,47 @@ impl S3StorageOptions { std::env::set_var(key, val); } } -} -impl Default for S3StorageOptions { - /// Creates an instance of S3StorageOptions from environment variables. - fn default() -> S3StorageOptions { + pub fn try_default() -> DeltaResult { Self::from_map(&HashMap::new()) } } +fn execute_sdk_future(future: F) -> DeltaResult +where + T: Send, + F: Future + Send, +{ + match tokio::runtime::Handle::try_current() { + Ok(handle) => match handle.runtime_flavor() { + tokio::runtime::RuntimeFlavor::MultiThread => { + Ok(tokio::task::block_in_place(move || handle.block_on(future))) + } + _ => { + let mut cfg: Option = None; + std::thread::scope(|scope| { + scope.spawn(|| { + cfg = Some(handle.block_on(future)); + }); + }); + cfg.ok_or(deltalake_core::DeltaTableError::ObjectStore { + source: ObjectStoreError::Generic { + store: STORE_NAME, + source: Box::new(DynamoDbConfigError::InitializationError), + }, + }) + } + }, + Err(_) => { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("a tokio runtime is required by the AWS sdk"); + Ok(runtime.block_on(future)) + } + } +} + /// An S3 implementation of the [ObjectStore] trait pub struct S3StorageBackend { inner: ObjectStoreRef, @@ -229,14 +334,14 @@ impl std::fmt::Debug for S3StorageBackend { #[async_trait::async_trait] impl ObjectStore for S3StorageBackend { - async fn put(&self, location: &Path, bytes: Bytes) -> ObjectStoreResult { + async fn put(&self, location: &Path, bytes: PutPayload) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -297,19 +402,16 @@ impl ObjectStore for S3StorageBackend { } } - async fn put_multipart( - &self, - location: &Path, - ) -> ObjectStoreResult<(MultipartId, Box)> { + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &Path, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } @@ -319,6 +421,10 @@ impl ObjectStore for S3StorageBackend { pub mod s3_constants { /// Custom S3 endpoint. 
pub const AWS_ENDPOINT_URL: &str = "AWS_ENDPOINT_URL"; + /// Custom DynamoDB endpoint. + /// If DynamoDB endpoint is not supplied, will use S3 endpoint (AWS_ENDPOINT_URL) + /// If it is supplied, this endpoint takes precedence over the global endpoint set in AWS_ENDPOINT_URL for DynamoDB + pub const AWS_ENDPOINT_URL_DYNAMODB: &str = "AWS_ENDPOINT_URL_DYNAMODB"; /// The AWS region. pub const AWS_REGION: &str = "AWS_REGION"; /// The AWS profile. @@ -375,11 +481,20 @@ pub mod s3_constants { /// Only safe if there is one writer to a given table. pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME"; + /// If set to "true", disables the imds client + /// Defaults to "true" + pub const AWS_EC2_METADATA_DISABLED: &str = "AWS_EC2_METADATA_DISABLED"; + + /// The timeout in milliseconds for the EC2 metadata endpoint + /// Defaults to 100 + pub const AWS_EC2_METADATA_TIMEOUT: &str = "AWS_EC2_METADATA_TIMEOUT"; + /// The list of option keys owned by the S3 module. /// Option keys not contained in this list will be added to the `extra_opts` /// field of [crate::storage::s3::S3StorageOptions]. pub const S3_OPTS: &[&str] = &[ AWS_ENDPOINT_URL, + AWS_ENDPOINT_URL_DYNAMODB, AWS_REGION, AWS_PROFILE, AWS_ACCESS_KEY_ID, @@ -394,216 +509,412 @@ pub mod s3_constants { AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, + AWS_EC2_METADATA_DISABLED, + AWS_EC2_METADATA_TIMEOUT, ]; } pub(crate) fn str_option(map: &HashMap, key: &str) -> Option { - map.get(key) - .map_or_else(|| std::env::var(key).ok(), |v| Some(v.to_owned())) + if let Some(s) = map.get(key) { + return Some(s.to_owned()); + } + + if let Some(s) = map.get(&key.to_ascii_lowercase()) { + return Some(s.to_owned()); + } + + std::env::var(key).ok() } #[cfg(test)] mod tests { + use std::time::SystemTime; + use super::*; + use aws_sdk_sts::config::ProvideCredentials; use maplit::hashmap; use serial_test::serial; + struct ScopedEnv { + vars: HashMap, + } + + impl ScopedEnv { + pub fn new() -> Self { + let vars = std::env::vars_os().collect(); + Self { vars } + } + + pub fn run(mut f: impl FnMut() -> T) -> T { + let _env_scope = Self::new(); + f() + } + + pub async fn run_async(future: F) -> F::Output + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + let _env_scope = Self::new(); + future.await + } + } + + impl Drop for ScopedEnv { + fn drop(&mut self) { + let to_remove: Vec<_> = std::env::vars_os() + .map(|kv| kv.0) + .filter(|k| !self.vars.contains_key(k)) + .collect(); + for k in to_remove { + std::env::remove_var(k); + } + for (key, value) in self.vars.drain() { + std::env::set_var(key, value); + } + } + } + + fn clear_env_of_aws_keys() { + let keys_to_clear = std::env::vars().filter_map(|(k, _v)| { + if AmazonS3ConfigKey::from_str(&k.to_ascii_lowercase()).is_ok() { + Some(k) + } else { + None + } + }); + + for k in keys_to_clear { + std::env::remove_var(k); + } + } + #[test] #[serial] fn storage_options_default_test() { - std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "http://localhost"); - std::env::set_var(s3_constants::AWS_REGION, "us-west-1"); - std::env::set_var(s3_constants::AWS_PROFILE, "default"); - std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "default_key_id"); - std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "default_secret_key"); - std::env::set_var(s3_constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); - std::env::set_var( - s3_constants::AWS_S3_ASSUME_ROLE_ARN, - "arn:aws:iam::123456789012:role/some_role", - ); - 
std::env::set_var(s3_constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); - std::env::set_var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); - std::env::remove_var(s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS); - std::env::remove_var(s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS); - std::env::remove_var(s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES); - - let options = S3StorageOptions::default(); - - assert_eq!( - S3StorageOptions { - endpoint_url: Some("http://localhost".to_string()), - region: Region::Custom { - name: "us-west-1".to_string(), - endpoint: "http://localhost".to_string() + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + + std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "http://localhost"); + std::env::set_var(s3_constants::AWS_REGION, "us-west-1"); + std::env::set_var(s3_constants::AWS_PROFILE, "default"); + std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "default_key_id"); + std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "default_secret_key"); + std::env::set_var(s3_constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); + std::env::set_var( + s3_constants::AWS_S3_ASSUME_ROLE_ARN, + "arn:aws:iam::123456789012:role/some_role", + ); + std::env::set_var(s3_constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); + std::env::set_var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); + + let options = S3StorageOptions::try_default().unwrap(); + assert_eq!( + S3StorageOptions { + sdk_config: SdkConfig::builder() + .endpoint_url("http://localhost".to_string()) + .region(Region::from_static("us-west-1")) + .build(), + virtual_hosted_style_request: false, + locking_provider: Some("dynamodb".to_string()), + dynamodb_endpoint: None, + s3_pool_idle_timeout: Duration::from_secs(15), + sts_pool_idle_timeout: Duration::from_secs(10), + s3_get_internal_server_error_retries: 10, + extra_opts: HashMap::new(), + allow_unsafe_rename: false, }, - profile: Some("default".to_string()), - aws_access_key_id: Some("default_key_id".to_string()), - aws_secret_access_key: Some("default_secret_key".to_string()), - aws_session_token: None, - virtual_hosted_style_request: false, - assume_role_arn: Some("arn:aws:iam::123456789012:role/some_role".to_string()), - assume_role_session_name: Some("session_name".to_string()), - use_web_identity: true, - locking_provider: Some("dynamodb".to_string()), - s3_pool_idle_timeout: Duration::from_secs(15), - sts_pool_idle_timeout: Duration::from_secs(10), - s3_get_internal_server_error_retries: 10, - extra_opts: HashMap::new(), - allow_unsafe_rename: false, - }, - options - ); + options + ); + }); } #[test] #[serial] fn storage_options_with_only_region_and_credentials() { - std::env::remove_var(s3_constants::AWS_ENDPOINT_URL); - let options = S3StorageOptions::from_map(&hashmap! { - s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), - s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test".to_string(), - s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), - }); + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + std::env::remove_var(s3_constants::AWS_ENDPOINT_URL); + let options = S3StorageOptions::from_map(&hashmap! 
{ + s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), + s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test".to_string(), + s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + }) + .unwrap(); - assert_eq!( - S3StorageOptions { - endpoint_url: None, - region: Region::default(), - aws_access_key_id: Some("test".to_string()), - aws_secret_access_key: Some("test_secret".to_string()), - ..Default::default() - }, - options - ); + let mut expected = S3StorageOptions::try_default().unwrap(); + expected.sdk_config = SdkConfig::builder() + .region(Region::from_static("eu-west-1")) + .build(); + assert_eq!(expected, options); + }); } #[test] #[serial] fn storage_options_from_map_test() { - let options = S3StorageOptions::from_map(&hashmap! { - s3_constants::AWS_ENDPOINT_URL.to_string() => "http://localhost:1234".to_string(), - s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), - s3_constants::AWS_PROFILE.to_string() => "default".to_string(), - s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), - s3_constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), - s3_constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), - s3_constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), - s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), - s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), - s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), - s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), - s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), - s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let options = S3StorageOptions::from_map(&hashmap! 
{ + s3_constants::AWS_ENDPOINT_URL.to_string() => "http://localhost:1234".to_string(), + s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), + s3_constants::AWS_PROFILE.to_string() => "default".to_string(), + s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), + s3_constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), + s3_constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), + s3_constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), + s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), + s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), + s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), + s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), + s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + }).unwrap(); + + assert_eq!( + S3StorageOptions { + sdk_config: SdkConfig::builder() + .endpoint_url("http://localhost:1234".to_string()) + .region(Region::from_static("us-west-2")) + .build(), + virtual_hosted_style_request: true, + locking_provider: Some("another_locking_provider".to_string()), + dynamodb_endpoint: None, + s3_pool_idle_timeout: Duration::from_secs(1), + sts_pool_idle_timeout: Duration::from_secs(2), + s3_get_internal_server_error_retries: 3, + extra_opts: hashmap! { + s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() + }, + allow_unsafe_rename: false, + }, + options + ); }); + } - assert_eq!( - S3StorageOptions { - endpoint_url: Some("http://localhost:1234".to_string()), - region: Region::Custom { - name: "us-west-2".to_string(), - endpoint: "http://localhost:1234".to_string() - }, - profile: Some("default".to_string()), - aws_access_key_id: Some("test_id".to_string()), - aws_secret_access_key: Some("test_secret".to_string()), - aws_session_token: None, - virtual_hosted_style_request: true, - assume_role_arn: Some("arn:aws:iam::123456789012:role/another_role".to_string()), - assume_role_session_name: Some("another_session_name".to_string()), - use_web_identity: true, - locking_provider: Some("another_locking_provider".to_string()), - s3_pool_idle_timeout: Duration::from_secs(1), - sts_pool_idle_timeout: Duration::from_secs(2), - s3_get_internal_server_error_retries: 3, - extra_opts: hashmap! { - s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() + #[test] + #[serial] + fn storage_options_from_map_with_dynamodb_endpoint_test() { + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let options = S3StorageOptions::from_map(&hashmap! 
{ + s3_constants::AWS_ENDPOINT_URL.to_string() => "http://localhost:1234".to_string(), + s3_constants::AWS_ENDPOINT_URL_DYNAMODB.to_string() => "http://localhost:2345".to_string(), + s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), + s3_constants::AWS_PROFILE.to_string() => "default".to_string(), + s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), + s3_constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), + s3_constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), + s3_constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), + s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), + s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), + s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), + s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), + s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + }).unwrap(); + + assert_eq!( + S3StorageOptions { + sdk_config: SdkConfig::builder() + .endpoint_url("http://localhost:1234".to_string()) + .region(Region::from_static("us-west-2")) + .build(), + virtual_hosted_style_request: true, + locking_provider: Some("another_locking_provider".to_string()), + dynamodb_endpoint: Some("http://localhost:2345".to_string()), + s3_pool_idle_timeout: Duration::from_secs(1), + sts_pool_idle_timeout: Duration::from_secs(2), + s3_get_internal_server_error_retries: 3, + extra_opts: hashmap! { + s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() + }, + allow_unsafe_rename: false, }, - allow_unsafe_rename: false, - }, - options - ); + options + ); + }); } #[test] #[serial] fn storage_options_mixed_test() { - std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "http://localhost"); - std::env::set_var(s3_constants::AWS_REGION, "us-west-1"); - std::env::set_var(s3_constants::AWS_PROFILE, "default"); - std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "wrong_key_id"); - std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "wrong_secret_key"); - std::env::set_var(s3_constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); - std::env::set_var( - s3_constants::AWS_S3_ASSUME_ROLE_ARN, - "arn:aws:iam::123456789012:role/some_role", - ); - std::env::set_var(s3_constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); - std::env::set_var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); - - std::env::set_var(s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, "1"); - std::env::set_var(s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, "2"); - std::env::set_var(s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, "3"); - let options = S3StorageOptions::from_map(&hashmap! 
{ - s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id_mixed".to_string(), - s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret_mixed".to_string(), - s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), - "AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES".to_string() => "3".to_string(), - }); - - assert_eq!( - S3StorageOptions { - endpoint_url: Some("http://localhost".to_string()), - region: Region::Custom { - name: "us-west-2".to_string(), - endpoint: "http://localhost".to_string() + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "http://localhost"); + std::env::set_var( + s3_constants::AWS_ENDPOINT_URL_DYNAMODB, + "http://localhost:dynamodb", + ); + std::env::set_var(s3_constants::AWS_REGION, "us-west-1"); + std::env::set_var(s3_constants::AWS_PROFILE, "default"); + std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "wrong_key_id"); + std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "wrong_secret_key"); + std::env::set_var(s3_constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); + std::env::set_var( + s3_constants::AWS_S3_ASSUME_ROLE_ARN, + "arn:aws:iam::123456789012:role/some_role", + ); + std::env::set_var(s3_constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); + std::env::set_var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); + + std::env::set_var(s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, "1"); + std::env::set_var(s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, "2"); + std::env::set_var(s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, "3"); + let options = S3StorageOptions::from_map(&hashmap! { + s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id_mixed".to_string(), + s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret_mixed".to_string(), + s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), + "AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES".to_string() => "3".to_string(), + }) + .unwrap(); + + assert_eq!( + S3StorageOptions { + sdk_config: SdkConfig::builder() + .endpoint_url("http://localhost".to_string()) + .region(Region::from_static("us-west-2")) + .build(), + virtual_hosted_style_request: false, + locking_provider: Some("dynamodb".to_string()), + dynamodb_endpoint: Some("http://localhost:dynamodb".to_string()), + s3_pool_idle_timeout: Duration::from_secs(1), + sts_pool_idle_timeout: Duration::from_secs(2), + s3_get_internal_server_error_retries: 3, + extra_opts: hashmap! {}, + allow_unsafe_rename: false, }, - profile: Some("default".to_string()), - aws_access_key_id: Some("test_id_mixed".to_string()), - aws_secret_access_key: Some("test_secret_mixed".to_string()), - aws_session_token: None, - virtual_hosted_style_request: false, - assume_role_arn: Some("arn:aws:iam::123456789012:role/some_role".to_string()), - assume_role_session_name: Some("session_name".to_string()), - use_web_identity: true, - locking_provider: Some("dynamodb".to_string()), - s3_pool_idle_timeout: Duration::from_secs(1), - sts_pool_idle_timeout: Duration::from_secs(2), - s3_get_internal_server_error_retries: 3, - extra_opts: hashmap! {}, - allow_unsafe_rename: false, - }, - options - ); + options + ); + }); } + #[test] #[serial] fn storage_options_web_identity_test() { - let _options = S3StorageOptions::from_map(&hashmap! 
{ - s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), - s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "web_identity_token_file".to_string(), - s3_constants::AWS_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/web_identity_role".to_string(), - s3_constants::AWS_ROLE_SESSION_NAME.to_string() => "web_identity_session_name".to_string(), + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let _options = S3StorageOptions::from_map(&hashmap! { + s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), + s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "web_identity_token_file".to_string(), + s3_constants::AWS_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/web_identity_role".to_string(), + s3_constants::AWS_ROLE_SESSION_NAME.to_string() => "web_identity_session_name".to_string(), + }).unwrap(); + + assert_eq!( + "eu-west-1", + std::env::var(s3_constants::AWS_REGION).unwrap() + ); + + assert_eq!( + "web_identity_token_file", + std::env::var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE).unwrap() + ); + + assert_eq!( + "arn:aws:iam::123456789012:role/web_identity_role", + std::env::var(s3_constants::AWS_ROLE_ARN).unwrap() + ); + + assert_eq!( + "web_identity_session_name", + std::env::var(s3_constants::AWS_ROLE_SESSION_NAME).unwrap() + ); }); + } - assert_eq!( - "eu-west-1", - std::env::var(s3_constants::AWS_REGION).unwrap() - ); + #[test] + #[serial] + fn when_merging_with_env_unsupplied_options_are_added() { + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let raw_options = hashmap! {}; - assert_eq!( - "web_identity_token_file", - std::env::var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE).unwrap() - ); + std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "env_key"); + std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "env_key"); + std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "env_key"); + std::env::set_var(s3_constants::AWS_REGION, "env_key"); - assert_eq!( - "arn:aws:iam::123456789012:role/web_identity_role", - std::env::var(s3_constants::AWS_ROLE_ARN).unwrap() - ); + let combined_options = + S3ObjectStoreFactory {}.with_env_s3(&StorageOptions(raw_options)); + + assert_eq!(combined_options.0.len(), 4); + + for v in combined_options.0.values() { + assert_eq!(v, "env_key"); + } + }); + } + + #[tokio::test] + #[serial] + async fn when_merging_with_env_supplied_options_take_precedence() { + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let raw_options = hashmap! 
{ + "AWS_ACCESS_KEY_ID".to_string() => "options_key".to_string(), + "AWS_ENDPOINT_URL".to_string() => "options_key".to_string(), + "AWS_SECRET_ACCESS_KEY".to_string() => "options_key".to_string(), + "AWS_REGION".to_string() => "options_key".to_string() + }; + + std::env::set_var("aws_access_key_id", "env_key"); + std::env::set_var("aws_endpoint", "env_key"); + std::env::set_var("aws_secret_access_key", "env_key"); + std::env::set_var("aws_region", "env_key"); + + let combined_options = + S3ObjectStoreFactory {}.with_env_s3(&StorageOptions(raw_options)); + + for v in combined_options.0.values() { + assert_eq!(v, "options_key"); + } + }); + } + + #[tokio::test] + #[serial] + async fn storage_options_toggle_imds() { + ScopedEnv::run_async(async { + clear_env_of_aws_keys(); + let disabled_time = storage_options_configure_imds(Some("true")).await; + let enabled_time = storage_options_configure_imds(Some("false")).await; + let default_time = storage_options_configure_imds(None).await; + println!( + "enabled_time: {}, disabled_time: {}, default_time: {}", + enabled_time.as_micros(), + disabled_time.as_micros(), + default_time.as_micros(), + ); + assert!(disabled_time < enabled_time); + assert!(default_time < enabled_time); + }) + .await; + } + + async fn storage_options_configure_imds(value: Option<&str>) -> Duration { + let _options = match value { + Some(value) => S3StorageOptions::from_map(&hashmap! { + s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), + s3_constants::AWS_EC2_METADATA_DISABLED.to_string() => value.to_string(), + }) + .unwrap(), + None => S3StorageOptions::from_map(&hashmap! { + s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), + }) + .unwrap(), + }; assert_eq!( - "web_identity_session_name", - std::env::var(s3_constants::AWS_ROLE_SESSION_NAME).unwrap() + "eu-west-1", + std::env::var(s3_constants::AWS_REGION).unwrap() ); + + let provider = _options.sdk_config.credentials_provider().unwrap(); + let now = SystemTime::now(); + _ = provider.provide_credentials().await; + now.elapsed().unwrap() } } diff --git a/crates/aws/tests/common.rs b/crates/aws/tests/common.rs index 01aa505b1b..dfa2a9cd51 100644 --- a/crates/aws/tests/common.rs +++ b/crates/aws/tests/common.rs @@ -87,7 +87,7 @@ impl S3Integration { "dynamodb", "create-table", "--table-name", - &table_name, + table_name, "--provisioned-throughput", "ReadCapacityUnits=1,WriteCapacityUnits=1", "--attribute-definitions", @@ -112,7 +112,7 @@ impl S3Integration { } fn wait_for_table(table_name: &str) -> std::io::Result<()> { - let args = ["dynamodb", "describe-table", "--table-name", &table_name]; + let args = ["dynamodb", "describe-table", "--table-name", table_name]; loop { let output = Command::new("aws") .args(args) @@ -145,7 +145,7 @@ impl S3Integration { fn delete_dynamodb_table(table_name: &str) -> std::io::Result { let mut child = Command::new("aws") - .args(["dynamodb", "delete-table", "--table-name", &table_name]) + .args(["dynamodb", "delete-table", "--table-name", table_name]) .stdout(Stdio::null()) .spawn() .expect("aws command is installed"); diff --git a/crates/aws/tests/integration_s3_dynamodb.rs b/crates/aws/tests/integration_s3_dynamodb.rs index 179c46fc5a..57eb44ea24 100644 --- a/crates/aws/tests/integration_s3_dynamodb.rs +++ b/crates/aws/tests/integration_s3_dynamodb.rs @@ -5,17 +5,18 @@ use std::collections::HashMap; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use aws_sdk_dynamodb::types::BillingMode; use deltalake_aws::logstore::{RepairLogEntryResult, S3DynamoDbLogStore}; 
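// Editor's sketch, not part of this patch, of the lookup order exercised by the
// `when_merging_with_env_*` tests earlier in this file: an explicitly supplied option
// wins (exact key first, then the lower-cased spelling), and the process environment
// only fills in keys that are missing. The helper name `resolve_option` is illustrative
// and mirrors the `str_option` helper in storage.rs.
fn resolve_option(
    options: &std::collections::HashMap<String, String>,
    key: &str,
) -> Option<String> {
    options
        .get(key)
        .or_else(|| options.get(&key.to_ascii_lowercase()))
        .cloned()
        .or_else(|| std::env::var(key).ok())
}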
-use deltalake_aws::storage::S3StorageOptions; +use deltalake_aws::storage::{s3_constants, S3StorageOptions}; use deltalake_aws::{CommitEntry, DynamoDbConfig, DynamoDbLockClient}; use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; use deltalake_core::logstore::LogStore; -use deltalake_core::operations::transaction::{commit, prepare_commit}; +use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::commit_uri_from_version; use deltalake_core::storage::StorageOptions; use deltalake_core::table::builder::ensure_table_uri; -use deltalake_core::{DeltaOps, DeltaTable, DeltaTableBuilder}; +use deltalake_core::{DeltaOps, DeltaTable, DeltaTableBuilder, ObjectStoreError}; use deltalake_test::utils::*; use lazy_static::lazy_static; use object_store::path::Path; @@ -31,17 +32,17 @@ lazy_static! { static ref OPTIONS: HashMap = maplit::hashmap! { "allow_http".to_owned() => "true".to_owned(), }; - static ref S3_OPTIONS: S3StorageOptions = S3StorageOptions::from_map(&OPTIONS); + static ref S3_OPTIONS: S3StorageOptions = S3StorageOptions::from_map(&OPTIONS).unwrap(); } fn make_client() -> TestResult { - let options: S3StorageOptions = S3StorageOptions::default(); + let options: S3StorageOptions = S3StorageOptions::try_default().unwrap(); Ok(DynamoDbLockClient::try_new( + &options.sdk_config, + None, None, None, None, - options.region.clone(), - false, )?) } @@ -62,13 +63,13 @@ fn client_configs_via_env_variables() -> TestResult<()> { ); let client = make_client()?; let config = client.get_dynamodb_config(); + let options: S3StorageOptions = S3StorageOptions::try_default().unwrap(); assert_eq!( DynamoDbConfig { - billing_mode: deltalake_aws::BillingMode::PayPerRequest, + billing_mode: BillingMode::PayPerRequest, lock_table_name: "some_table".to_owned(), max_elapsed_request_time: Duration::from_secs(64), - use_web_identity: false, - region: config.region.clone(), + sdk_config: options.sdk_config, }, *config, ); @@ -180,6 +181,80 @@ async fn test_repair_on_load() -> TestResult<()> { Ok(()) } +#[tokio::test] +#[serial] +async fn test_abort_commit_entry() -> TestResult<()> { + let context = IntegrationContext::new(Box::new(S3Integration::default()))?; + let client = make_client()?; + let table = prepare_table(&context, "abort_entry").await?; + let options: StorageOptions = OPTIONS.clone().into(); + let log_store: S3DynamoDbLogStore = S3DynamoDbLogStore::try_new( + ensure_table_uri(table.table_uri())?, + options.clone(), + &S3_OPTIONS, + std::sync::Arc::new(table.object_store()), + )?; + + let entry = create_incomplete_commit_entry(&table, 1, "unfinished_commit").await?; + + log_store + .abort_commit_entry(entry.version, &entry.temp_path) + .await?; + + // The entry should have been aborted - the latest entry should be one version lower + if let Some(new_entry) = client.get_latest_entry(&table.table_uri()).await? { + assert_eq!(entry.version - 1, new_entry.version); + } + // Temp commit file should have been deleted + assert!(matches!( + log_store.object_store().get(&entry.temp_path).await, + Err(ObjectStoreError::NotFound { .. 
}) + )); + + // Test abort commit is idempotent - still works if already aborted + log_store + .abort_commit_entry(entry.version, &entry.temp_path) + .await?; + + Ok(()) +} + +#[tokio::test] +#[serial] +async fn test_abort_commit_entry_fail_to_delete_entry() -> TestResult<()> { + // Test abort commit does not delete the temp commit if the DynamoDB entry is not deleted + let context = IntegrationContext::new(Box::new(S3Integration::default()))?; + let client = make_client()?; + let table = prepare_table(&context, "abort_entry_fail").await?; + let options: StorageOptions = OPTIONS.clone().into(); + let log_store: S3DynamoDbLogStore = S3DynamoDbLogStore::try_new( + ensure_table_uri(table.table_uri())?, + options.clone(), + &S3_OPTIONS, + std::sync::Arc::new(table.object_store()), + )?; + + let entry = create_incomplete_commit_entry(&table, 1, "finished_commit").await?; + + // Mark entry as complete + client + .update_commit_entry(entry.version, &table.table_uri()) + .await?; + + // Abort will fail since we marked the entry as complete + assert!(matches!( + log_store + .abort_commit_entry(entry.version, &entry.temp_path) + .await, + Err(_), + )); + + // Check temp commit file still exists + assert!(log_store.object_store().get(&entry.temp_path).await.is_ok()); + + Ok(()) +} + const WORKERS: i64 = 3; const COMMITS: i64 = 15; @@ -208,7 +283,9 @@ async fn test_concurrent_writers() -> TestResult<()> { for f in futures { map.extend(f.await?); } + validate_lock_table_state(&table, WORKERS * COMMITS).await?; + Ok(()) } @@ -258,18 +335,18 @@ async fn create_incomplete_commit_entry( tag: &str, ) -> TestResult { let actions = vec![add_action(tag)]; - let temp_path = prepare_commit( - table.object_store().as_ref(), - &DeltaOperation::Write { - mode: SaveMode::Append, - partition_by: None, - predicate: None, - }, - &actions, - None, - ) - .await?; - let commit_entry = CommitEntry::new(version, temp_path); + let operation = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: None, + predicate: None, + }; + let prepared = CommitBuilder::default() + .with_actions(actions) + .build(Some(table.snapshot()?), table.log_store(), operation) + .into_prepared_commit_future() + .await?; + + let commit_entry = CommitEntry::new(version, prepared.path().to_owned()); make_client()? .put_commit_entry(&table.table_uri(), &commit_entry) .await?; @@ -314,7 +391,7 @@ async fn prepare_table(context: &IntegrationContext, table_name: &str) -> TestRe // create delta table let table = DeltaOps(table) .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await?; println!("table created: {table:?}"); Ok(table) @@ -331,15 +408,12 @@ async fn append_to_table( predicate: None, }; let actions = vec![add_action(name)]; - let version = commit( - table.log_store().as_ref(), - &actions, - operation, - Some(table.snapshot()?), - metadata, - ) - .await - .unwrap(); + let version = CommitBuilder::default() + .with_actions(actions) + .with_app_metadata(metadata.unwrap_or_default()) + .build(Some(table.snapshot()?), table.log_store(), operation) + .await? 
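// The call sites in this file show the replacement for the removed
// `commit`/`prepare_commit` free functions. Condensed, and assuming `table`,
// `actions` and `operation` as in the surrounding helpers:
//
//   // one-shot commit, returning the new table version
//   let version = CommitBuilder::default()
//       .with_actions(actions)
//       .build(Some(table.snapshot()?), table.log_store(), operation)
//       .await?
//       .version();
//
//   // two-phase variant: stop after the temporary commit file is written
//   let prepared = CommitBuilder::default()
//       .with_actions(actions)
//       .build(Some(table.snapshot()?), table.log_store(), operation)
//       .into_prepared_commit_future()
//       .await?;
//   let temp_path = prepared.path().to_owned();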
+ .version(); Ok(version) } diff --git a/crates/aws/tests/repair_s3_rename_test.rs b/crates/aws/tests/repair_s3_rename_test.rs index 68d8727ebe..d9e19de7b7 100644 --- a/crates/aws/tests/repair_s3_rename_test.rs +++ b/crates/aws/tests/repair_s3_rename_test.rs @@ -9,6 +9,7 @@ use deltalake_core::storage::object_store::{ use deltalake_core::{DeltaTableBuilder, ObjectStore, Path}; use deltalake_test::utils::IntegrationContext; use futures::stream::BoxStream; +use object_store::{MultipartUpload, PutMultipartOpts, PutPayload}; use serial_test::serial; use std::ops::Range; use std::sync::{Arc, Mutex}; @@ -60,8 +61,8 @@ async fn run_repair_test_case(path: &str, pause_copy: bool) -> Result<(), Object }; let (s3_2, _) = create_s3_backend(&context, "w2", None, None); - s3_1.put(&src1, Bytes::from("test1")).await.unwrap(); - s3_2.put(&src2, Bytes::from("test2")).await.unwrap(); + s3_1.put(&src1, Bytes::from("test1").into()).await.unwrap(); + s3_2.put(&src2, Bytes::from("test2").into()).await.unwrap(); let rename1 = rename(s3_1, &src1, &dst1); // to ensure that first one is started actually first @@ -166,14 +167,14 @@ impl ObjectStore for DelayedObjectStore { self.delete(from).await } - async fn put(&self, location: &Path, bytes: Bytes) -> ObjectStoreResult { + async fn put(&self, location: &Path, bytes: PutPayload) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -227,19 +228,16 @@ impl ObjectStore for DelayedObjectStore { self.inner.rename_if_not_exists(from, to).await } - async fn put_multipart( - &self, - location: &Path, - ) -> ObjectStoreResult<(MultipartId, Box)> { + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &Path, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } diff --git a/crates/azure/Cargo.toml b/crates/azure/Cargo.toml index 7ed67f74c9..574684627f 100644 --- a/crates/azure/Cargo.toml +++ b/crates/azure/Cargo.toml @@ -1,10 +1,18 @@ [package] name = "deltalake-azure" -version = "0.1.0" -edition = "2021" +version = "0.1.3" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } lazy_static = "1" # workspace depenndecies @@ -22,7 +30,7 @@ url = { workspace = true } chrono = { workspace = true } serial_test = "3" deltalake-test = { path = "../test" } -pretty_env_logger = "*" +pretty_env_logger = "0.5.0" rand = "0.8" serde_json = { workspace = true } diff --git a/crates/azure/src/lib.rs b/crates/azure/src/lib.rs index 9b957c7b5e..7782f69f43 100644 --- a/crates/azure/src/lib.rs +++ b/crates/azure/src/lib.rs @@ -4,7 +4,8 @@ use std::sync::Arc; use deltalake_core::logstore::{default_logstore, logstores, LogStore, LogStoreFactory}; use deltalake_core::storage::{ - factories, url_prefix_handler, ObjectStoreFactory, ObjectStoreRef, StorageOptions, + 
factories, limit_store_handler, url_prefix_handler, ObjectStoreFactory, ObjectStoreRef, + StorageOptions, }; use deltalake_core::{DeltaResult, Path}; use object_store::azure::AzureConfigKey; @@ -42,8 +43,9 @@ impl ObjectStoreFactory for AzureFactory { options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { let config = config::AzureConfigHelper::try_new(options.as_azure_options())?.build()?; - let (store, prefix) = parse_url_opts(url, config)?; - Ok((url_prefix_handler(store, prefix.clone())?, prefix)) + let (inner, prefix) = parse_url_opts(url, config)?; + let store = limit_store_handler(url_prefix_handler(inner, prefix.clone()), options); + Ok((store, prefix)) } } diff --git a/crates/azure/tests/integration.rs b/crates/azure/tests/integration.rs index 5230462c92..3ffaa00cc5 100644 --- a/crates/azure/tests/integration.rs +++ b/crates/azure/tests/integration.rs @@ -75,7 +75,10 @@ async fn read_write_test_onelake(context: &IntegrationContext, path: &Path) -> T let expected = Bytes::from_static(b"test world from delta-rs on friday"); - delta_store.put(path, expected.clone()).await.unwrap(); + delta_store + .put(path, expected.clone().into()) + .await + .unwrap(); let fetched = delta_store.get(path).await.unwrap().bytes().await.unwrap(); assert_eq!(expected, fetched); diff --git a/crates/benchmarks/src/bin/merge.rs b/crates/benchmarks/src/bin/merge.rs index ea43171052..2465e23d94 100644 --- a/crates/benchmarks/src/bin/merge.rs +++ b/crates/benchmarks/src/bin/merge.rs @@ -7,9 +7,10 @@ use arrow::datatypes::Schema as ArrowSchema; use arrow_array::{RecordBatch, StringArray, UInt32Array}; use chrono::Duration; use clap::{command, Args, Parser, Subcommand}; +use datafusion::functions::expr_fn::random; use datafusion::{datasource::MemTable, prelude::DataFrame}; use datafusion_common::DataFusionError; -use datafusion_expr::{cast, col, lit, random}; +use datafusion_expr::{cast, col, lit}; use deltalake_core::protocol::SaveMode; use deltalake_core::{ arrow::{ @@ -200,6 +201,7 @@ async fn benchmark_merge_tpcds( table.log_store(), DeltaScanConfig { file_column_name: Some("file_path".to_string()), + ..Default::default() }, ) .unwrap(); diff --git a/crates/catalog-glue/Cargo.toml b/crates/catalog-glue/Cargo.toml index a535cb8984..c757563c1b 100644 --- a/crates/catalog-glue/Cargo.toml +++ b/crates/catalog-glue/Cargo.toml @@ -1,13 +1,21 @@ [package] name = "deltalake-catalog-glue" version = "0.1.0" -edition = "2021" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true [dependencies] async-trait = { workspace = true } aws-config = "1" aws-sdk-glue = "1" -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } # This can depend on a lowest common denominator of core once that's released # deltalake_core = { version = "0.17.0" } thiserror = { workspace = true } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 9773f82c46..296abf2fef 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,27 +1,26 @@ [package] name = "deltalake-core" -version = "0.17.0" -rust-version = "1.64" -authors = ["Qingping Hou "] -homepage = "https://github.com/delta-io/delta.rs" -license = "Apache-2.0" -keywords = ["deltalake", "delta", "datalake"] -description = "Native Delta Lake implementation in Rust" -documentation = 
"https://docs.rs/deltalake" -repository = "https://github.com/delta-io/delta.rs" -readme = "README.md" -edition = "2021" +version = "0.18.3" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true [package.metadata.docs.rs] -# We cannot use all_features because TLS features are mutually exclusive. -# We cannot use hdfs feature because it requires Java to be installed. -features = ["azure", "datafusion", "gcs", "hdfs", "json", "python", "s3", "unity-experimental"] +features = ["datafusion", "json", "unity-experimental"] [dependencies] +delta_kernel.workspace = true + # arrow arrow = { workspace = true } arrow-arith = { workspace = true } -arrow-array = { workspace = true } +arrow-array = { workspace = true , features = ["chrono-tz"]} arrow-buffer = { workspace = true } arrow-cast = { workspace = true } arrow-ipc = { workspace = true } @@ -43,6 +42,8 @@ datafusion-common = { workspace = true, optional = true } datafusion-proto = { workspace = true, optional = true } datafusion-sql = { workspace = true, optional = true } datafusion-physical-expr = { workspace = true, optional = true } +datafusion-functions = { workspace = true, optional = true } +datafusion-functions-array = { workspace = true, optional = true } # serde serde = { workspace = true, features = ["derive"] } @@ -51,11 +52,12 @@ serde_json = { workspace = true } # "stdlib" bytes = { workspace = true } chrono = { workspace = true, default-features = false, features = ["clock"] } -hashbrown = "*" +hashbrown = "0.14.3" regex = { workspace = true } thiserror = { workspace = true } uuid = { workspace = true, features = ["serde", "v4"] } url = { workspace = true } +urlencoding = { workspace = true} # runtime async-trait = { workspace = true } @@ -72,12 +74,13 @@ tokio = { workspace = true, features = [ # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" -dashmap = "5" +dashmap = "6" errno = "0.3" either = "1.8" fix-hidden-lifetime-bug = "0.2" hyper = { version = "0.14", optional = true } -itertools = "0.12" +indexmap = "2.2.1" +itertools = "0.13" lazy_static = "1" libc = ">=0.2.90, <1" num-bigint = "0.4" @@ -97,7 +100,7 @@ reqwest = { version = "0.11.18", default-features = false, features = [ "rustls-tls", "json", ], optional = true } -sqlparser = { version = "0.41", optional = true } +sqlparser = { version = "0.49", optional = true } [dev-dependencies] criterion = "0.5" @@ -107,7 +110,7 @@ dotenvy = "0" hyper = { version = "0.14", features = ["server"] } maplit = "1" pretty_assertions = "1.2.1" -pretty_env_logger = "*" +pretty_env_logger = "0.5.0" rand = "0.8" serial_test = "3" tempfile = "3" @@ -115,7 +118,8 @@ tokio = { version = "1", features = ["macros", "rt-multi-thread"] } utime = "0.3" [features] -default = [] +cdf = [] +default = ["cdf"] datafusion = [ "dep:datafusion", "datafusion-expr", @@ -123,6 +127,8 @@ datafusion = [ "datafusion-proto", "datafusion-physical-expr", "datafusion-sql", + "datafusion-functions", + "datafusion-functions-array", "sqlparser", ] datafusion-ext = ["datafusion"] diff --git a/crates/core/src/data_catalog/storage/mod.rs b/crates/core/src/data_catalog/storage/mod.rs index 5a25054316..fc30f32144 100644 --- a/crates/core/src/data_catalog/storage/mod.rs +++ b/crates/core/src/data_catalog/storage/mod.rs @@ -110,12 +110,13 @@ impl SchemaProvider for 
ListingSchemaProvider { self.tables.iter().map(|t| t.key().clone()).collect() } - async fn table(&self, name: &str) -> Option> { - let location = self.tables.get(name).map(|t| t.clone())?; - let provider = open_table_with_storage_options(location, self.storage_options.0.clone()) - .await - .ok()?; - Some(Arc::new(provider) as Arc) + async fn table(&self, name: &str) -> datafusion_common::Result>> { + let Some(location) = self.tables.get(name).map(|t| t.clone()) else { + return Ok(None); + }; + let provider = + open_table_with_storage_options(location, self.storage_options.0.clone()).await?; + Ok(Some(Arc::new(provider) as Arc)) } fn register_table( diff --git a/crates/core/src/data_catalog/unity/datafusion.rs b/crates/core/src/data_catalog/unity/datafusion.rs index 21246c865a..6b6a4b4a63 100644 --- a/crates/core/src/data_catalog/unity/datafusion.rs +++ b/crates/core/src/data_catalog/unity/datafusion.rs @@ -6,8 +6,9 @@ use std::sync::Arc; use dashmap::DashMap; use datafusion::catalog::schema::SchemaProvider; -use datafusion::catalog::{CatalogList, CatalogProvider}; +use datafusion::catalog::{CatalogProvider, CatalogProviderList}; use datafusion::datasource::TableProvider; +use datafusion_common::DataFusionError; use tracing::error; use super::models::{GetTableResponse, ListCatalogsResponse, ListTableSummariesResponse}; @@ -49,7 +50,7 @@ impl UnityCatalogList { } } -impl CatalogList for UnityCatalogList { +impl CatalogProviderList for UnityCatalogList { fn as_any(&self) -> &dyn Any { self } @@ -180,25 +181,24 @@ impl SchemaProvider for UnitySchemaProvider { self.table_names.clone() } - async fn table(&self, name: &str) -> Option> { + async fn table(&self, name: &str) -> datafusion_common::Result>> { let maybe_table = self .client .get_table(&self.catalog_name, &self.schema_name, name) .await - .ok()?; + .map_err(|err| DataFusionError::External(Box::new(err)))?; match maybe_table { GetTableResponse::Success(table) => { let table = DeltaTableBuilder::from_uri(table.storage_location) .with_storage_options(self.storage_options.clone()) .load() - .await - .ok()?; - Some(Arc::new(table)) + .await?; + Ok(Some(Arc::new(table))) } GetTableResponse::Error(err) => { error!("failed to fetch table from unity catalog: {}", err.message); - None + Err(DataFusionError::External(Box::new(err))) } } } diff --git a/crates/core/src/data_catalog/unity/models.rs b/crates/core/src/data_catalog/unity/models.rs index e1c8b7d1b7..265149b969 100644 --- a/crates/core/src/data_catalog/unity/models.rs +++ b/crates/core/src/data_catalog/unity/models.rs @@ -1,17 +1,24 @@ //! Api models for databricks unity catalog APIs +use core::fmt; use std::collections::HashMap; use serde::Deserialize; /// Error response from unity API -#[derive(Deserialize)] +#[derive(Debug, Deserialize)] pub struct ErrorResponse { /// The error code pub error_code: String, /// The error message pub message: String, } +impl fmt::Display for ErrorResponse { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "[{}] {}", self.error_code, self.message) + } +} +impl std::error::Error for ErrorResponse {} /// List catalogs response #[derive(Deserialize)] diff --git a/crates/core/src/delta_datafusion/cdf/mod.rs b/crates/core/src/delta_datafusion/cdf/mod.rs new file mode 100644 index 0000000000..02382aa725 --- /dev/null +++ b/crates/core/src/delta_datafusion/cdf/mod.rs @@ -0,0 +1,95 @@ +//! 
Logical operators and physical executions for CDF + +use arrow_schema::{DataType, Field, TimeUnit}; +use lazy_static::lazy_static; +use std::collections::HashMap; + +pub(crate) use scan::*; +pub(crate) use scan_utils::*; + +use crate::kernel::{Add, AddCDCFile}; + +mod scan; +mod scan_utils; + +/// Change type column name +pub const CHANGE_TYPE_COL: &str = "_change_type"; +/// Commit version column name +pub const COMMIT_VERSION_COL: &str = "_commit_version"; +/// Commit Timestamp column name +pub const COMMIT_TIMESTAMP_COL: &str = "_commit_timestamp"; + +lazy_static! { + pub(crate) static ref CDC_PARTITION_SCHEMA: Vec = vec![ + Field::new(COMMIT_VERSION_COL, DataType::Int64, true), + Field::new( + COMMIT_TIMESTAMP_COL, + DataType::Timestamp(TimeUnit::Millisecond, None), + true + ) + ]; + pub(crate) static ref ADD_PARTITION_SCHEMA: Vec = vec![ + Field::new(CHANGE_TYPE_COL, DataType::Utf8, true), + Field::new(COMMIT_VERSION_COL, DataType::Int64, true), + Field::new( + COMMIT_TIMESTAMP_COL, + DataType::Timestamp(TimeUnit::Millisecond, None), + true + ), + ]; +} + +#[derive(Debug)] +pub(crate) struct CdcDataSpec { + version: i64, + timestamp: i64, + actions: Vec, +} + +impl CdcDataSpec { + pub fn new(version: i64, timestamp: i64, actions: Vec) -> Self { + Self { + version, + timestamp, + actions, + } + } +} + +/// This trait defines a generic set of operations used by CDF Reader +pub trait FileAction { + /// Adds partition values + fn partition_values(&self) -> &HashMap>; + /// Physical Path to the data + fn path(&self) -> String; + /// Byte size of the physical file + fn size(&self) -> usize; +} + +impl FileAction for Add { + fn partition_values(&self) -> &HashMap> { + &self.partition_values + } + + fn path(&self) -> String { + self.path.clone() + } + + fn size(&self) -> usize { + self.size as usize + } +} + +impl FileAction for AddCDCFile { + fn partition_values(&self) -> &HashMap> { + &self.partition_values + } + + fn path(&self) -> String { + self.path.clone() + } + + fn size(&self) -> usize { + self.size as usize + } +} diff --git a/crates/core/src/delta_datafusion/cdf/scan.rs b/crates/core/src/delta_datafusion/cdf/scan.rs new file mode 100644 index 0000000000..bd7488899f --- /dev/null +++ b/crates/core/src/delta_datafusion/cdf/scan.rs @@ -0,0 +1,63 @@ +use std::any::Any; +use std::fmt::Formatter; +use std::sync::Arc; + +use arrow_schema::SchemaRef; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; + +/// Physical execution of a scan +#[derive(Debug, Clone)] +pub struct DeltaCdfScan { + plan: Arc, +} + +impl DeltaCdfScan { + /// Creates a new scan + pub fn new(plan: Arc) -> Self { + Self { plan } + } +} + +impl DisplayAs for DeltaCdfScan { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl ExecutionPlan for DeltaCdfScan { + fn name(&self) -> &str { + Self::static_name() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.plan.schema().clone() + } + + fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + self.plan.properties() + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> datafusion_common::Result> { + self.plan.clone().with_new_children(_children) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> datafusion_common::Result { + self.plan.execute(partition, context) + } 
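// `DeltaCdfScan` above simply forwards schema, plan properties and execution to the
// wrapped inner plan. The `FileAction` trait from cdf/mod.rs is what lets the CDF
// reader treat base data files (`Add`) and change files (`AddCDCFile`) uniformly; a
// hedged sketch of the contract, with `describe` being an illustrative helper:
//
//   fn describe<F: FileAction>(action: &F) -> (String, usize) {
//       // physical path and byte size, independent of the concrete action type
//       (action.path(), action.size())
//   }
//
//   // partition values are exposed the same way on both action types:
//   // action.partition_values() -> &HashMap<String, Option<String>>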
+} diff --git a/crates/core/src/delta_datafusion/cdf/scan_utils.rs b/crates/core/src/delta_datafusion/cdf/scan_utils.rs new file mode 100644 index 0000000000..79d7a2359e --- /dev/null +++ b/crates/core/src/delta_datafusion/cdf/scan_utils.rs @@ -0,0 +1,100 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use chrono::TimeZone; +use datafusion::datasource::listing::PartitionedFile; +use datafusion_common::ScalarValue; +use object_store::path::Path; +use object_store::ObjectMeta; +use serde_json::Value; + +use crate::delta_datafusion::cdf::CHANGE_TYPE_COL; +use crate::delta_datafusion::cdf::{CdcDataSpec, FileAction}; +use crate::delta_datafusion::{get_null_of_arrow_type, to_correct_scalar_value}; +use crate::DeltaResult; + +pub fn map_action_to_scalar( + action: &F, + part: &str, + schema: SchemaRef, +) -> ScalarValue { + action + .partition_values() + .get(part) + .map(|val| { + schema + .field_with_name(part) + .map(|field| match val { + Some(value) => to_correct_scalar_value( + &Value::String(value.to_string()), + field.data_type(), + ) + .unwrap_or(Some(ScalarValue::Null)) + .unwrap_or(ScalarValue::Null), + None => get_null_of_arrow_type(field.data_type()).unwrap_or(ScalarValue::Null), + }) + .unwrap_or(ScalarValue::Null) + }) + .unwrap_or(ScalarValue::Null) +} + +pub fn create_spec_partition_values( + spec: &CdcDataSpec, + action_type: Option<&ScalarValue>, +) -> Vec { + let mut spec_partition_values = action_type.cloned().map(|at| vec![at]).unwrap_or_default(); + spec_partition_values.push(ScalarValue::Int64(Some(spec.version))); + spec_partition_values.push(ScalarValue::TimestampMillisecond( + Some(spec.timestamp), + None, + )); + spec_partition_values +} + +pub fn create_partition_values( + schema: SchemaRef, + specs: Vec>, + table_partition_cols: &[String], + action_type: Option, +) -> DeltaResult, Vec>> { + let mut file_groups: HashMap, Vec> = HashMap::new(); + + for spec in specs { + let spec_partition_values = create_spec_partition_values(&spec, action_type.as_ref()); + + for action in spec.actions { + let partition_values = table_partition_cols + .iter() + .map(|part| map_action_to_scalar(&action, part, schema.clone())) + .collect::>(); + + let mut new_part_values = spec_partition_values.clone(); + new_part_values.extend(partition_values); + + let part = PartitionedFile { + object_meta: ObjectMeta { + location: Path::parse(action.path().as_str())?, + size: action.size(), + e_tag: None, + last_modified: chrono::Utc.timestamp_nanos(0), + version: None, + }, + partition_values: new_part_values.clone(), + extensions: None, + range: None, + statistics: None, + }; + + file_groups.entry(new_part_values).or_default().push(part); + } + } + Ok(file_groups) +} + +pub fn create_cdc_schema(mut schema_fields: Vec, include_type: bool) -> SchemaRef { + if include_type { + schema_fields.push(Field::new(CHANGE_TYPE_COL, DataType::Utf8, true)); + } + Arc::new(Schema::new(schema_fields)) +} diff --git a/crates/core/src/delta_datafusion/expr.rs b/crates/core/src/delta_datafusion/expr.rs index 03849f4df9..2577d1a1db 100644 --- a/crates/core/src/delta_datafusion/expr.rs +++ b/crates/core/src/delta_datafusion/expr.rs @@ -22,22 +22,24 @@ //! 
Utility functions for Datafusion's Expressions use std::{ - fmt::{self, Display, Formatter, Write}, + fmt::{self, Display, Error, Formatter, Write}, sync::Arc, }; use arrow_schema::DataType; +use chrono::{DateTime, NaiveDate}; use datafusion::execution::context::SessionState; +use datafusion::execution::FunctionRegistry; use datafusion_common::Result as DFResult; use datafusion_common::{config::ConfigOptions, DFSchema, Result, ScalarValue, TableReference}; use datafusion_expr::{ - expr::InList, AggregateUDF, Between, BinaryExpr, Cast, Expr, GetIndexedField, Like, TableSource, + expr::InList, AggregateUDF, Between, BinaryExpr, Cast, Expr, Like, TableSource, }; use datafusion_sql::planner::{ContextProvider, SqlToRel}; -use sqlparser::ast::escape_quoted_string; -use sqlparser::dialect::GenericDialect; -use sqlparser::parser::Parser; -use sqlparser::tokenizer::Tokenizer; +use datafusion_sql::sqlparser::ast::escape_quoted_string; +use datafusion_sql::sqlparser::dialect::GenericDialect; +use datafusion_sql::sqlparser::parser::Parser; +use datafusion_sql::sqlparser::tokenizer::Tokenizer; use crate::{DeltaResult, DeltaTableError}; @@ -48,7 +50,7 @@ pub(crate) struct DeltaContextProvider<'a> { } impl<'a> ContextProvider for DeltaContextProvider<'a> { - fn get_table_provider(&self, _name: TableReference) -> DFResult> { + fn get_table_source(&self, _name: TableReference) -> DFResult> { unimplemented!() } @@ -72,7 +74,15 @@ impl<'a> ContextProvider for DeltaContextProvider<'a> { self.state.window_functions().get(name).cloned() } - fn get_table_source(&self, _name: TableReference) -> DFResult> { + fn udf_names(&self) -> Vec { + unimplemented!() + } + + fn udaf_names(&self) -> Vec { + unimplemented!() + } + + fn udwf_names(&self) -> Vec { unimplemented!() } } @@ -98,9 +108,15 @@ pub(crate) fn parse_predicate_expression( })?; let context_provider = DeltaContextProvider { state: df_state }; - let sql_to_rel = + let mut sql_to_rel = SqlToRel::new_with_options(&context_provider, DeltaParserOptions::default().into()); + // NOTE: This can be probably removed with Datafusion 41 once + // is released + for planner in context_provider.state.expr_planners() { + sql_to_rel = sql_to_rel.with_user_defined_planner(planner.clone()); + } + Ok(sql_to_rel.sql_to_expr(sql, schema, &mut Default::default())?) 
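// A hedged usage sketch for the predicate parser above, via the public
// `DataFusionMixins` trait introduced later in this diff. Assumes an open
// `table: DeltaTable` and a DataFusion `SessionContext` named `ctx`; the predicate
// string is only an example.
//
//   use deltalake_core::delta_datafusion::DataFusionMixins;
//
//   let predicate = table
//       .snapshot()?
//       .parse_predicate_expression("value = '1' AND modified > '2021-02-01'", &ctx.state())?;
//   // `predicate` is a datafusion_expr::Expr that round-trips through the
//   // `SqlFormat` display implementation below.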
} @@ -185,7 +201,7 @@ impl<'a> Display for SqlFormat<'a> { Expr::IsNotFalse(expr) => write!(f, "{} IS NOT FALSE", SqlFormat { expr }), Expr::IsNotUnknown(expr) => write!(f, "{} IS NOT UNKNOWN", SqlFormat { expr }), Expr::BinaryExpr(expr) => write!(f, "{}", BinaryExprFormat { expr }), - Expr::ScalarFunction(func) => fmt_function(f, func.func_def.name(), false, &func.args), + Expr::ScalarFunction(func) => fmt_function(f, func.func.name(), false, &func.args), Expr::Cast(Cast { expr, data_type }) => { write!(f, "arrow_cast({}, '{}')", SqlFormat { expr }, data_type) } @@ -263,28 +279,6 @@ impl<'a> Display for SqlFormat<'a> { write!(f, "{expr} IN ({})", expr_vec_fmt!(list)) } } - Expr::GetIndexedField(GetIndexedField { expr, field }) => match field { - datafusion_expr::GetFieldAccess::NamedStructField { name } => { - write!( - f, - "{}[{}]", - SqlFormat { expr }, - ScalarValueFormat { scalar: name } - ) - } - datafusion_expr::GetFieldAccess::ListIndex { key } => { - write!(f, "{}[{}]", SqlFormat { expr }, SqlFormat { expr: key }) - } - datafusion_expr::GetFieldAccess::ListRange { start, stop } => { - write!( - f, - "{}[{}:{}]", - SqlFormat { expr }, - SqlFormat { expr: start }, - SqlFormat { expr: stop } - ) - } - }, _ => Err(fmt::Error), } } @@ -321,6 +315,9 @@ macro_rules! format_option { }}; } +/// Epoch days from ce calander until 1970-01-01 +pub const EPOCH_DAYS_FROM_CE: i32 = 719_163; + struct ScalarValueFormat<'a> { scalar: &'a ScalarValue, } @@ -339,6 +336,44 @@ impl<'a> fmt::Display for ScalarValueFormat<'a> { ScalarValue::UInt16(e) => format_option!(f, e)?, ScalarValue::UInt32(e) => format_option!(f, e)?, ScalarValue::UInt64(e) => format_option!(f, e)?, + ScalarValue::Date32(e) => match e { + Some(e) => write!( + f, + "{}", + NaiveDate::from_num_days_from_ce_opt(EPOCH_DAYS_FROM_CE + (*e)).ok_or(Error)? + )?, + None => write!(f, "NULL")?, + }, + ScalarValue::Date64(e) => match e { + Some(e) => write!( + f, + "'{}'::date", + DateTime::from_timestamp_millis(*e) + .ok_or(Error)? + .date_naive() + .format("%Y-%m-%d") + )?, + None => write!(f, "NULL")?, + }, + ScalarValue::TimestampMicrosecond(e, tz) => match e { + Some(e) => match tz { + Some(_tz) => write!( + f, + "arrow_cast('{}', 'Timestamp(Microsecond, Some(\"UTC\"))')", + DateTime::from_timestamp_micros(*e) + .ok_or(Error)? + .format("%Y-%m-%dT%H:%M:%S%.6f") + )?, + None => write!( + f, + "arrow_cast('{}', 'Timestamp(Microsecond, None)')", + DateTime::from_timestamp_micros(*e) + .ok_or(Error)? 
+ .format("%Y-%m-%dT%H:%M:%S%.6f") + )?, + }, + None => write!(f, "NULL")?, + }, ScalarValue::Utf8(e) | ScalarValue::LargeUtf8(e) => match e { Some(e) => write!(f, "'{}'", escape_quoted_string(e, '\''))?, None => write!(f, "NULL")?, @@ -357,7 +392,7 @@ impl<'a> fmt::Display for ScalarValueFormat<'a> { None => write!(f, "NULL")?, }, ScalarValue::Null => write!(f, "NULL")?, - _ => return Err(fmt::Error), + _ => return Err(Error), }; Ok(()) } @@ -367,10 +402,17 @@ impl<'a> fmt::Display for ScalarValueFormat<'a> { mod test { use arrow_schema::DataType as ArrowDataType; use datafusion::prelude::SessionContext; - use datafusion_common::{Column, DFSchema, ScalarValue}; - use datafusion_expr::{cardinality, col, decode, lit, substring, Cast, Expr, ExprSchemable}; - - use crate::delta_datafusion::DeltaSessionContext; + use datafusion_common::{Column, ScalarValue, ToDFSchema}; + use datafusion_expr::expr::ScalarFunction; + use datafusion_expr::{col, lit, BinaryExpr, Cast, Expr, ExprSchemable}; + use datafusion_functions::core::arrow_cast; + use datafusion_functions::core::expr_ext::FieldAccessor; + use datafusion_functions::encoding::expr_fn::decode; + use datafusion_functions::expr_fn::substring; + use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor}; + use datafusion_functions_array::expr_fn::cardinality; + + use crate::delta_datafusion::{DataFusionMixins, DeltaSessionContext}; use crate::kernel::{ArrayType, DataType, PrimitiveType, StructField, StructType}; use crate::{DeltaOps, DeltaTable}; @@ -439,6 +481,11 @@ mod test { DataType::Primitive(PrimitiveType::Timestamp), true, ), + StructField::new( + "_timestamp_ntz".to_string(), + DataType::Primitive(PrimitiveType::TimestampNtz), + true, + ), StructField::new( "_binary".to_string(), DataType::Primitive(PrimitiveType::Binary), @@ -472,7 +519,7 @@ mod test { let table = DeltaOps::new_in_memory() .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -485,13 +532,24 @@ mod test { // String expression that we output must be parsable for conflict resolution. 
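// A note on the date handling added above: Arrow's Date32 counts days from
// 1970-01-01, while chrono's `from_num_days_from_ce_opt` counts days from
// 0001-01-01, so the formatter adds `EPOCH_DAYS_FROM_CE` (719_163) to convert.
// A quick sanity check of that constant (chrono only, values illustrative):
//
//   use chrono::NaiveDate;
//   assert_eq!(
//       NaiveDate::from_num_days_from_ce_opt(719_163),
//       NaiveDate::from_ymd_opt(1970, 1, 1)
//   );
//   // e.g. a Date32 value of 18_628 maps to 719_163 + 18_628 = 737_791, i.e. 2021-01-01.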
let tests = vec![ - simple!( - Expr::Cast(Cast { + ParseTest { + expr: Expr::Cast(Cast { expr: Box::new(lit(1_i64)), data_type: ArrowDataType::Int32 }), - "arrow_cast(1, 'Int32')".to_string() - ), + expected: "arrow_cast(1, 'Int32')".to_string(), + override_expected_expr: Some( + datafusion_expr::Expr::ScalarFunction( + ScalarFunction { + func: arrow_cast(), + args: vec![ + lit(ScalarValue::Int64(Some(1))), + lit(ScalarValue::Utf8(Some("Int32".into()))) + ] + } + ) + ), + }, simple!( Expr::Column(Column::from_qualified_name_ignore_case("Value3")).eq(lit(3_i64)), "Value3 = 3".to_string() @@ -570,9 +628,8 @@ mod test { substring(col("modified"), lit(0_i64), lit(4_i64)).eq(lit("2021")), "substr(modified, 0, 4) = '2021'".to_string() ), - simple!( - col("value") - .cast_to::( + ParseTest { + expr: col("value").cast_to( &arrow_schema::DataType::Utf8, &table .snapshot() @@ -581,29 +638,77 @@ mod test { .unwrap() .as_ref() .to_owned() - .try_into() + .to_dfschema() .unwrap() ) .unwrap() .eq(lit("1")), - "arrow_cast(value, 'Utf8') = '1'".to_string() - ), + expected: "arrow_cast(value, 'Utf8') = '1'".to_string(), + override_expected_expr: Some( + datafusion_expr::Expr::BinaryExpr(BinaryExpr { + left: Box::new(datafusion_expr::Expr::ScalarFunction( + ScalarFunction { + func: arrow_cast(), + args: vec![ + col("value"), + lit(ScalarValue::Utf8(Some("Utf8".into()))) + ] + } + )), + op: datafusion_expr::Operator::Eq, + right: Box::new(lit(ScalarValue::Utf8(Some("1".into())))) + }) + ), + }, simple!( col("_struct").field("a").eq(lit(20_i64)), - "_struct['a'] = 20".to_string() + "get_field(_struct, 'a') = 20".to_string() ), simple!( col("_struct").field("nested").field("b").eq(lit(20_i64)), - "_struct['nested']['b'] = 20".to_string() + "get_field(get_field(_struct, 'nested'), 'b') = 20".to_string() ), simple!( col("_list").index(lit(1_i64)).eq(lit(20_i64)), - "_list[1] = 20".to_string() + "array_element(_list, 1) = 20".to_string() ), simple!( cardinality(col("_list").range(col("value"), lit(10_i64))), - "cardinality(_list[value:10])".to_string() + "cardinality(array_slice(_list, value, 10))".to_string() ), + ParseTest { + expr: col("_timestamp_ntz").gt(lit(ScalarValue::TimestampMicrosecond(Some(1262304000000000), None))), + expected: "_timestamp_ntz > arrow_cast('2010-01-01T00:00:00.000000', 'Timestamp(Microsecond, None)')".to_string(), + override_expected_expr: Some(col("_timestamp_ntz").gt( + datafusion_expr::Expr::ScalarFunction( + ScalarFunction { + func: arrow_cast(), + args: vec![ + lit(ScalarValue::Utf8(Some("2010-01-01T00:00:00.000000".into()))), + lit(ScalarValue::Utf8(Some("Timestamp(Microsecond, None)".into()))) + ] + } + ) + )), + }, + ParseTest { + expr: col("_timestamp").gt(lit(ScalarValue::TimestampMicrosecond( + Some(1262304000000000), + Some("UTC".into()) + ))), + expected: "_timestamp > arrow_cast('2010-01-01T00:00:00.000000', 'Timestamp(Microsecond, Some(\"UTC\"))')".to_string(), + override_expected_expr: Some(col("_timestamp").gt( + datafusion_expr::Expr::ScalarFunction( + ScalarFunction { + func: arrow_cast(), + args: vec![ + lit(ScalarValue::Utf8(Some("2010-01-01T00:00:00.000000".into()))), + lit(ScalarValue::Utf8(Some("Timestamp(Microsecond, Some(\"UTC\"))".into()))) + ] + } + ) + )), + }, ]; let session: SessionContext = DeltaSessionContext::default().into(); diff --git a/crates/core/src/delta_datafusion/find_files/logical.rs b/crates/core/src/delta_datafusion/find_files/logical.rs new file mode 100644 index 0000000000..4dd4a3b5da --- /dev/null +++ 
b/crates/core/src/delta_datafusion/find_files/logical.rs @@ -0,0 +1,107 @@ +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +use datafusion_common::DFSchemaRef; +use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::delta_datafusion::find_files::ONLY_FILES_DF_SCHEMA; +use crate::logstore::LogStoreRef; +use crate::table::state::DeltaTableState; + +#[derive(Debug, Clone)] +pub struct FindFilesNode { + id: String, + predicate: Expr, + table_state: DeltaTableState, + log_store: LogStoreRef, + version: i64, +} + +impl FindFilesNode { + pub fn new( + id: String, + table_state: DeltaTableState, + log_store: LogStoreRef, + predicate: Expr, + ) -> datafusion_common::Result { + let version = table_state.version(); + Ok(Self { + id, + predicate, + log_store, + table_state, + + version, + }) + } + + pub fn predicate(&self) -> Expr { + self.predicate.clone() + } + + pub fn state(&self) -> DeltaTableState { + self.table_state.clone() + } + + pub fn log_store(&self) -> LogStoreRef { + self.log_store.clone() + } +} + +impl Eq for FindFilesNode {} + +impl PartialEq for FindFilesNode { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Hash for FindFilesNode { + fn hash(&self, state: &mut H) { + state.write(self.id.as_bytes()); + state.finish(); + } +} + +impl UserDefinedLogicalNodeCore for FindFilesNode { + fn name(&self) -> &str { + "FindFiles" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] + } + + fn schema(&self) -> &DFSchemaRef { + &ONLY_FILES_DF_SCHEMA + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn prevent_predicate_push_down_columns(&self) -> HashSet { + HashSet::new() + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "FindFiles[id={}, predicate=\"{}\", version={}]", + &self.id, self.predicate, self.version, + ) + } + + fn from_template(&self, exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + _inputs: Vec, + ) -> datafusion_common::Result { + Ok(self.clone()) + } +} diff --git a/crates/core/src/delta_datafusion/find_files/mod.rs b/crates/core/src/delta_datafusion/find_files/mod.rs new file mode 100644 index 0000000000..d25d0765ee --- /dev/null +++ b/crates/core/src/delta_datafusion/find_files/mod.rs @@ -0,0 +1,282 @@ +use arrow_array::cast::AsArray; +use std::sync::Arc; + +use arrow_array::types::UInt16Type; +use arrow_array::RecordBatch; +use arrow_schema::SchemaBuilder; +use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef}; +use arrow_select::concat::concat_batches; +use async_trait::async_trait; +use datafusion::datasource::MemTable; +use datafusion::execution::context::{QueryPlanner, SessionState}; +use datafusion::execution::TaskContext; +use datafusion::physical_plan::filter::FilterExec; +use datafusion::physical_plan::limit::LocalLimitExec; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; +use datafusion::prelude::SessionContext; +use datafusion_common::{DFSchemaRef, Result, ToDFSchema}; +use datafusion_expr::{col, Expr, LogicalPlan, UserDefinedLogicalNode}; +use lazy_static::lazy_static; + +use crate::delta_datafusion::find_files::logical::FindFilesNode; +use crate::delta_datafusion::find_files::physical::FindFilesExec; +use crate::delta_datafusion::{ + df_logical_schema, register_store, DeltaScanBuilder, 
DeltaScanConfigBuilder, PATH_COLUMN, +}; +use crate::logstore::LogStoreRef; +use crate::table::state::DeltaTableState; +use crate::DeltaTableError; + +pub mod logical; +pub mod physical; + +lazy_static! { + static ref ONLY_FILES_SCHEMA: Arc = { + let mut builder = SchemaBuilder::new(); + builder.push(Field::new(PATH_COLUMN, DataType::Utf8, false)); + Arc::new(builder.finish()) + }; + static ref ONLY_FILES_DF_SCHEMA: DFSchemaRef = + ONLY_FILES_SCHEMA.clone().to_dfschema_ref().unwrap(); +} + +struct FindFilesPlannerExtension {} + +struct FindFilesPlanner {} + +#[async_trait] +impl ExtensionPlanner for FindFilesPlannerExtension { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + _physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> Result>> { + if let Some(find_files_node) = node.as_any().downcast_ref::() { + return Ok(Some(Arc::new(FindFilesExec::new( + find_files_node.state(), + find_files_node.log_store(), + find_files_node.predicate(), + )?))); + } + Ok(None) + } +} + +#[async_trait] +impl QueryPlanner for FindFilesPlanner { + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionState, + ) -> Result> { + let planner = Arc::new(Box::new(DefaultPhysicalPlanner::with_extension_planners( + vec![Arc::new(FindFilesPlannerExtension {})], + ))); + planner + .create_physical_plan(logical_plan, session_state) + .await + } +} + +async fn scan_table_by_partitions(batch: RecordBatch, predicate: Expr) -> Result { + let mut arrays = Vec::new(); + let mut fields = Vec::new(); + + let schema = batch.schema(); + + arrays.push( + batch + .column_by_name("path") + .ok_or(DeltaTableError::Generic( + "Column with name `path` does not exist".to_owned(), + ))? + .to_owned(), + ); + fields.push(Field::new(PATH_COLUMN, DataType::Utf8, false)); + + for field in schema.fields() { + if field.name().starts_with("partition.") { + let name = field.name().strip_prefix("partition.").unwrap(); + + arrays.push(batch.column_by_name(field.name()).unwrap().to_owned()); + fields.push(Field::new( + name, + field.data_type().to_owned(), + field.is_nullable(), + )); + } + } + + let schema = Arc::new(Schema::new(fields)); + let batch = RecordBatch::try_new(schema, arrays)?; + let mem_table = MemTable::try_new(batch.schema(), vec![vec![batch]])?; + + let ctx = SessionContext::new(); + let mut df = ctx.read_table(Arc::new(mem_table))?; + df = df + .filter(predicate.to_owned())? + .select(vec![col(PATH_COLUMN)])?; + let df_schema = df.schema().clone(); + let batches = df.collect().await?; + Ok(concat_batches(&SchemaRef::from(df_schema), &batches)?) 
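// A condensed sketch of how the FindFiles pieces fit together, mirroring the tests
// further down in this file: `FindFilesPlanner` is registered as the query planner,
// and `FindFilesNode` enters the plan as a logical extension node that
// `FindFilesPlannerExtension` lowers to `FindFilesExec`. Assumes a `DeltaTable`
// named `table` and a predicate `expr`; the plan id string is illustrative.
//
//   let ctx = SessionContext::new();
//   let state = ctx.state().with_query_planner(Arc::new(FindFilesPlanner {}));
//   let plan = LogicalPlan::Extension(Extension {
//       node: Arc::new(FindFilesNode::new(
//           "find_files_example".into(),
//           table.snapshot()?.clone(),
//           table.log_store().clone(),
//           expr,
//       )?),
//   });
//   let stream = state
//       .create_physical_plan(&plan)
//       .await?
//       .execute(0, state.task_ctx())?;
//   let batches = collect_sendable_stream(stream).await?;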
+} + +async fn scan_table_by_files( + snapshot: DeltaTableState, + log_store: LogStoreRef, + state: SessionState, + expression: Expr, +) -> Result { + register_store(log_store.clone(), state.runtime_env().clone()); + let scan_config = DeltaScanConfigBuilder::new() + .wrap_partition_values(true) + .with_file_column(true) + .build(&snapshot)?; + + let logical_schema = df_logical_schema(&snapshot, &scan_config)?; + + // Identify which columns we need to project + let mut used_columns = expression + .column_refs() + .into_iter() + .map(|column| logical_schema.index_of(&column.name)) + .collect::, ArrowError>>()?; + // Add path column + used_columns.push(logical_schema.index_of(scan_config.file_column_name.as_ref().unwrap())?); + + let scan = DeltaScanBuilder::new(&snapshot, log_store, &state) + .with_filter(Some(expression.clone())) + .with_projection(Some(&used_columns)) + .with_scan_config(scan_config) + .build() + .await?; + + let scan = Arc::new(scan); + let input_schema = scan.logical_schema.as_ref().to_owned(); + let input_dfschema = input_schema.clone().try_into()?; + + let predicate_expr = + state.create_physical_expr(Expr::IsTrue(Box::new(expression.clone())), &input_dfschema)?; + + let filter: Arc = + Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + let limit: Arc = Arc::new(LocalLimitExec::new(filter, 1)); + let field_idx = input_schema.index_of(PATH_COLUMN)?; + let task_ctx = Arc::new(TaskContext::from(&state)); + let path_batches: Vec = datafusion::physical_plan::collect(limit, task_ctx) + .await? + .into_iter() + .map(|batch| { + let col = batch + .column(field_idx) + .as_dictionary::() + .values(); + RecordBatch::try_from_iter(vec![(PATH_COLUMN, col.clone())]).unwrap() + }) + .collect(); + + let result_batches = concat_batches(&ONLY_FILES_SCHEMA.clone(), &path_batches)?; + + Ok(result_batches) +} + +#[cfg(test)] +pub mod tests { + use std::sync::Arc; + + use datafusion::prelude::{DataFrame, SessionContext}; + use datafusion_common::{assert_batches_eq, assert_batches_sorted_eq}; + use datafusion_expr::{col, lit, Expr, Extension, LogicalPlan}; + + use crate::delta_datafusion::find_files::logical::FindFilesNode; + use crate::delta_datafusion::find_files::FindFilesPlanner; + use crate::operations::collect_sendable_stream; + use crate::{DeltaResult, DeltaTable, DeltaTableError}; + + pub async fn test_plan<'a>( + table: DeltaTable, + expr: Expr, + ) -> Result, DeltaTableError> { + let ctx = SessionContext::new(); + let state = ctx + .state() + .with_query_planner(Arc::new(FindFilesPlanner {})); + let find_files_node = LogicalPlan::Extension(Extension { + node: Arc::new(FindFilesNode::new( + "my_cool_plan".into(), + table.snapshot()?.clone(), + table.log_store().clone(), + expr, + )?), + }); + let df = DataFrame::new(state.clone(), find_files_node); + + let p = state + .clone() + .create_physical_plan(df.logical_plan()) + .await?; + + let e = p.execute(0, state.task_ctx())?; + collect_sendable_stream(e).await.map_err(Into::into) + } + + #[tokio::test] + pub async fn test_find_files_partitioned() -> DeltaResult<()> { + let table = crate::open_table("../test/tests/data/delta-0.8.0-partitioned").await?; + let expr: Expr = col("year").eq(lit(2020)); + let s = test_plan(table, expr).await?; + + assert_batches_eq! 
{ + ["+---------------------------------------------------------------------------------------------+", + "| __delta_rs_path |", + "+---------------------------------------------------------------------------------------------+", + "| year=2020/month=1/day=1/part-00000-8eafa330-3be9-4a39-ad78-fd13c2027c7e.c000.snappy.parquet |", + "| year=2020/month=2/day=3/part-00000-94d16827-f2fd-42cd-a060-f67ccc63ced9.c000.snappy.parquet |", + "| year=2020/month=2/day=5/part-00000-89cdd4c8-2af7-4add-8ea3-3990b2f027b5.c000.snappy.parquet |", + "+---------------------------------------------------------------------------------------------+"], + &s + } + Ok(()) + } + + #[tokio::test] + pub async fn test_find_files_unpartitioned() -> DeltaResult<()> { + let table = crate::open_table("../test/tests/data/simple_table").await?; + let expr: Expr = col("id").in_list(vec![lit(9i64), lit(7i64)], false); + let s = test_plan(table, expr).await?; + + assert_batches_sorted_eq! { + ["+---------------------------------------------------------------------+", + "| __delta_rs_path |", + "+---------------------------------------------------------------------+", + "| part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet |", + "| part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet |", + "+---------------------------------------------------------------------+"], + &s + } + Ok(()) + } + + #[tokio::test] + pub async fn test_find_files_unpartitioned2() -> DeltaResult<()> { + let table = crate::open_table("../test/tests/data/simple_table").await?; + let expr: Expr = col("id").is_not_null(); + let s = test_plan(table, expr).await?; + + assert_batches_sorted_eq! { + ["+---------------------------------------------------------------------+", + "| __delta_rs_path |", + "+---------------------------------------------------------------------+", + "| part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet |", + "| part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet |", + "| part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet |", + "+---------------------------------------------------------------------+"], + &s + } + Ok(()) + } +} diff --git a/crates/core/src/delta_datafusion/find_files/physical.rs b/crates/core/src/delta_datafusion/find_files/physical.rs new file mode 100644 index 0000000000..e23a561e5b --- /dev/null +++ b/crates/core/src/delta_datafusion/find_files/physical.rs @@ -0,0 +1,158 @@ +use std::any::Any; +use std::fmt::{Debug, Formatter}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use arrow_array::RecordBatch; +use arrow_schema::SchemaRef; +use datafusion::error::Result; +use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; +use datafusion::physical_plan::memory::MemoryStream; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; +use datafusion::prelude::SessionContext; +use datafusion_common::tree_node::TreeNode; +use datafusion_expr::Expr; +use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; +use futures::stream::BoxStream; +use futures::{FutureExt, Stream, StreamExt, TryStreamExt}; + +use crate::delta_datafusion::find_files::{ + scan_table_by_files, scan_table_by_partitions, ONLY_FILES_SCHEMA, +}; +use crate::delta_datafusion::FindFilesExprProperties; +use crate::logstore::LogStoreRef; +use crate::table::state::DeltaTableState; + +pub struct FindFilesExec { + predicate: Expr, + state: DeltaTableState, + 
log_store: LogStoreRef, + plan_properties: PlanProperties, +} + +impl FindFilesExec { + pub fn new(state: DeltaTableState, log_store: LogStoreRef, predicate: Expr) -> Result { + Ok(Self { + predicate, + log_store, + state, + plan_properties: PlanProperties::new( + EquivalenceProperties::new(ONLY_FILES_SCHEMA.clone()), + Partitioning::RoundRobinBatch(num_cpus::get()), + ExecutionMode::Bounded, + ), + }) + } +} + +struct FindFilesStream<'a> { + mem_stream: BoxStream<'a, Result>, +} + +impl<'a> FindFilesStream<'a> { + pub fn new(mem_stream: BoxStream<'a, Result>) -> Result { + Ok(Self { mem_stream }) + } +} + +impl<'a> RecordBatchStream for FindFilesStream<'a> { + fn schema(&self) -> SchemaRef { + ONLY_FILES_SCHEMA.clone() + } +} + +impl<'a> Stream for FindFilesStream<'a> { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.as_mut().mem_stream.poll_next_unpin(cx) + } +} + +impl Debug for FindFilesExec { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "FindFilesExec[predicate=\"{}\"]", self.predicate) + } +} + +impl DisplayAs for FindFilesExec { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + write!(f, "FindFilesExec[predicate=\"{}\"]", self.predicate) + } +} + +impl ExecutionPlan for FindFilesExec { + fn name(&self) -> &str { + Self::static_name() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + ONLY_FILES_SCHEMA.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if !children.is_empty() { + return Err(datafusion::error::DataFusionError::Plan( + "Children cannot be replaced in FindFilesExec".to_string(), + )); + } + + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + let current_metadata = self.state.metadata(); + let mut expr_properties = FindFilesExprProperties { + partition_only: true, + partition_columns: current_metadata.partition_columns.clone(), + result: Ok(()), + }; + + TreeNode::visit(&self.predicate, &mut expr_properties)?; + expr_properties.result?; + + if expr_properties.partition_only { + let actions_table = self.state.add_actions_table(true)?; + let predicate = self.predicate.clone(); + let schema = actions_table.schema(); + let mem_stream = + MemoryStream::try_new(vec![actions_table.clone()], schema.clone(), None)? 
+ .and_then(move |batch| scan_table_by_partitions(batch, predicate.clone())) + .boxed(); + + Ok(Box::pin(FindFilesStream::new(mem_stream)?)) + } else { + let ctx = SessionContext::new(); + let state = ctx.state(); + let table_state = self.state.clone(); + let predicate = self.predicate.clone(); + let output_files = + scan_table_by_files(table_state, self.log_store.clone(), state, predicate); + + let mem_stream = output_files.into_stream().boxed(); + Ok(Box::pin(FindFilesStream::new(mem_stream)?)) + } + } +} diff --git a/crates/core/src/delta_datafusion/logical.rs b/crates/core/src/delta_datafusion/logical.rs index 75ed53d1b1..2ce435b5b6 100644 --- a/crates/core/src/delta_datafusion/logical.rs +++ b/crates/core/src/delta_datafusion/logical.rs @@ -34,10 +34,6 @@ impl UserDefinedLogicalNodeCore for MetricObserver { vec![] } - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "MetricObserver id={}", &self.id) - } - fn prevent_predicate_push_down_columns(&self) -> HashSet { if self.enable_pushdown { HashSet::new() @@ -50,15 +46,28 @@ impl UserDefinedLogicalNodeCore for MetricObserver { } } + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "MetricObserver id={}", &self.id) + } + fn from_template( &self, - _exprs: &[datafusion_expr::Expr], + exprs: &[datafusion_expr::Expr], inputs: &[datafusion_expr::LogicalPlan], ) -> Self { - MetricObserver { + self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> datafusion_common::Result { + Ok(MetricObserver { id: self.id.clone(), input: inputs[0].clone(), enable_pushdown: self.enable_pushdown, - } + }) } } diff --git a/crates/core/src/delta_datafusion/mod.rs b/crates/core/src/delta_datafusion/mod.rs index 6ea60a0bda..c2b410cb74 100644 --- a/crates/core/src/delta_datafusion/mod.rs +++ b/crates/core/src/delta_datafusion/mod.rs @@ -27,17 +27,20 @@ use std::sync::Arc; use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::DataType; -use arrow::datatypes::{DataType as ArrowDataType, Schema as ArrowSchema, SchemaRef, TimeUnit}; +use arrow::datatypes::{ + DataType as ArrowDataType, Schema as ArrowSchema, SchemaRef, SchemaRef as ArrowSchemaRef, + TimeUnit, +}; use arrow::error::ArrowError; use arrow::record_batch::RecordBatch; use arrow_array::types::UInt16Type; use arrow_array::{Array, DictionaryArray, StringArray, TypedDictionaryArray}; use arrow_cast::display::array_value_to_string; - use arrow_schema::Field; use async_trait::async_trait; -use chrono::{NaiveDateTime, TimeZone, Utc}; -use datafusion::datasource::file_format::{parquet::ParquetFormat, FileFormat}; +use chrono::{DateTime, TimeZone, Utc}; +use datafusion::config::TableParquetOptions; +use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; use datafusion::datasource::physical_plan::{ wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, }; @@ -46,35 +49,37 @@ use datafusion::datasource::{listing::PartitionedFile, MemTable, TableProvider, use datafusion::execution::context::{SessionConfig, SessionContext, SessionState, TaskContext}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; -use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_optimizer::pruning::PruningPredicate; use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::limit::LocalLimitExec; +use 
datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, Statistics, }; use datafusion_common::scalar::ScalarValue; -use datafusion_common::tree_node::{TreeNode, TreeNodeVisitor, VisitRecursion}; -use datafusion_common::{Column, DataFusionError, Result as DataFusionResult, ToDFSchema}; -use datafusion_expr::expr::ScalarFunction; +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion_common::{ + config::ConfigOptions, Column, DFSchema, DataFusionError, Result as DataFusionResult, + TableReference, ToDFSchema, +}; use datafusion_expr::logical_plan::CreateExternalTable; use datafusion_expr::utils::conjunction; use datafusion_expr::{col, Expr, Extension, LogicalPlan, TableProviderFilterPushDown, Volatility}; -use datafusion_physical_expr::execution_props::ExecutionProps; -use datafusion_physical_expr::{create_physical_expr, PhysicalExpr}; use datafusion_proto::logical_plan::LogicalExtensionCodec; use datafusion_proto::physical_plan::PhysicalExtensionCodec; use datafusion_sql::planner::ParserOptions; +use either::Either; use futures::TryStreamExt; - use itertools::Itertools; use object_store::ObjectMeta; use serde::{Deserialize, Serialize}; use url::Url; +use crate::delta_datafusion::expr::parse_predicate_expression; +use crate::delta_datafusion::schema_adapter::DeltaSchemaAdapterFactory; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, DataCheck, Invariant}; +use crate::kernel::{Add, DataCheck, EagerSnapshot, Invariant, Snapshot, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::table::builder::ensure_table_uri; use crate::table::state::DeltaTableState; @@ -83,9 +88,14 @@ use crate::{open_table, open_table_with_storage_options, DeltaTable}; const PATH_COLUMN: &str = "__delta_rs_path"; +pub mod cdf; pub mod expr; pub mod logical; pub mod physical; +pub mod planner; + +mod find_files; +mod schema_adapter; impl From for DataFusionError { fn from(err: DeltaTableError) -> Self { @@ -111,6 +121,155 @@ impl From for DeltaTableError { } } +/// Convience trait for calling common methods on snapshot heirarchies +pub trait DataFusionMixins { + /// The physical datafusion schema of a table + fn arrow_schema(&self) -> DeltaResult; + + /// Get the table schema as an [`ArrowSchemaRef`] + fn input_schema(&self) -> DeltaResult; + + /// Parse an expression string into a datafusion [`Expr`] + fn parse_predicate_expression( + &self, + expr: impl AsRef, + df_state: &SessionState, + ) -> DeltaResult; +} + +impl DataFusionMixins for Snapshot { + fn arrow_schema(&self) -> DeltaResult { + _arrow_schema(self, true) + } + + fn input_schema(&self) -> DeltaResult { + _arrow_schema(self, false) + } + + fn parse_predicate_expression( + &self, + expr: impl AsRef, + df_state: &SessionState, + ) -> DeltaResult { + let schema = DFSchema::try_from(self.arrow_schema()?.as_ref().to_owned())?; + parse_predicate_expression(&schema, expr, df_state) + } +} + +impl DataFusionMixins for EagerSnapshot { + fn arrow_schema(&self) -> DeltaResult { + self.snapshot().arrow_schema() + } + + fn input_schema(&self) -> DeltaResult { + self.snapshot().input_schema() + } + + fn parse_predicate_expression( + &self, + expr: impl AsRef, + df_state: &SessionState, + ) -> DeltaResult { + 
self.snapshot().parse_predicate_expression(expr, df_state) + } +} + +impl DataFusionMixins for DeltaTableState { + fn arrow_schema(&self) -> DeltaResult { + self.snapshot.arrow_schema() + } + + fn input_schema(&self) -> DeltaResult { + self.snapshot.input_schema() + } + + fn parse_predicate_expression( + &self, + expr: impl AsRef, + df_state: &SessionState, + ) -> DeltaResult { + self.snapshot.parse_predicate_expression(expr, df_state) + } +} + +fn _arrow_schema(snapshot: &Snapshot, wrap_partitions: bool) -> DeltaResult { + let meta = snapshot.metadata(); + + let schema = meta.schema()?; + let fields = schema + .fields() + .filter(|f| !meta.partition_columns.contains(&f.name().to_string())) + .map(|f| f.try_into()) + .chain( + // We need stable order between logical and physical schemas, but the order of + // partitioning columns is not always the same in the json schema and the array + meta.partition_columns.iter().map(|partition_col| { + let f = schema.field(partition_col).unwrap(); + let field = Field::try_from(f)?; + let corrected = if wrap_partitions { + match field.data_type() { + // Only dictionary-encode types that may be large + // // https://github.com/apache/arrow-datafusion/pull/5545 + DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Binary + | DataType::LargeBinary => { + wrap_partition_type_in_dict(field.data_type().clone()) + } + _ => field.data_type().clone(), + } + } else { + field.data_type().clone() + }; + Ok(field.with_data_type(corrected)) + }), + ) + .collect::, _>>()?; + + Ok(Arc::new(ArrowSchema::new(fields))) +} + +pub(crate) trait DataFusionFileMixins { + /// Iterate over all files in the log matching a predicate + fn files_matching_predicate(&self, filters: &[Expr]) -> DeltaResult>; +} + +impl DataFusionFileMixins for EagerSnapshot { + fn files_matching_predicate(&self, filters: &[Expr]) -> DeltaResult> { + files_matching_predicate(self, filters) + } +} + +pub(crate) fn files_matching_predicate<'a>( + snapshot: &'a EagerSnapshot, + filters: &[Expr], +) -> DeltaResult + 'a> { + if let Some(Some(predicate)) = + (!filters.is_empty()).then_some(conjunction(filters.iter().cloned())) + { + //let expr = logical_expr_to_physical_expr(predicate, snapshot.arrow_schema()?.as_ref()); + let expr = SessionContext::new() + .create_physical_expr(predicate, &snapshot.arrow_schema()?.to_dfschema()?)?; + let pruning_predicate = PruningPredicate::try_new(expr, snapshot.arrow_schema()?)?; + Ok(Either::Left( + snapshot + .file_actions()? + .zip(pruning_predicate.prune(snapshot)?) + .filter_map( + |(action, keep_file)| { + if keep_file { + Some(action) + } else { + None + } + }, + ), + )) + } else { + Ok(Either::Right(snapshot.file_actions()?)) + } +} + pub(crate) fn get_path_column<'a>( batch: &'a RecordBatch, path_column: &str, @@ -141,8 +300,9 @@ pub(crate) fn register_store(store: LogStoreRef, env: Arc) { env.register_object_store(url, store.object_store()); } -/// The logical schema for a Deltatable is different then protocol level schema since partiton columns must appear at the end of the schema. -/// This is to align with how partition are handled at the physical level +/// The logical schema for a Deltatable is different from the protocol level schema since partition +/// columns must appear at the end of the schema. 
This is to align with how partition are handled +/// at the physical level pub(crate) fn df_logical_schema( snapshot: &DeltaTableState, scan_config: &DeltaScanConfig, @@ -167,26 +327,40 @@ pub(crate) fn df_logical_schema( } if let Some(file_column_name) = &scan_config.file_column_name { - fields.push(Arc::new(Field::new( - file_column_name, - arrow_schema::DataType::Utf8, - true, - ))); + fields.push(Arc::new(Field::new(file_column_name, DataType::Utf8, true))); } Ok(Arc::new(ArrowSchema::new(fields))) } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] /// Used to specify if additional metadata columns are exposed to the user pub struct DeltaScanConfigBuilder { - /// Include the source path for each record. The name of this column is determine by `file_column_name` + /// Include the source path for each record. The name of this column is determined by `file_column_name` include_file_column: bool, /// Column name that contains the source path. /// /// If include_file_column is true and the name is None then it will be auto-generated /// Otherwise the user provided name will be used file_column_name: Option, + /// Whether to wrap partition values in a dictionary encoding to potentially save space + wrap_partition_values: Option, + /// Whether to push down filter in end result or just prune the files + enable_parquet_pushdown: bool, + /// Schema to scan table with + schema: Option, +} + +impl Default for DeltaScanConfigBuilder { + fn default() -> Self { + DeltaScanConfigBuilder { + include_file_column: false, + file_column_name: None, + wrap_partition_values: None, + enable_parquet_pushdown: true, + schema: None, + } + } } impl DeltaScanConfigBuilder { @@ -210,16 +384,34 @@ impl DeltaScanConfigBuilder { self } + /// Whether to wrap partition values in a dictionary encoding + pub fn wrap_partition_values(mut self, wrap: bool) -> Self { + self.wrap_partition_values = Some(wrap); + self + } + + /// Allow pushdown of the scan filter + /// When disabled the filter will only be used for pruning files + pub fn with_parquet_pushdown(mut self, pushdown: bool) -> Self { + self.enable_parquet_pushdown = pushdown; + self + } + + /// Use the provided [SchemaRef] for the [DeltaScan] + pub fn with_schema(mut self, schema: SchemaRef) -> Self { + self.schema = Some(schema); + self + } + /// Build a DeltaScanConfig and ensure no column name conflicts occur during downstream processing pub fn build(&self, snapshot: &DeltaTableState) -> DeltaResult { - let input_schema = snapshot.input_schema()?; - let mut file_column_name = None; - let mut column_names: HashSet<&String> = HashSet::new(); - for field in input_schema.fields.iter() { - column_names.insert(field.name()); - } + let file_column_name = if self.include_file_column { + let input_schema = snapshot.input_schema()?; + let mut column_names: HashSet<&String> = HashSet::new(); + for field in input_schema.fields.iter() { + column_names.insert(field.name()); + } - if self.include_file_column { match &self.file_column_name { Some(name) => { if column_names.contains(name) { @@ -229,7 +421,7 @@ impl DeltaScanConfigBuilder { ))); } - file_column_name = Some(name.to_owned()) + Some(name.to_owned()) } None => { let prefix = PATH_COLUMN; @@ -241,12 +433,19 @@ impl DeltaScanConfigBuilder { name = format!("{}_{}", prefix, idx); } - file_column_name = Some(name); + Some(name) } } - } + } else { + None + }; - Ok(DeltaScanConfig { file_column_name }) + Ok(DeltaScanConfig { + file_column_name, + wrap_partition_values: self.wrap_partition_values.unwrap_or(true), + 
enable_parquet_pushdown: self.enable_parquet_pushdown, + schema: self.schema.clone(), + }) } } @@ -255,6 +454,12 @@ impl DeltaScanConfigBuilder { pub struct DeltaScanConfig { /// Include the source path for each record pub file_column_name: Option, + /// Wrap partition values in a dictionary encoding + pub wrap_partition_values: bool, + /// Allow pushdown of the scan filter + pub enable_parquet_pushdown: bool, + /// Schema to read as + pub schema: Option, } #[derive(Debug)] @@ -266,8 +471,7 @@ pub(crate) struct DeltaScanBuilder<'a> { projection: Option<&'a Vec>, limit: Option, files: Option<&'a [Add]>, - config: DeltaScanConfig, - schema: Option, + config: Option, } impl<'a> DeltaScanBuilder<'a> { @@ -281,11 +485,10 @@ impl<'a> DeltaScanBuilder<'a> { log_store, filter: None, state, - files: None, projection: None, limit: None, - config: DeltaScanConfig::default(), - schema: None, + files: None, + config: None, } } @@ -310,20 +513,21 @@ impl<'a> DeltaScanBuilder<'a> { } pub fn with_scan_config(mut self, config: DeltaScanConfig) -> Self { - self.config = config; + self.config = Some(config); self } pub async fn build(self) -> DeltaResult { - let config = self.config; - let schema = match self.schema { - Some(schema) => schema, - None => { - self.snapshot - .physical_arrow_schema(self.log_store.object_store()) - .await? - } + let config = match self.config { + Some(config) => config, + None => DeltaScanConfigBuilder::new().build(self.snapshot)?, }; + + let schema = match config.schema.clone() { + Some(value) => Ok(value), + None => self.snapshot.arrow_schema(), + }?; + let logical_schema = df_logical_schema(self.snapshot, &config)?; let logical_schema = if let Some(used_columns) = self.projection { @@ -336,34 +540,45 @@ impl<'a> DeltaScanBuilder<'a> { logical_schema }; + let context = SessionContext::new(); + let df_schema = logical_schema.clone().to_dfschema()?; let logical_filter = self .filter - .map(|expr| logical_expr_to_physical_expr(&expr, &logical_schema)); + .map(|expr| context.create_physical_expr(expr, &df_schema).unwrap()); // Perform Pruning of files to scan - let files = match self.files { - Some(files) => files.to_owned(), + let (files, files_scanned, files_pruned) = match self.files { + Some(files) => { + let files = files.to_owned(); + let files_scanned = files.len(); + (files, files_scanned, 0) + } None => { if let Some(predicate) = &logical_filter { let pruning_predicate = PruningPredicate::try_new(predicate.clone(), logical_schema.clone())?; let files_to_prune = pruning_predicate.prune(self.snapshot)?; - self.snapshot - .file_actions()? - .iter() + let mut files_pruned = 0usize; + let files = self + .snapshot + .file_actions_iter()? .zip(files_to_prune.into_iter()) - .filter_map( - |(action, keep)| { - if keep { - Some(action.to_owned()) - } else { - None - } - }, - ) - .collect() + .filter_map(|(action, keep)| { + if keep { + Some(action.to_owned()) + } else { + files_pruned += 1; + None + } + }) + .collect::>(); + + let files_scanned = files.len(); + (files, files_scanned, files_pruned) } else { - self.snapshot.file_actions()? 
+ let files = self.snapshot.file_actions()?; + let files_scanned = files.len(); + (files, files_scanned, 0) } } }; @@ -379,10 +594,12 @@ impl<'a> DeltaScanBuilder<'a> { let mut part = partitioned_file_from_action(action, table_partition_cols, &schema); if config.file_column_name.is_some() { - part.partition_values - .push(wrap_partition_value_in_dict(ScalarValue::Utf8(Some( - action.path.clone(), - )))); + let partition_value = if config.wrap_partition_values { + wrap_partition_value_in_dict(ScalarValue::Utf8(Some(action.path.clone()))) + } else { + ScalarValue::Utf8(Some(action.path.clone())) + }; + part.partition_values.push(partition_value); } file_groups @@ -406,9 +623,14 @@ impl<'a> DeltaScanBuilder<'a> { .collect::, ArrowError>>()?; if let Some(file_column_name) = &config.file_column_name { + let field_name_datatype = if config.wrap_partition_values { + wrap_partition_type_in_dict(DataType::Utf8) + } else { + DataType::Utf8 + }; table_partition_cols.push(Field::new( file_column_name.clone(), - wrap_partition_type_in_dict(DataType::Utf8), + field_name_datatype, false, )); } @@ -418,28 +640,47 @@ impl<'a> DeltaScanBuilder<'a> { .datafusion_table_statistics() .unwrap_or(Statistics::new_unknown(&schema)); - let scan = ParquetFormat::new() - .create_physical_plan( - self.state, - FileScanConfig { - object_store_url: self.log_store.object_store_url(), - file_schema, - file_groups: file_groups.into_values().collect(), - statistics: stats, - projection: self.projection.cloned(), - limit: self.limit, - table_partition_cols, - output_ordering: vec![], - }, - logical_filter.as_ref(), - ) - .await?; + let parquet_options = TableParquetOptions { + global: self.state.config().options().execution.parquet.clone(), + ..Default::default() + }; + + let mut exec_plan_builder = ParquetExecBuilder::new(FileScanConfig { + object_store_url: self.log_store.object_store_url(), + file_schema, + file_groups: file_groups.into_values().collect(), + statistics: stats, + projection: self.projection.cloned(), + limit: self.limit, + table_partition_cols, + output_ordering: vec![], + }) + .with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {})) + .with_table_parquet_options(parquet_options); + + // Sometimes (i.e Merge) we want to prune files that don't make the + // filter and read the entire contents for files that do match the + // filter + if let Some(predicate) = logical_filter { + if config.enable_parquet_pushdown { + exec_plan_builder = exec_plan_builder.with_predicate(predicate); + } + }; + + let metrics = ExecutionPlanMetricsSet::new(); + MetricBuilder::new(&metrics) + .global_counter("files_scanned") + .add(files_scanned); + MetricBuilder::new(&metrics) + .global_counter("files_pruned") + .add(files_pruned); Ok(DeltaScan { table_uri: ensure_table_uri(self.log_store.root_uri())?.as_str().into(), - parquet_scan: scan, + parquet_scan: exec_plan_builder.build_arc(), config, logical_schema, + metrics, }) } } @@ -487,11 +728,14 @@ impl TableProvider for DeltaTable { Ok(Arc::new(scan)) } - fn supports_filter_pushdown( + fn supports_filters_pushdown( &self, - _filter: &Expr, - ) -> DataFusionResult { - Ok(TableProviderFilterPushDown::Inexact) + filter: &[&Expr], + ) -> DataFusionResult> { + Ok(filter + .into_iter() + .map(|_| TableProviderFilterPushDown::Inexact) + .collect()) } fn statistics(&self) -> Option { @@ -505,6 +749,7 @@ pub struct DeltaTableProvider { log_store: LogStoreRef, config: DeltaScanConfig, schema: Arc, + files: Option>, } impl DeltaTableProvider { @@ -519,8 +764,15 @@ impl 
DeltaTableProvider { snapshot, log_store, config, + files: None, }) } + + /// Define which files to consider while building a scan, for advanced usecases + pub fn with_files(mut self, files: Vec) -> DeltaTableProvider { + self.files = Some(files); + self + } } #[async_trait] @@ -555,22 +807,23 @@ impl TableProvider for DeltaTableProvider { register_store(self.log_store.clone(), session.runtime_env().clone()); let filter_expr = conjunction(filters.iter().cloned()); - let scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session) + let mut scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session) .with_projection(projection) .with_limit(limit) .with_filter(filter_expr) - .with_scan_config(self.config.clone()) - .build() - .await?; + .with_scan_config(self.config.clone()); - Ok(Arc::new(scan)) + if let Some(files) = &self.files { + scan = scan.with_files(files); + } + Ok(Arc::new(scan.build().await?)) } - fn supports_filter_pushdown( + fn supports_filters_pushdown( &self, - _filter: &Expr, - ) -> DataFusionResult { - Ok(TableProviderFilterPushDown::Inexact) + _filter: &[&Expr], + ) -> DataFusionResult> { + Ok(vec![TableProviderFilterPushDown::Inexact]) } fn statistics(&self) -> Option { @@ -590,6 +843,8 @@ pub struct DeltaScan { pub parquet_scan: Arc, /// The schema of the table to be used when evaluating expressions pub logical_schema: Arc, + /// Metrics for scan reported via DataFusion + metrics: ExecutionPlanMetricsSet, } #[derive(Debug, Serialize, Deserialize)] @@ -606,6 +861,10 @@ impl DisplayAs for DeltaScan { } impl ExecutionPlan for DeltaScan { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn Any { self } @@ -614,23 +873,31 @@ impl ExecutionPlan for DeltaScan { self.parquet_scan.schema() } - fn output_partitioning(&self) -> Partitioning { - self.parquet_scan.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.parquet_scan.output_ordering() + fn properties(&self) -> &PlanProperties { + self.parquet_scan.properties() } - fn children(&self) -> Vec> { - vec![self.parquet_scan.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.parquet_scan] } fn with_new_children( self: Arc, children: Vec>, ) -> DataFusionResult> { - ExecutionPlan::with_new_children(self.parquet_scan.clone(), children) + if children.len() != 1 { + return Err(DataFusionError::Plan(format!( + "DeltaScan wrong number of children {}", + children.len() + ))); + } + Ok(Arc::new(DeltaScan { + table_uri: self.table_uri.clone(), + config: self.config.clone(), + parquet_scan: children[0].clone(), + logical_schema: self.logical_schema.clone(), + metrics: self.metrics.clone(), + })) } fn execute( @@ -641,9 +908,31 @@ impl ExecutionPlan for DeltaScan { self.parquet_scan.execute(partition, context) } + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + fn statistics(&self) -> DataFusionResult { self.parquet_scan.statistics() } + + fn repartitioned( + &self, + target_partitions: usize, + config: &ConfigOptions, + ) -> DataFusionResult>> { + if let Some(parquet_scan) = self.parquet_scan.repartitioned(target_partitions, config)? 
{ + Ok(Some(Arc::new(DeltaScan { + table_uri: self.table_uri.clone(), + config: self.config.clone(), + parquet_scan, + logical_schema: self.logical_schema.clone(), + metrics: self.metrics.clone(), + }))) + } else { + Ok(None) + } + } } pub(crate) fn get_null_of_arrow_type(t: &ArrowDataType) -> DeltaResult { @@ -700,6 +989,10 @@ pub(crate) fn get_null_of_arrow_type(t: &ArrowDataType) -> DeltaResult Err(DeltaTableError::Generic(format!( "Unsupported data type for Delta Lake {}", t @@ -739,8 +1032,11 @@ pub(crate) fn partitioned_file_from_action( let ts_secs = action.modification_time / 1000; let ts_ns = (action.modification_time % 1000) * 1_000_000; - let last_modified = - Utc.from_utc_datetime(&NaiveDateTime::from_timestamp_opt(ts_secs, ts_ns as u32).unwrap()); + let last_modified = Utc.from_utc_datetime( + &DateTime::from_timestamp(ts_secs, ts_ns as u32) + .unwrap() + .naive_utc(), + ); PartitionedFile { object_meta: ObjectMeta { last_modified, @@ -749,9 +1045,31 @@ pub(crate) fn partitioned_file_from_action( partition_values, range: None, extensions: None, + statistics: None, } } +fn parse_date( + stat_val: &serde_json::Value, + field_dt: &ArrowDataType, +) -> DataFusionResult { + let string = match stat_val { + serde_json::Value::String(s) => s.to_owned(), + _ => stat_val.to_string(), + }; + + let time_micro = ScalarValue::try_from_string(string, &ArrowDataType::Date32)?; + let cast_arr = cast_with_options( + &time_micro.to_array()?, + field_dt, + &CastOptions { + safe: false, + ..Default::default() + }, + )?; + ScalarValue::try_from_array(&cast_arr, 0) +} + fn parse_timestamp( stat_val: &serde_json::Value, field_dt: &ArrowDataType, @@ -786,6 +1104,7 @@ pub(crate) fn to_correct_scalar_value( serde_json::Value::Null => Ok(Some(get_null_of_arrow_type(field_dt)?)), serde_json::Value::String(string_val) => match field_dt { ArrowDataType::Timestamp(_, _) => Ok(Some(parse_timestamp(stat_val, field_dt)?)), + ArrowDataType::Date32 => Ok(Some(parse_date(stat_val, field_dt)?)), _ => Ok(Some(ScalarValue::try_from_string( string_val.to_owned(), field_dt, @@ -793,6 +1112,7 @@ pub(crate) fn to_correct_scalar_value( }, other => match field_dt { ArrowDataType::Timestamp(_, _) => Ok(Some(parse_timestamp(stat_val, field_dt)?)), + ArrowDataType::Date32 => Ok(Some(parse_date(stat_val, field_dt)?)), _ => Ok(Some(ScalarValue::try_from_string( other.to_string(), field_dt, @@ -801,21 +1121,12 @@ pub(crate) fn to_correct_scalar_value( } } -pub(crate) fn logical_expr_to_physical_expr( - expr: &Expr, - schema: &ArrowSchema, -) -> Arc { - let df_schema = schema.clone().to_dfschema().unwrap(); - let execution_props = ExecutionProps::new(); - create_physical_expr(expr, &df_schema, &execution_props).unwrap() -} - pub(crate) async fn execute_plan_to_batch( state: &SessionState, plan: Arc, ) -> DeltaResult { - let data = - futures::future::try_join_all((0..plan.output_partitioning().partition_count()).map(|p| { + let data = futures::future::try_join_all( + (0..plan.properties().output_partitioning().partition_count()).map(|p| { let plan_copy = plan.clone(); let task_context = state.task_ctx().clone(); async move { @@ -827,8 +1138,9 @@ pub(crate) async fn execute_plan_to_batch( DataFusionResult::<_>::Ok(arrow::compute::concat_batches(&schema, batches.iter())?) 
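                // The partition count driving the map above now comes from the
                // consolidated `ExecutionPlan::properties()` accessor, i.e.
                //
                //     plan.properties().output_partitioning().partition_count()
                //
                // Each partition is executed and its stream collected here; the
                // per-partition results are concatenated into one batch below.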
} - })) - .await?; + }), + ) + .await?; let batch = arrow::compute::concat_batches(&plan.schema(), data.iter())?; @@ -977,6 +1289,7 @@ impl PhysicalExtensionCodec for DeltaPhysicalCodec { parquet_scan: (*inputs)[0].clone(), config: wire.config, logical_schema: wire.logical_schema, + metrics: ExecutionPlanMetricsSet::new(), }; Ok(Arc::new(delta_scan)) } @@ -1023,6 +1336,7 @@ impl LogicalExtensionCodec for DeltaLogicalCodec { fn try_decode_table_provider( &self, buf: &[u8], + _table_ref: &TableReference, _schema: SchemaRef, _ctx: &SessionContext, ) -> Result, DataFusionError> { @@ -1033,6 +1347,7 @@ impl LogicalExtensionCodec for DeltaLogicalCodec { fn try_encode_table_provider( &self, + _table_ref: &TableReference, node: Arc, buf: &mut Vec, ) -> Result<(), DataFusionError> { @@ -1077,10 +1392,10 @@ pub(crate) struct FindFilesExprProperties { /// Ensure only expressions that make sense are accepted, check for /// non-deterministic functions, and determine if the expression only contains /// partition columns -impl TreeNodeVisitor for FindFilesExprProperties { - type N = Expr; +impl TreeNodeVisitor<'_> for FindFilesExprProperties { + type Node = Expr; - fn pre_visit(&mut self, expr: &Self::N) -> datafusion_common::Result { + fn f_down(&mut self, expr: &Self::Node) -> datafusion_common::Result { // TODO: We can likely relax the volatility to STABLE. Would require further // research to confirm the same value is generated during the scan and // rewrite phases. @@ -1108,28 +1423,20 @@ impl TreeNodeVisitor for FindFilesExprProperties { | Expr::IsNotUnknown(_) | Expr::Negative(_) | Expr::InList { .. } - | Expr::GetIndexedField(_) | Expr::Between(_) | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) => (), - Expr::ScalarFunction(ScalarFunction { func_def, .. }) => { - let v = match func_def { - datafusion_expr::ScalarFunctionDefinition::BuiltIn(f) => f.volatility(), - datafusion_expr::ScalarFunctionDefinition::UDF(u) => u.signature().volatility, - datafusion_expr::ScalarFunctionDefinition::Name(n) => { + Expr::ScalarFunction(scalar_function) => { + match scalar_function.func.signature().volatility { + Volatility::Immutable => (), + _ => { self.result = Err(DeltaTableError::Generic(format!( - "Cannot determine volatility of find files predicate function {n}", + "Find files predicate contains nondeterministic function {}", + scalar_function.func.name() ))); - return Ok(VisitRecursion::Stop); + return Ok(TreeNodeRecursion::Stop); } - }; - if v > Volatility::Immutable { - self.result = Err(DeltaTableError::Generic(format!( - "Find files predicate contains nondeterministic function {}", - func_def.name() - ))); - return Ok(VisitRecursion::Stop); } } _ => { @@ -1137,14 +1444,15 @@ impl TreeNodeVisitor for FindFilesExprProperties { "Find files predicate contains unsupported expression {}", expr ))); - return Ok(VisitRecursion::Stop); + return Ok(TreeNodeRecursion::Stop); } } - Ok(VisitRecursion::Continue) + Ok(TreeNodeRecursion::Continue) } } +#[derive(Debug, Hash, Eq, PartialEq)] /// Representing the result of the [find_files] function. 
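// Illustrative predicates (column and function names are assumed for the example):
//
//     col("value").gt(lit(10))      // accepted: immutable expression
//     col("value").gt(random())     // rejected: volatile scalar function
//
// The same expression must produce identical results during the scan and rewrite
// phases, which is why only `Volatility::Immutable` functions are accepted above.
// Predicates that pass the check feed `find_files_scan` below, which returns the
// `Add` actions of files containing at least one matching record.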
pub struct FindFiles { /// A list of `Add` objects that match the given predicate @@ -1198,7 +1506,7 @@ fn join_batches_with_add_actions( Ok(files) } -/// Determine which files contain a record that statisfies the predicate +/// Determine which files contain a record that satisfies the predicate pub(crate) async fn find_files_scan<'a>( snapshot: &DeltaTableState, log_store: LogStoreRef, @@ -1206,8 +1514,7 @@ pub(crate) async fn find_files_scan<'a>( expression: Expr, ) -> DeltaResult> { let candidate_map: HashMap = snapshot - .file_actions()? - .iter() + .file_actions_iter()? .map(|add| (add.path.clone(), add.to_owned())) .collect(); @@ -1221,7 +1528,7 @@ pub(crate) async fn find_files_scan<'a>( // Identify which columns we need to project let mut used_columns = expression - .to_columns()? + .column_refs() .into_iter() .map(|column| logical_schema.index_of(&column.name)) .collect::, ArrowError>>()?; @@ -1240,11 +1547,8 @@ pub(crate) async fn find_files_scan<'a>( let input_schema = scan.logical_schema.as_ref().to_owned(); let input_dfschema = input_schema.clone().try_into()?; - let predicate_expr = create_physical_expr( - &Expr::IsTrue(Box::new(expression.clone())), - &input_dfschema, - state.execution_props(), - )?; + let predicate_expr = + state.create_physical_expr(Expr::IsTrue(Box::new(expression.clone())), &input_dfschema)?; let filter: Arc = Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); @@ -1466,12 +1770,16 @@ impl From for DeltaColumn { #[cfg(test)] mod tests { + use crate::operations::write::SchemaMode; use crate::writer::test_utils::get_delta_schema; use arrow::array::StructArray; use arrow::datatypes::{DataType, Field, Schema}; use chrono::{TimeZone, Utc}; use datafusion::assert_batches_sorted_eq; + use datafusion::datasource::physical_plan::ParquetExec; use datafusion::physical_plan::empty::EmptyExec; + use datafusion::physical_plan::{visit_execution_plan, ExecutionPlanVisitor, PhysicalExpr}; + use datafusion_expr::lit; use datafusion_proto::physical_plan::AsExecutionPlan; use datafusion_proto::protobuf; use object_store::path::Path; @@ -1591,6 +1899,7 @@ mod tests { partition_values: [ScalarValue::Int64(Some(2015)), ScalarValue::Int64(Some(1))].to_vec(), range: None, extensions: None, + statistics: None, }; assert_eq!(file.partition_values, ref_file.partition_values) } @@ -1679,6 +1988,7 @@ mod tests { parquet_scan: Arc::from(EmptyExec::new(schema.clone())), config: DeltaScanConfig::default(), logical_schema: schema.clone(), + metrics: ExecutionPlanMetricsSet::new(), }); let proto: protobuf::PhysicalPlanNode = protobuf::PhysicalPlanNode::try_from_physical_plan(exec_plan.clone(), &codec) @@ -1734,7 +2044,7 @@ mod tests { let table = crate::DeltaOps::new_in_memory() .create() - .with_columns(get_delta_schema().fields().clone()) + .with_columns(get_delta_schema().fields().cloned()) .with_partition_columns(["modified", "id"]) .await .unwrap(); @@ -1864,4 +2174,402 @@ mod tests { assert_batches_sorted_eq!(&expected, &actual); */ } + + #[tokio::test] + async fn delta_scan_supports_missing_columns() { + let schema1 = Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + DataType::Utf8, + true, + )])); + + let batch1 = RecordBatch::try_new( + schema1.clone(), + vec![Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + ]))], + ) + .unwrap(); + + let schema2 = Arc::new(ArrowSchema::new(vec![ + Field::new("col_1", DataType::Utf8, true), + Field::new("col_2", DataType::Utf8, true), + ])); + + let batch2 = RecordBatch::try_new( + schema2.clone(), + 
vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("E"), + Some("F"), + Some("G"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("E2"), + Some("F2"), + Some("G2"), + ])), + ], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch2]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let table = crate::DeltaOps(table) + .write(vec![batch1]) + .with_schema_mode(SchemaMode::Merge) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + let ctx: SessionContext = DeltaSessionContext::default().into(); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx.sql("select col_1, col_2 from test").await.unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+-------+-------+", + "| col_1 | col_2 |", + "+-------+-------+", + "| A | |", + "| B | |", + "| E | E2 |", + "| F | F2 |", + "| G | G2 |", + "+-------+-------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn delta_scan_supports_pushdown() { + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_1", DataType::Utf8, false), + Field::new("col_2", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + Some("C"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("A2"), + Some("B2"), + Some("C2"), + ])), + ], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + + let mut cfg = SessionConfig::default(); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx + .sql("select col_1, col_2 from test WHERE col_1 = 'A'") + .await + .unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+-------+-------+", + "| col_1 | col_2 |", + "+-------+-------+", + "| A | A2 |", + "+-------+-------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn delta_scan_supports_nested_missing_columns() { + let column1_schema1: arrow::datatypes::Fields = + vec![Field::new("col_1a", DataType::Utf8, true)].into(); + let schema1 = Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + DataType::Struct(column1_schema1.clone()), + true, + )])); + + let batch1 = RecordBatch::try_new( + schema1.clone(), + vec![Arc::new(StructArray::new( + column1_schema1, + vec![Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + ]))], + None, + ))], + ) + .unwrap(); + + let column1_schema2: arrow::datatypes::Fields = vec![ + Field::new("col_1a", DataType::Utf8, true), + Field::new("col_1b", DataType::Utf8, true), + ] + .into(); + let schema2 = Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + DataType::Struct(column1_schema2.clone()), + true, + )])); + + let batch2 = 
RecordBatch::try_new( + schema2.clone(), + vec![Arc::new(StructArray::new( + column1_schema2, + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("E"), + Some("F"), + Some("G"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("E2"), + Some("F2"), + Some("G2"), + ])), + ], + None, + ))], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch1]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let table = crate::DeltaOps(table) + .write(vec![batch2]) + .with_schema_mode(SchemaMode::Merge) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + let ctx: SessionContext = DeltaSessionContext::default().into(); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx + .sql("select col_1.col_1a, col_1.col_1b from test") + .await + .unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+--------------------+--------------------+", + "| test.col_1[col_1a] | test.col_1[col_1b] |", + "+--------------------+--------------------+", + "| A | |", + "| B | |", + "| E | E2 |", + "| F | F2 |", + "| G | G2 |", + "+--------------------+--------------------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn test_multiple_predicate_pushdown() { + use crate::datafusion::prelude::SessionContext; + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("moDified", DataType::Utf8, true), + Field::new("id", DataType::Utf8, true), + Field::new("vaLue", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-01", + "2021-02-01", + "2021-02-02", + "2021-02-02", + ])), + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C", "D"])), + Arc::new(arrow::array::Int32Array::from(vec![1, 10, 20, 100])), + ], + ) + .unwrap(); + // write some data + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let datafusion = SessionContext::new(); + let table = Arc::new(table); + + datafusion.register_table("snapshot", table).unwrap(); + + let df = datafusion + .sql("select * from snapshot where id > 10000 and id < 20000") + .await + .unwrap(); + + df.collect().await.unwrap(); + } + + #[tokio::test] + async fn test_delta_scan_builder_no_scan_config() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + let scan = DeltaScanBuilder::new(table.snapshot().unwrap(), table.log_store(), &state) + .with_filter(Some(col("a").eq(lit("s")))) + .build() + .await + .unwrap(); + + let mut visitor = ParquetPredicateVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert_eq!(visitor.predicate.unwrap().to_string(), "a@0 = s"); + assert_eq!( + visitor.pruning_predicate.unwrap().orig_expr().to_string(), + "a@0 = s" + ); + } + + #[tokio::test] + async fn 
test_delta_scan_builder_scan_config_disable_pushdown() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let snapshot = table.snapshot().unwrap(); + let ctx = SessionContext::new(); + let state = ctx.state(); + let scan = DeltaScanBuilder::new(snapshot, table.log_store(), &state) + .with_filter(Some(col("a").eq(lit("s")))) + .with_scan_config( + DeltaScanConfigBuilder::new() + .with_parquet_pushdown(false) + .build(snapshot) + .unwrap(), + ) + .build() + .await + .unwrap(); + + let mut visitor = ParquetPredicateVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert!(visitor.predicate.is_none()); + assert!(visitor.pruning_predicate.is_none()); + } + + #[tokio::test] + async fn test_delta_scan_applies_parquet_options() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let snapshot = table.snapshot().unwrap(); + + let mut config = SessionConfig::default(); + config.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(config); + let state = ctx.state(); + + let scan = DeltaScanBuilder::new(snapshot, table.log_store(), &state) + .build() + .await + .unwrap(); + + let mut visitor = ParquetOptionsVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert_eq!(ctx.copied_table_options().parquet, visitor.options.unwrap()); + } + + #[derive(Default)] + struct ParquetPredicateVisitor { + predicate: Option>, + pruning_predicate: Option>, + } + + impl ExecutionPlanVisitor for ParquetPredicateVisitor { + type Error = DataFusionError; + + fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { + if let Some(parquet_exec) = plan.as_any().downcast_ref::() { + self.predicate = parquet_exec.predicate().cloned(); + self.pruning_predicate = parquet_exec.pruning_predicate().cloned(); + } + Ok(true) + } + } + + #[derive(Default)] + struct ParquetOptionsVisitor { + options: Option, + } + + impl ExecutionPlanVisitor for ParquetOptionsVisitor { + type Error = DataFusionError; + + fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { + if let Some(parquet_exec) = plan.as_any().downcast_ref::() { + self.options = Some(parquet_exec.table_parquet_options().clone()) + } + Ok(true) + } + } } diff --git a/crates/core/src/delta_datafusion/physical.rs b/crates/core/src/delta_datafusion/physical.rs index 954df0b046..c37b85101e 100644 --- a/crates/core/src/delta_datafusion/physical.rs +++ b/crates/core/src/delta_datafusion/physical.rs @@ -74,6 +74,10 @@ impl DisplayAs for MetricObserverExec { } impl ExecutionPlan for MetricObserverExec { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn std::any::Any { self } @@ -82,16 +86,12 @@ impl ExecutionPlan for MetricObserverExec { self.parent.schema() } - fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning { - self.parent.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[datafusion_physical_expr::PhysicalSortExpr]> { - self.parent.output_ordering() + fn properties(&self) -> 
&datafusion::physical_plan::PlanProperties { + self.parent.properties() } - fn children(&self) -> Vec> { - vec![self.parent.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.parent] } fn execute( @@ -178,3 +178,7 @@ pub(crate) fn find_metric_node( None } + +pub(crate) fn get_metric(metrics: &MetricsSet, name: &str) -> usize { + metrics.sum_by_name(name).map(|m| m.as_usize()).unwrap_or(0) +} diff --git a/crates/core/src/delta_datafusion/planner.rs b/crates/core/src/delta_datafusion/planner.rs new file mode 100644 index 0000000000..f0af1092ca --- /dev/null +++ b/crates/core/src/delta_datafusion/planner.rs @@ -0,0 +1,57 @@ +//! Custom planners for datafusion so that you can convert custom nodes, can be used +//! to trace custom metrics in an operation +//! +//! # Example +//! +//! #[derive(Clone)] +//! struct MergeMetricExtensionPlanner {} +//! +//! #[async_trait] +//! impl ExtensionPlanner for MergeMetricExtensionPlanner { +//! async fn plan_extension( +//! &self, +//! planner: &dyn PhysicalPlanner, +//! node: &dyn UserDefinedLogicalNode, +//! _logical_inputs: &[&LogicalPlan], +//! physical_inputs: &[Arc], +//! session_state: &SessionState, +//! ) -> DataFusionResult>> {} +//! +//! let merge_planner = DeltaPlanner:: { +//! extension_planner: MergeMetricExtensionPlanner {} +//! }; +//! +//! let state = state.with_query_planner(Arc::new(merge_planner)); +use std::sync::Arc; + +use crate::delta_datafusion::DataFusionResult; +use async_trait::async_trait; +use datafusion::physical_planner::PhysicalPlanner; +use datafusion::{ + execution::{context::QueryPlanner, session_state::SessionState}, + physical_plan::ExecutionPlan, + physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner}, +}; +use datafusion_expr::LogicalPlan; + +/// Deltaplanner +pub struct DeltaPlanner { + /// custom extension planner + pub extension_planner: T, +} + +#[async_trait] +impl QueryPlanner for DeltaPlanner { + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionState, + ) -> DataFusionResult> { + let planner = Arc::new(Box::new(DefaultPhysicalPlanner::with_extension_planners( + vec![Arc::new(self.extension_planner.clone())], + ))); + planner + .create_physical_plan(logical_plan, session_state) + .await + } +} diff --git a/crates/core/src/delta_datafusion/schema_adapter.rs b/crates/core/src/delta_datafusion/schema_adapter.rs new file mode 100644 index 0000000000..5fb0724f50 --- /dev/null +++ b/crates/core/src/delta_datafusion/schema_adapter.rs @@ -0,0 +1,80 @@ +use crate::operations::cast::cast_record_batch; +use arrow_array::RecordBatch; +use arrow_schema::{Schema, SchemaRef}; +use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper}; +use std::fmt::Debug; +use std::sync::Arc; + +/// A Schema Adapter Factory which provides casting record batches from parquet to meet +/// delta lake conventions. 
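// Sketch of how the factory is wired into the scan (see `DeltaScanBuilder::build`):
//
//     ParquetExecBuilder::new(file_scan_config)
//         .with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {}))
//         .with_table_parquet_options(parquet_options);
//
// Record batches read from individual parquet files are then cast by
// `SchemaMapping::map_batch` to the table's arrow schema, so files written with
// older, narrower schemas still produce batches with the full set of columns
// (exercised by the `delta_scan_supports_missing_columns` test).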
+#[derive(Debug)] +pub(crate) struct DeltaSchemaAdapterFactory {} + +impl SchemaAdapterFactory for DeltaSchemaAdapterFactory { + fn create(&self, schema: SchemaRef) -> Box { + Box::new(DeltaSchemaAdapter { + table_schema: schema, + }) + } +} + +pub(crate) struct DeltaSchemaAdapter { + /// Schema for the table + table_schema: SchemaRef, +} + +impl SchemaAdapter for DeltaSchemaAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.table_schema.field(index); + Some(file_schema.fields.find(field.name())?.0) + } + + fn map_schema( + &self, + file_schema: &Schema, + ) -> datafusion_common::Result<(Arc, Vec)> { + let mut projection = Vec::with_capacity(file_schema.fields().len()); + + for (file_idx, file_field) in file_schema.fields.iter().enumerate() { + if self.table_schema.fields().find(file_field.name()).is_some() { + projection.push(file_idx); + } + } + + Ok(( + Arc::new(SchemaMapping { + table_schema: self.table_schema.clone(), + }), + projection, + )) + } +} + +#[derive(Debug)] +pub(crate) struct SchemaMapping { + table_schema: SchemaRef, +} + +impl SchemaMapper for SchemaMapping { + fn map_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let record_batch = cast_record_batch(&batch, self.table_schema.clone(), false, true)?; + Ok(record_batch) + } + + fn map_partial_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let partial_table_schema = Arc::new(Schema::new( + batch + .schema() + .fields() + .iter() + .filter_map(|batch_field| { + self.table_schema.field_with_name(batch_field.name()).ok() + }) + .cloned() + .collect::>(), + )); + + let record_batch = cast_record_batch(&batch, partial_table_schema, false, true)?; + Ok(record_batch) + } +} diff --git a/crates/core/src/errors.rs b/crates/core/src/errors.rs index 63524fd227..0fa589286b 100644 --- a/crates/core/src/errors.rs +++ b/crates/core/src/errors.rs @@ -1,7 +1,7 @@ //! 
Exceptions for the deltalake crate use object_store::Error as ObjectStoreError; -use crate::operations::transaction::TransactionError; +use crate::operations::transaction::{CommitBuilderError, TransactionError}; use crate::protocol::ProtocolError; /// A result returned by delta-rs @@ -11,6 +11,9 @@ pub type DeltaResult = Result; #[allow(missing_docs)] #[derive(thiserror::Error, Debug)] pub enum DeltaTableError { + #[error("Kernel error: {0}")] + KernelError(#[from] delta_kernel::error::Error), + #[error("Delta protocol violation: {source}")] Protocol { source: ProtocolError }, @@ -146,6 +149,13 @@ pub enum DeltaTableError { source: std::io::Error, }, + /// Error raised while preparing a commit + #[error("Commit actions are unsound: {source}")] + CommitValidation { + /// The source error + source: CommitBuilderError, + }, + /// Error raised while commititng transaction #[error("Transaction failed: {source}")] Transaction { @@ -210,6 +220,15 @@ pub enum DeltaTableError { #[error("Table has not yet been initialized")] NotInitialized, + + #[error("Change Data not enabled for version: {version}, Start: {start}, End: {end}")] + ChangeDataNotRecorded { version: i64, start: i64, end: i64 }, + + #[error("Reading a table version: {version} that does not have change data enabled")] + ChangeDataNotEnabled { version: i64 }, + + #[error("Invalid version start version {start} is greater than version {end}")] + ChangeDataInvalidVersionRange { start: i64, end: i64 }, } impl From for DeltaTableError { diff --git a/crates/core/src/kernel/arrow/json.rs b/crates/core/src/kernel/arrow/json.rs index dcb56d308a..ed31a7b64e 100644 --- a/crates/core/src/kernel/arrow/json.rs +++ b/crates/core/src/kernel/arrow/json.rs @@ -62,9 +62,10 @@ pub(crate) fn parse_json( for it in 0..json_strings.len() { if json_strings.is_null(it) { if value_count > 0 { - let slice = json_strings.slice(value_start, value_count); - let batch = decode_reader(&mut decoder, get_reader(slice.value_data())) - .collect::, _>>()?; + let slice_data = get_nonnull_slice_data(json_strings, value_start, value_count); + let batch = + decode_reader(&mut decoder, get_reader(&slice_data)) + .collect::, _>>()?; batches.extend(batch); value_count = 0; } @@ -86,15 +87,28 @@ pub(crate) fn parse_json( } if value_count > 0 { - let slice = json_strings.slice(value_start, value_count); - let batch = decode_reader(&mut decoder, get_reader(slice.value_data())) - .collect::, _>>()?; + let slice_data = get_nonnull_slice_data(json_strings, value_start, value_count); + let batch = + decode_reader(&mut decoder, get_reader(&slice_data)).collect::, _>>()?; batches.extend(batch); } Ok(concat_batches(&output_schema, &batches)?) } +/// Get the data of a slice of non-null JSON strings. +fn get_nonnull_slice_data( + json_strings: &StringArray, + value_start: usize, + value_count: usize, +) -> Vec { + let slice = json_strings.slice(value_start, value_count); + slice.iter().fold(Vec::new(), |mut acc, s| { + acc.extend_from_slice(s.unwrap().as_bytes()); + acc + }) +} + /// Decode a stream of bytes into a stream of record batches. 
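// A minimal sketch of the arrow-rs behaviour that motivates the helper above
// (array contents assumed): `StringArray::slice(..).value_data()` returns the
// array's entire values buffer and ignores the slice offsets, e.g.
//
//     let arr = StringArray::from(vec![Some("a"), Some("bb"), Some("ccc")]);
//     let sliced = arr.slice(1, 1);
//     // sliced.value_data() still spans "abbccc"; iterating `sliced` yields only "bb"
//
// so the slice is iterated and only the referenced string bytes are concatenated
// before being handed to the JSON decoder.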
pub(crate) fn decode_stream> + Unpin>( mut decoder: Decoder, @@ -148,3 +162,42 @@ pub(crate) fn decode_reader<'a, R: BufRead + 'a>( }; std::iter::from_fn(move || next().map_err(DeltaTableError::from).transpose()) } + +#[cfg(test)] +mod tests { + use crate::kernel::arrow::json::parse_json; + use crate::DeltaTableConfig; + use arrow_array::{Int32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema}; + use std::sync::Arc; + + #[test] + fn json_to_struct() { + let json_strings = StringArray::from(vec![ + Some(r#"{"a": 1, "b": "foo"}"#), + Some(r#"{"a": 2, "b": "bar"}"#), + None, + Some(r#"{"a": 3, "b": "baz"}"#), + ]); + let struct_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + ])); + let config = DeltaTableConfig::default(); + let result = parse_json(&json_strings, struct_schema.clone(), &config).unwrap(); + let expected = RecordBatch::try_new( + struct_schema, + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(3)])), + Arc::new(StringArray::from(vec![ + Some("foo"), + Some("bar"), + None, + Some("baz"), + ])), + ], + ) + .unwrap(); + assert_eq!(result, expected); + } +} diff --git a/crates/core/src/kernel/arrow/mod.rs b/crates/core/src/kernel/arrow/mod.rs index ab121ee8a6..0fb41379dd 100644 --- a/crates/core/src/kernel/arrow/mod.rs +++ b/crates/core/src/kernel/arrow/mod.rs @@ -3,268 +3,19 @@ use std::sync::Arc; use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, - Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, + Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; use lazy_static::lazy_static; -use super::{ActionType, ArrayType, DataType, MapType, PrimitiveType, StructField, StructType}; - pub(crate) mod extract; pub(crate) mod json; -const MAP_ROOT_DEFAULT: &str = "entries"; -const MAP_KEY_DEFAULT: &str = "keys"; -const MAP_VALUE_DEFAULT: &str = "values"; +const MAP_ROOT_DEFAULT: &str = "key_value"; +const MAP_KEY_DEFAULT: &str = "key"; +const MAP_VALUE_DEFAULT: &str = "value"; const LIST_ROOT_DEFAULT: &str = "item"; -impl TryFrom for ArrowField { - type Error = ArrowError; - - fn try_from(value: ActionType) -> Result { - value.schema_field().try_into() - } -} - -impl TryFrom<&StructType> for ArrowSchema { - type Error = ArrowError; - - fn try_from(s: &StructType) -> Result { - let fields = s - .fields() - .iter() - .map(TryInto::try_into) - .collect::, ArrowError>>()?; - - Ok(ArrowSchema::new(fields)) - } -} - -impl TryFrom<&StructField> for ArrowField { - type Error = ArrowError; - - fn try_from(f: &StructField) -> Result { - let metadata = f - .metadata() - .iter() - .map(|(key, val)| Ok((key.clone(), serde_json::to_string(val)?))) - .collect::>() - .map_err(|err| ArrowError::JsonError(err.to_string()))?; - - let field = ArrowField::new( - f.name(), - ArrowDataType::try_from(f.data_type())?, - f.is_nullable(), - ) - .with_metadata(metadata); - - Ok(field) - } -} - -impl TryFrom<&ArrayType> for ArrowField { - type Error = ArrowError; - fn try_from(a: &ArrayType) -> Result { - Ok(ArrowField::new( - LIST_ROOT_DEFAULT, - ArrowDataType::try_from(a.element_type())?, - // TODO check how to handle nullability - a.contains_null(), - )) - } -} - -impl TryFrom<&MapType> for ArrowField { - type Error = ArrowError; - - fn try_from(a: &MapType) -> Result { - Ok(ArrowField::new( - MAP_ROOT_DEFAULT, - ArrowDataType::Struct( - vec![ - 
ArrowField::new( - MAP_KEY_DEFAULT, - ArrowDataType::try_from(a.key_type())?, - false, - ), - ArrowField::new( - MAP_VALUE_DEFAULT, - ArrowDataType::try_from(a.value_type())?, - a.value_contains_null(), - ), - ] - .into(), - ), - // always non-null - false, - )) - } -} - -impl TryFrom<&DataType> for ArrowDataType { - type Error = ArrowError; - - fn try_from(t: &DataType) -> Result { - match t { - DataType::Primitive(p) => { - match p { - PrimitiveType::String => Ok(ArrowDataType::Utf8), - PrimitiveType::Long => Ok(ArrowDataType::Int64), // undocumented type - PrimitiveType::Integer => Ok(ArrowDataType::Int32), - PrimitiveType::Short => Ok(ArrowDataType::Int16), - PrimitiveType::Byte => Ok(ArrowDataType::Int8), - PrimitiveType::Float => Ok(ArrowDataType::Float32), - PrimitiveType::Double => Ok(ArrowDataType::Float64), - PrimitiveType::Boolean => Ok(ArrowDataType::Boolean), - PrimitiveType::Binary => Ok(ArrowDataType::Binary), - PrimitiveType::Decimal(precision, scale) => { - if precision <= &38 { - Ok(ArrowDataType::Decimal128(*precision, *scale)) - } else if precision <= &76 { - Ok(ArrowDataType::Decimal256(*precision, *scale)) - } else { - Err(ArrowError::SchemaError(format!( - "Precision too large to be represented in Arrow: {}", - precision - ))) - } - } - PrimitiveType::Date => { - // A calendar date, represented as a year-month-day triple without a - // timezone. Stored as 4 bytes integer representing days since 1970-01-01 - Ok(ArrowDataType::Date32) - } - PrimitiveType::Timestamp => { - // Issue: https://github.com/delta-io/delta/issues/643 - Ok(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)) - } - } - } - DataType::Struct(s) => Ok(ArrowDataType::Struct( - s.fields() - .iter() - .map(TryInto::try_into) - .collect::, ArrowError>>()? - .into(), - )), - DataType::Array(a) => Ok(ArrowDataType::List(Arc::new(a.as_ref().try_into()?))), - DataType::Map(m) => Ok(ArrowDataType::Map(Arc::new(m.as_ref().try_into()?), false)), - } - } -} - -impl TryFrom<&ArrowSchema> for StructType { - type Error = ArrowError; - - fn try_from(arrow_schema: &ArrowSchema) -> Result { - let new_fields: Result, _> = arrow_schema - .fields() - .iter() - .map(|field| field.as_ref().try_into()) - .collect(); - Ok(StructType::new(new_fields?)) - } -} - -impl TryFrom for StructType { - type Error = ArrowError; - - fn try_from(arrow_schema: ArrowSchemaRef) -> Result { - arrow_schema.as_ref().try_into() - } -} - -impl TryFrom<&ArrowField> for StructField { - type Error = ArrowError; - - fn try_from(arrow_field: &ArrowField) -> Result { - Ok(StructField::new( - arrow_field.name().clone(), - DataType::try_from(arrow_field.data_type())?, - arrow_field.is_nullable(), - ) - .with_metadata(arrow_field.metadata().iter().map(|(k, v)| (k.clone(), v)))) - } -} - -impl TryFrom<&ArrowDataType> for DataType { - type Error = ArrowError; - - fn try_from(arrow_datatype: &ArrowDataType) -> Result { - match arrow_datatype { - ArrowDataType::Utf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::LargeUtf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::Int64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::Int32 => Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::Int16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::Int8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::UInt64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::UInt32 => 
Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::UInt16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::UInt8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::Float32 => Ok(DataType::Primitive(PrimitiveType::Float)), - ArrowDataType::Float64 => Ok(DataType::Primitive(PrimitiveType::Double)), - ArrowDataType::Boolean => Ok(DataType::Primitive(PrimitiveType::Boolean)), - ArrowDataType::Binary => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::FixedSizeBinary(_) => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::LargeBinary => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::Decimal128(p, s) => { - Ok(DataType::Primitive(PrimitiveType::Decimal(*p, *s))) - } - ArrowDataType::Decimal256(p, s) => { - Ok(DataType::Primitive(PrimitiveType::Decimal(*p, *s))) - } - ArrowDataType::Date32 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Date64 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => { - Ok(DataType::Primitive(PrimitiveType::Timestamp)) - } - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) - if tz.eq_ignore_ascii_case("utc") => - { - Ok(DataType::Primitive(PrimitiveType::Timestamp)) - } - ArrowDataType::Struct(fields) => { - let converted_fields: Result, _> = fields - .iter() - .map(|field| field.as_ref().try_into()) - .collect(); - Ok(DataType::Struct(Box::new(StructType::new( - converted_fields?, - )))) - } - ArrowDataType::List(field) => Ok(DataType::Array(Box::new(ArrayType::new( - (*field).data_type().try_into()?, - (*field).is_nullable(), - )))), - ArrowDataType::LargeList(field) => Ok(DataType::Array(Box::new(ArrayType::new( - (*field).data_type().try_into()?, - (*field).is_nullable(), - )))), - ArrowDataType::FixedSizeList(field, _) => Ok(DataType::Array(Box::new( - ArrayType::new((*field).data_type().try_into()?, (*field).is_nullable()), - ))), - ArrowDataType::Map(field, _) => { - if let ArrowDataType::Struct(struct_fields) = field.data_type() { - let key_type = struct_fields[0].data_type().try_into()?; - let value_type = struct_fields[1].data_type().try_into()?; - let value_type_nullable = struct_fields[1].is_nullable(); - Ok(DataType::Map(Box::new(MapType::new( - key_type, - value_type, - value_type_nullable, - )))) - } else { - panic!("DataType::Map should contain a struct field child"); - } - } - s => Err(ArrowError::SchemaError(format!( - "Invalid data type for Delta Lake: {s}" - ))), - } - } -} - macro_rules! 
arrow_map { ($fieldname: ident, null) => { ArrowField::new( @@ -448,7 +199,9 @@ pub(crate) fn delta_log_schema_for_table( ], protocol[ minReaderVersion:Int32, - minWriterVersion:Int32 + minWriterVersion:Int32, + writerFeatures[element]{Utf8}, + readerFeatures[element]{Utf8} ], txn[ appId:Utf8, @@ -497,13 +250,15 @@ pub(crate) fn delta_log_schema_for_table( .iter() .for_each(|f| max_min_schema_for_fields(&mut max_min_vec, f)); - stats_parsed_fields.extend(["minValues", "maxValues"].into_iter().map(|name| { - ArrowField::new( - name, - ArrowDataType::Struct(max_min_vec.clone().into()), - true, - ) - })); + if max_min_vec.len() > 0 { + stats_parsed_fields.extend(["minValues", "maxValues"].into_iter().map(|name| { + ArrowField::new( + name, + ArrowDataType::Struct(max_min_vec.clone().into()), + true, + ) + })); + } let mut null_count_vec = Vec::new(); non_partition_fields @@ -575,8 +330,7 @@ fn max_min_schema_for_fields(dest: &mut Vec, f: &ArrowField) { // don't compute min or max for list, map or binary types ArrowDataType::List(_) | ArrowDataType::Map(_, _) | ArrowDataType::Binary => { /* noop */ } _ => { - let f = f.clone(); - dest.push(f); + dest.push(ArrowField::new(f.name(), f.data_type().clone(), true)); } } } @@ -605,15 +359,15 @@ fn null_count_schema_for_fields(dest: &mut Vec, f: &ArrowField) { #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::sync::Arc; + use arrow::array::ArrayData; - use arrow_array::Array; - use arrow_array::{make_array, ArrayRef, MapArray, StringArray, StructArray}; + use arrow_array::{Array, BinaryArray, MapArray, RecordBatch, StringArray, StructArray}; use arrow_buffer::{Buffer, ToByteSlice}; - use arrow_schema::Field; + use delta_kernel::schema::{DataType, MapType, PrimitiveType, StructField, StructType}; use super::*; - use std::collections::HashMap; - use std::sync::Arc; #[test] fn delta_log_schema_for_table_test() { @@ -756,73 +510,6 @@ mod tests { } } - #[test] - fn test_arrow_from_delta_decimal_type() { - let precision = 20; - let scale = 2; - let decimal_field = DataType::Primitive(PrimitiveType::Decimal(precision, scale)); - assert_eq!( - >::try_from(&decimal_field).unwrap(), - ArrowDataType::Decimal128(precision, scale) - ); - } - - #[test] - fn test_arrow_from_delta_timestamp_type() { - let timestamp_field = DataType::Primitive(PrimitiveType::Timestamp); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) - ); - } - - #[test] - fn test_delta_from_arrow_timestamp_type() { - let timestamp_field = ArrowDataType::Timestamp(TimeUnit::Microsecond, None); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - DataType::Primitive(PrimitiveType::Timestamp) - ); - } - - #[test] - fn test_delta_from_arrow_timestamp_type_with_tz() { - let timestamp_field = - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string().into())); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - DataType::Primitive(PrimitiveType::Timestamp) - ); - } - - #[test] - fn test_delta_from_arrow_map_type() { - let arrow_map = ArrowDataType::Map( - Arc::new(ArrowField::new( - "entries", - ArrowDataType::Struct( - vec![ - ArrowField::new("key", ArrowDataType::Int8, false), - ArrowField::new("value", ArrowDataType::Binary, true), - ] - .into(), - ), - false, - )), - false, - ); - let converted_map: DataType = (&arrow_map).try_into().unwrap(); - - assert_eq!( - converted_map, - DataType::Map(Box::new(MapType::new( - DataType::Primitive(PrimitiveType::Byte), - DataType::Primitive(PrimitiveType::Binary), - true, 
- ))) - ); - } - #[test] fn test_record_batch_from_map_type() { let keys = vec!["0", "1", "5", "6", "7"]; @@ -836,52 +523,36 @@ mod tests { let entry_offsets = vec![0u32, 1, 1, 4, 5, 5]; let num_rows = keys.len(); - // Copied the function `new_from_string` with the patched code from https://github.com/apache/arrow-rs/pull/4808 - // This should be reverted back [`MapArray::new_from_strings`] once arrow is upgraded in this project. - fn new_from_strings<'a>( - keys: impl Iterator, - values: &dyn Array, - entry_offsets: &[u32], - ) -> Result { - let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice()); - let keys_data = StringArray::from_iter_values(keys); - - let keys_field = Arc::new(Field::new("keys", ArrowDataType::Utf8, false)); - let values_field = Arc::new(Field::new( - "values", - values.data_type().clone(), - values.null_count() > 0, - )); - - let entry_struct = StructArray::from(vec![ - (keys_field, Arc::new(keys_data) as ArrayRef), - (values_field, make_array(values.to_data())), - ]); - - let map_data_type = ArrowDataType::Map( - Arc::new(Field::new( - "entries", - entry_struct.data_type().clone(), - false, - )), - false, - ); - - let map_data = ArrayData::builder(map_data_type) - .len(entry_offsets.len() - 1) - .add_buffer(entry_offsets_buffer) - .add_child_data(entry_struct.into_data()) - .build()?; + let key_field = Arc::new(ArrowField::new(MAP_KEY_DEFAULT, ArrowDataType::Utf8, false)); + let value_field = Arc::new(ArrowField::new( + MAP_VALUE_DEFAULT, + ArrowDataType::Binary, + false, + )); + let key_value_field = ArrowField::new_struct( + MAP_ROOT_DEFAULT, + vec![key_field.clone(), value_field.clone()], + false, + ); + let key_value_array = StructArray::new( + vec![key_field, value_field].into(), + vec![ + Arc::new(StringArray::from(keys)), + Arc::new(BinaryArray::from(values)), + ], + None, + ); + let entry_offsets_buffer = Buffer::from(entry_offsets.as_slice().to_byte_slice()); - Ok(MapArray::from(map_data)) - } + let map_data_type = ArrowDataType::Map(Arc::new(key_value_field), false); + let map_data = ArrayData::builder(map_data_type) + .len(entry_offsets.len() - 1) + .add_buffer(entry_offsets_buffer) + .add_child_data(key_value_array.into_data()) + .build() + .unwrap(); - let map_array = new_from_strings( - keys.into_iter(), - &arrow::array::BinaryArray::from(values), - entry_offsets.as_slice(), - ) - .expect("Could not create a map array"); + let map_array = MapArray::from(map_data); let schema = >::try_from(&StructType::new(vec![ @@ -897,9 +568,8 @@ mod tests { ])) .expect("Could not get schema"); - let record_batch = - arrow::record_batch::RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_array)]) - .expect("Failed to create RecordBatch"); + let record_batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_array)]) + .expect("Failed to create RecordBatch"); assert_eq!(record_batch.num_columns(), 1); assert_eq!(record_batch.num_rows(), num_rows); diff --git a/crates/core/src/kernel/expressions/eval.rs b/crates/core/src/kernel/expressions/eval.rs deleted file mode 100644 index 3796542ffc..0000000000 --- a/crates/core/src/kernel/expressions/eval.rs +++ /dev/null @@ -1,378 +0,0 @@ -//! Default Expression handler. -//! -//! Expression handling based on arrow-rs compute kernels. 
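For readers less familiar with the MapArray plumbing in the rewritten test: arrow-array also ships MapArray::new_from_strings (the constructor the deleted comment pointed at), which builds the same shape when the default entries/keys/values field names are acceptable; the test assembles ArrayData by hand because it needs the kernel's key_value/key/value names. A rough, self-contained sketch, assuming new_from_strings keeps its documented signature:

use arrow_array::{BinaryArray, MapArray};
use arrow_schema::ArrowError;

// Five map rows; two of them are empty (equal consecutive offsets).
fn build_map_example() -> Result<MapArray, ArrowError> {
    let keys = ["0", "1", "5", "6", "7"];
    let values = BinaryArray::from_iter_values(["a", "bc", "def", "g", "hi"]);
    let entry_offsets = [0u32, 1, 1, 4, 5, 5];
    MapArray::new_from_strings(keys.into_iter(), &values, &entry_offsets)
}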
- -use std::sync::Arc; - -use arrow_arith::boolean::{and, is_null, not, or}; -use arrow_arith::numeric::{add, div, mul, sub}; -use arrow_array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Datum, Decimal128Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, RecordBatch, StringArray, - StructArray, TimestampMicrosecondArray, -}; -use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow_schema::{ArrowError, Field as ArrowField, Schema as ArrowSchema}; -use arrow_select::nullif::nullif; - -use crate::kernel::arrow::extract::extract_column; -use crate::kernel::error::{DeltaResult, Error}; -use crate::kernel::expressions::{scalars::Scalar, Expression}; -use crate::kernel::expressions::{BinaryOperator, UnaryOperator}; -use crate::kernel::{DataType, PrimitiveType, VariadicOperator}; - -fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { - arr.as_any() - .downcast_ref::() - .ok_or(Error::Generic("expected boolean array".to_string())) -} - -fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { - Arc::new(arr) as Arc -} - -// TODO leverage scalars / Datum - -impl Scalar { - /// Convert scalar to arrow array. - pub fn to_array(&self, num_rows: usize) -> DeltaResult { - use Scalar::*; - let arr: ArrayRef = match self { - Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), - Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), - Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), - Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), - Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), - Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), - String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), - Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), - Timestamp(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), - Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), - Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), - Decimal(val, precision, scale) => Arc::new( - Decimal128Array::from_value(*val, num_rows) - .with_precision_and_scale(*precision, *scale)?, - ), - Null(data_type) => match data_type { - DataType::Primitive(primitive) => match primitive { - PrimitiveType::Byte => Arc::new(Int8Array::new_null(num_rows)), - PrimitiveType::Short => Arc::new(Int16Array::new_null(num_rows)), - PrimitiveType::Integer => Arc::new(Int32Array::new_null(num_rows)), - PrimitiveType::Long => Arc::new(Int64Array::new_null(num_rows)), - PrimitiveType::Float => Arc::new(Float32Array::new_null(num_rows)), - PrimitiveType::Double => Arc::new(Float64Array::new_null(num_rows)), - PrimitiveType::String => Arc::new(StringArray::new_null(num_rows)), - PrimitiveType::Boolean => Arc::new(BooleanArray::new_null(num_rows)), - PrimitiveType::Timestamp => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows)) - } - PrimitiveType::Date => Arc::new(Date32Array::new_null(num_rows)), - PrimitiveType::Binary => Arc::new(BinaryArray::new_null(num_rows)), - PrimitiveType::Decimal(precision, scale) => Arc::new( - Decimal128Array::new_null(num_rows) - .with_precision_and_scale(*precision, *scale) - .unwrap(), - ), - }, - DataType::Array(_) => unimplemented!(), - DataType::Map { .. } => unimplemented!(), - DataType::Struct { .. 
} => unimplemented!(), - }, - Struct(values, fields) => { - let mut columns = Vec::with_capacity(values.len()); - for val in values { - columns.push(val.to_array(num_rows)?); - } - Arc::new(StructArray::try_new( - fields - .iter() - .map(TryInto::::try_into) - .collect::, _>>()? - .into(), - columns, - None, - )?) - } - }; - Ok(arr) - } -} - -/// evaluate expression -pub(crate) fn evaluate_expression( - expression: &Expression, - batch: &RecordBatch, - result_type: Option<&DataType>, -) -> DeltaResult { - use BinaryOperator::*; - use Expression::*; - - match (expression, result_type) { - (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), - (Column(name), _) => { - if name.contains('.') { - let mut path = name.split('.'); - // Safety: we know that the first path step exists, because we checked for '.' - let arr = extract_column(batch, path.next().unwrap(), &mut path).cloned()?; - // NOTE: need to assign first so that rust can figure out lifetimes - Ok(arr) - } else { - batch - .column_by_name(name) - .ok_or(Error::MissingColumn(name.clone())) - .cloned() - } - } - (Struct(fields), Some(DataType::Struct(schema))) => { - let output_schema: ArrowSchema = schema.as_ref().try_into()?; - let mut columns = Vec::with_capacity(fields.len()); - for (expr, field) in fields.iter().zip(schema.fields()) { - columns.push(evaluate_expression(expr, batch, Some(field.data_type()))?); - } - Ok(Arc::new(StructArray::try_new( - output_schema.fields().clone(), - columns, - None, - )?)) - } - (Struct(_), _) => Err(Error::Generic( - "Data type is required to evaluate struct expressions".to_string(), - )), - (UnaryOperation { op, expr }, _) => { - let arr = evaluate_expression(expr.as_ref(), batch, None)?; - Ok(match op { - UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), - UnaryOperator::IsNull => Arc::new(is_null(&arr)?), - }) - } - (BinaryOperation { op, left, right }, _) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - - type Operation = fn(&dyn Datum, &dyn Datum) -> Result, ArrowError>; - let eval: Operation = match op { - Plus => add, - Minus => sub, - Multiply => mul, - Divide => div, - LessThan => |l, r| lt(l, r).map(wrap_comparison_result), - LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), - GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), - GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), - Equal => |l, r| eq(l, r).map(wrap_comparison_result), - NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), - }; - - eval(&left_arr, &right_arr).map_err(|err| Error::GenericError { - source: Box::new(err), - }) - } - (VariadicOperation { op, exprs }, _) => { - let reducer = match op { - VariadicOperator::And => and, - VariadicOperator::Or => or, - }; - exprs - .iter() - .map(|expr| evaluate_expression(expr, batch, Some(&DataType::BOOLEAN))) - .reduce(|l, r| { - Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) - .map(wrap_comparison_result)?) - }) - .transpose()? - .ok_or(Error::Generic("empty expression".to_string())) - } - (NullIf { expr, if_expr }, _) => { - let expr_arr = evaluate_expression(expr.as_ref(), batch, None)?; - let if_expr_arr = - evaluate_expression(if_expr.as_ref(), batch, Some(&DataType::BOOLEAN))?; - let if_expr_arr = downcast_to_bool(&if_expr_arr)?; - Ok(nullif(&expr_arr, if_expr_arr)?) 
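For orientation while reading the removed evaluator: Scalar::to_array materialized a literal as a constant column of the requested length, which is what evaluate_expression relied on in its Literal arm. A small usage sketch against the deleted API (Scalar and DeltaResult come from the module being removed here):

use arrow_array::{Array, Int32Array};

fn literal_column_example() -> DeltaResult<()> {
    // A literal 7 expanded to a three-row Int32 column.
    let arr = Scalar::Integer(7).to_array(3)?;
    let ints = arr
        .as_any()
        .downcast_ref::<Int32Array>()
        .expect("expected an Int32Array");
    assert_eq!(ints.len(), 3);
    assert_eq!(ints.value(0), 7);
    Ok(())
}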
- } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use arrow_array::Int32Array; - use arrow_schema::{DataType, Field, Fields, Schema}; - use std::ops::{Add, Div, Mul, Sub}; - - #[test] - fn test_extract_column() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); - let column = Expression::Column("a".to_string()); - - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - - let schema = Schema::new(vec![Field::new( - "b", - DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), - false, - )]); - - let struct_values: ArrayRef = Arc::new(values.clone()); - let struct_array = StructArray::from(vec![( - Arc::new(Field::new("a", DataType::Int32, false)), - struct_values, - )]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(struct_array.clone())], - ) - .unwrap(); - let column = Expression::Column("b.a".to_string()); - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - } - - #[test] - fn test_binary_op_scalar() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = Expression::Column("a".to_string()); - - let expression = Box::new(column.clone().add(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().sub(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().mul(Expression::Literal(Scalar::Integer(2)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - // TODO handle type casting - let expression = Box::new(column.div(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); - assert_eq!(results.as_ref(), expected.as_ref()) - } - - #[test] - fn test_binary_op() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - ]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(values.clone()), Arc::new(values)], - ) - .unwrap(); - let column_a = Expression::Column("a".to_string()); - let column_b = Expression::Column("b".to_string()); - - let expression = Box::new(column_a.clone().add(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().sub(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = 
Arc::new(Int32Array::from(vec![0, 0, 0])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().mul(column_b)); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_binary_cmp() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = Expression::Column("a".to_string()); - let lit = Expression::Literal(Scalar::Integer(2)); - - let expression = Box::new(column.clone().lt(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().lt_eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().gt(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().gt_eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().ne(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_logical() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Boolean, false), - Field::new("b", DataType::Boolean, false), - ]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![ - Arc::new(BooleanArray::from(vec![true, false])), - Arc::new(BooleanArray::from(vec![false, true])), - ], - ) - .unwrap(); - let column_a = Expression::Column("a".to_string()); - let column_b = Expression::Column("b".to_string()); - - let expression = Box::new(column_a.clone().and(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new( - column_a - .clone() - .and(Expression::literal(Scalar::Boolean(true))), - ); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().or(column_b)); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - 
- let expression = Box::new( - column_a - .clone() - .or(Expression::literal(Scalar::Boolean(false))), - ); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - } -} diff --git a/crates/core/src/kernel/expressions/mod.rs b/crates/core/src/kernel/expressions/mod.rs deleted file mode 100644 index b7912681ec..0000000000 --- a/crates/core/src/kernel/expressions/mod.rs +++ /dev/null @@ -1,478 +0,0 @@ -//! expressions. - -use std::collections::HashSet; -use std::fmt::{Display, Formatter}; -use std::sync::Arc; - -use arrow_array::{ArrayRef, RecordBatch}; -use arrow_schema::Schema as ArrowSchema; -use itertools::Itertools; - -use self::eval::evaluate_expression; -use super::{DataType, DeltaResult, SchemaRef}; - -pub use self::scalars::*; - -mod eval; -mod scalars; - -/// Interface for implementing an Expression evaluator. -/// -/// It contains one Expression which can be evaluated on multiple ColumnarBatches. -/// Connectors can implement this interface to optimize the evaluation using the -/// connector specific capabilities. -pub trait ExpressionEvaluator { - /// Evaluate the expression on given ColumnarBatch data. - /// - /// Contains one value for each row of the input. - /// The data type of the output is same as the type output of the expression this evaluator is using. - fn evaluate(&self, batch: &RecordBatch) -> DeltaResult; -} - -/// Provides expression evaluation capability to Delta Kernel. -/// -/// Delta Kernel can use this client to evaluate predicate on partition filters, -/// fill up partition column values and any computation on data using Expressions. -pub trait ExpressionHandler { - /// Create an [`ExpressionEvaluator`] that can evaluate the given [`Expression`] - /// on columnar batches with the given [`Schema`] to produce data of [`DataType`]. - /// - /// # Parameters - /// - /// - `schema`: Schema of the input data. - /// - `expression`: Expression to evaluate. - /// - `output_type`: Expected result data type. 
- /// - /// [`Schema`]: crate::schema::StructType - /// [`DataType`]: crate::schema::DataType - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc; -} - -/// Default implementation of [`ExpressionHandler`] that uses [`evaluate_expression`] -#[derive(Debug)] -pub struct ArrowExpressionHandler {} - -impl ExpressionHandler for ArrowExpressionHandler { - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc { - Arc::new(DefaultExpressionEvaluator { - input_schema: schema, - expression: Box::new(expression), - output_type, - }) - } -} - -/// Default implementation of [`ExpressionEvaluator`] that uses [`evaluate_expression`] -#[derive(Debug)] -pub struct DefaultExpressionEvaluator { - input_schema: SchemaRef, - expression: Box, - output_type: DataType, -} - -impl ExpressionEvaluator for DefaultExpressionEvaluator { - fn evaluate(&self, batch: &RecordBatch) -> DeltaResult { - let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; - // TODO: make sure we have matching schemas for validation - // if batch.schema().as_ref() != &input_schema { - // return Err(Error::Generic(format!( - // "input schema does not match batch schema: {:?} != {:?}", - // input_schema, - // batch.schema() - // ))); - // }; - evaluate_expression(&self.expression, batch, Some(&self.output_type)) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -/// A binary operator. -pub enum BinaryOperator { - /// Arithmetic Plus - Plus, - /// Arithmetic Minus - Minus, - /// Arithmetic Multiply - Multiply, - /// Arithmetic Divide - Divide, - /// Comparison Less Than - LessThan, - /// Comparison Less Than Or Equal - LessThanOrEqual, - /// Comparison Greater Than - GreaterThan, - /// Comparison Greater Than Or Equal - GreaterThanOrEqual, - /// Comparison Equal - Equal, - /// Comparison Not Equal - NotEqual, -} - -/// Variadic operators -#[derive(Debug, Clone, PartialEq)] -pub enum VariadicOperator { - /// AND - And, - /// OR - Or, -} - -impl Display for BinaryOperator { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - // Self::And => write!(f, "AND"), - // Self::Or => write!(f, "OR"), - Self::Plus => write!(f, "+"), - Self::Minus => write!(f, "-"), - Self::Multiply => write!(f, "*"), - Self::Divide => write!(f, "/"), - Self::LessThan => write!(f, "<"), - Self::LessThanOrEqual => write!(f, "<="), - Self::GreaterThan => write!(f, ">"), - Self::GreaterThanOrEqual => write!(f, ">="), - Self::Equal => write!(f, "="), - Self::NotEqual => write!(f, "!="), - } - } -} - -#[derive(Debug, Clone, PartialEq)] -/// A unary operator. -pub enum UnaryOperator { - /// Unary Not - Not, - /// Unary Is Null - IsNull, -} - -/// A SQL expression. -/// -/// These expressions do not track or validate data types, other than the type -/// of literals. It is up to the expression evaluator to validate the -/// expression against a schema and add appropriate casts as required. -#[derive(Debug, Clone, PartialEq)] -pub enum Expression { - /// A literal value. - Literal(Scalar), - /// A column reference by name. - Column(String), - /// - Struct(Vec), - /// A binary operation. - BinaryOperation { - /// The operator. - op: BinaryOperator, - /// The left-hand side of the operation. - left: Box, - /// The right-hand side of the operation. - right: Box, - }, - /// A unary operation. - UnaryOperation { - /// The operator. - op: UnaryOperator, - /// The expression. - expr: Box, - }, - /// A variadic operation. 
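To make the handler/evaluator split above concrete, the removed pair was driven roughly as below. This is a sketch against the deleted API, not its delta-kernel replacement; the projected expression is illustrative, and the referenced types (SchemaRef, DataType, PrimitiveType, Expression, DeltaResult, ArrowExpressionHandler) come from the module being deleted in this hunk:

use arrow_array::{ArrayRef, RecordBatch};

fn project_a_plus_one(schema: SchemaRef, batch: &RecordBatch) -> DeltaResult<ArrayRef> {
    let handler = ArrowExpressionHandler {};
    // Evaluate `a + 1` against every row of the batch.
    let expr = Expression::column("a") + Expression::literal(1);
    let evaluator = handler.get_evaluator(
        schema,
        expr,
        DataType::Primitive(PrimitiveType::Integer),
    );
    evaluator.evaluate(batch)
}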
- VariadicOperation { - /// The operator. - op: VariadicOperator, - /// The expressions. - exprs: Vec, - }, - /// A NULLIF expression. - NullIf { - /// The expression to evaluate. - expr: Box, - /// The expression to compare against. - if_expr: Box, - }, - // TODO: support more expressions, such as IS IN, LIKE, etc. -} - -impl> From for Expression { - fn from(value: T) -> Self { - Self::literal(value) - } -} - -impl Display for Expression { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Literal(l) => write!(f, "{}", l), - Self::Column(name) => write!(f, "Column({})", name), - Self::Struct(exprs) => write!( - f, - "Struct({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ), - Self::BinaryOperation { op, left, right } => write!(f, "{} {} {}", left, op, right), - Self::UnaryOperation { op, expr } => match op { - UnaryOperator::Not => write!(f, "NOT {}", expr), - UnaryOperator::IsNull => write!(f, "{} IS NULL", expr), - }, - Self::VariadicOperation { op, exprs } => match op { - VariadicOperator::And => { - write!( - f, - "AND({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ) - } - VariadicOperator::Or => { - write!( - f, - "OR({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ) - } - }, - Self::NullIf { expr, if_expr } => write!(f, "NULLIF({}, {})", expr, if_expr), - } - } -} - -impl Expression { - /// Returns a set of columns referenced by this expression. - pub fn references(&self) -> HashSet<&str> { - let mut set = HashSet::new(); - - for expr in self.walk() { - if let Self::Column(name) = expr { - set.insert(name.as_str()); - } - } - - set - } - - /// Create an new expression for a column reference - pub fn column(name: impl Into) -> Self { - Self::Column(name.into()) - } - - /// Create a new expression for a literal value - pub fn literal(value: impl Into) -> Self { - Self::Literal(value.into()) - } - - /// Create a new expression for a struct - pub fn struct_expr(exprs: impl IntoIterator) -> Self { - Self::Struct(exprs.into_iter().collect()) - } - - /// Create a new expression for a unary operation - pub fn unary(op: UnaryOperator, expr: impl Into) -> Self { - Self::UnaryOperation { - op, - expr: Box::new(expr.into()), - } - } - - /// Create a new expression for a binary operation - pub fn binary( - op: BinaryOperator, - lhs: impl Into, - rhs: impl Into, - ) -> Self { - Self::BinaryOperation { - op, - left: Box::new(lhs.into()), - right: Box::new(rhs.into()), - } - } - - /// Create a new expression for a variadic operation - pub fn variadic(op: VariadicOperator, other: impl IntoIterator) -> Self { - let mut exprs = other.into_iter().collect::>(); - if exprs.is_empty() { - // TODO this might break if we introduce new variadic operators? 
- return Self::literal(matches!(op, VariadicOperator::And)); - } - if exprs.len() == 1 { - return exprs.pop().unwrap(); - } - Self::VariadicOperation { op, exprs } - } - - /// Create a new expression `self == other` - pub fn eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::Equal, self, other) - } - - /// Create a new expression `self != other` - pub fn ne(self, other: Self) -> Self { - Self::binary(BinaryOperator::NotEqual, self, other) - } - - /// Create a new expression `self < other` - pub fn lt(self, other: Self) -> Self { - Self::binary(BinaryOperator::LessThan, self, other) - } - - /// Create a new expression `self > other` - pub fn gt(self, other: Self) -> Self { - Self::binary(BinaryOperator::GreaterThan, self, other) - } - - /// Create a new expression `self >= other` - pub fn gt_eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::GreaterThanOrEqual, self, other) - } - - /// Create a new expression `self <= other` - pub fn lt_eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::LessThanOrEqual, self, other) - } - - /// Create a new expression `self AND other` - pub fn and(self, other: Self) -> Self { - self.and_many([other]) - } - - /// Create a new expression `self AND others` - pub fn and_many(self, other: impl IntoIterator) -> Self { - Self::variadic(VariadicOperator::And, std::iter::once(self).chain(other)) - } - - /// Create a new expression `self AND other` - pub fn or(self, other: Self) -> Self { - self.or_many([other]) - } - - /// Create a new expression `self OR other` - pub fn or_many(self, other: impl IntoIterator) -> Self { - Self::variadic(VariadicOperator::Or, std::iter::once(self).chain(other)) - } - - /// Create a new expression `self IS NULL` - pub fn is_null(self) -> Self { - Self::unary(UnaryOperator::IsNull, self) - } - - /// Create a new expression `NULLIF(self, other)` - pub fn null_if(self, other: Self) -> Self { - Self::NullIf { - expr: Box::new(self), - if_expr: Box::new(other), - } - } - - fn walk(&self) -> impl Iterator + '_ { - let mut stack = vec![self]; - std::iter::from_fn(move || { - let expr = stack.pop()?; - match expr { - Self::Literal(_) => {} - Self::Column { .. } => {} - Self::Struct(exprs) => { - stack.extend(exprs.iter()); - } - Self::BinaryOperation { left, right, .. } => { - stack.push(left); - stack.push(right); - } - Self::UnaryOperation { expr, .. 
} => { - stack.push(expr); - } - Self::VariadicOperation { op, exprs } => match op { - VariadicOperator::And | VariadicOperator::Or => { - stack.extend(exprs.iter()); - } - }, - Self::NullIf { expr, if_expr } => { - stack.push(expr); - stack.push(if_expr); - } - } - Some(expr) - }) - } -} - -impl std::ops::Add for Expression { - type Output = Self; - - fn add(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Plus, self, rhs) - } -} - -impl std::ops::Sub for Expression { - type Output = Self; - - fn sub(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Minus, self, rhs) - } -} - -impl std::ops::Mul for Expression { - type Output = Self; - - fn mul(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Multiply, self, rhs) - } -} - -impl std::ops::Div for Expression { - type Output = Self; - - fn div(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Divide, self, rhs) - } -} - -#[cfg(test)] -mod tests { - use super::Expression as Expr; - - #[test] - fn test_expression_format() { - let col_ref = Expr::column("x"); - let cases = [ - (col_ref.clone(), "Column(x)"), - (col_ref.clone().eq(Expr::literal(2)), "Column(x) = 2"), - ( - col_ref - .clone() - .gt_eq(Expr::literal(2)) - .and(col_ref.clone().lt_eq(Expr::literal(10))), - "AND(Column(x) >= 2, Column(x) <= 10)", - ), - ( - col_ref - .clone() - .gt(Expr::literal(2)) - .or(col_ref.clone().lt(Expr::literal(10))), - "OR(Column(x) > 2, Column(x) < 10)", - ), - ( - (col_ref.clone() - Expr::literal(4)).lt(Expr::literal(10)), - "Column(x) - 4 < 10", - ), - ( - (col_ref.clone() + Expr::literal(4)) / Expr::literal(10) * Expr::literal(42), - "Column(x) + 4 / 10 * 42", - ), - (col_ref.eq(Expr::literal("foo")), "Column(x) = 'foo'"), - ]; - - for (expr, expected) in cases { - let result = format!("{}", expr); - assert_eq!(result, expected); - } - } -} diff --git a/crates/core/src/kernel/expressions/scalars.rs b/crates/core/src/kernel/expressions/scalars.rs deleted file mode 100644 index 147c9d7633..0000000000 --- a/crates/core/src/kernel/expressions/scalars.rs +++ /dev/null @@ -1,534 +0,0 @@ -//! Scalar values for use in expressions. - -use std::cmp::Ordering; -use std::fmt::{Display, Formatter}; - -use arrow_array::Array; -use arrow_schema::TimeUnit; -use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc}; -use object_store::path::Path; - -use crate::kernel::{DataType, Error, PrimitiveType, StructField}; -use crate::NULL_PARTITION_VALUE_DATA_PATH; - -/// A single value, which can be null. Used for representing literal values -/// in [Expressions][crate::expressions::Expression]. -#[derive(Debug, Clone, PartialEq)] -pub enum Scalar { - /// 32bit integer - Integer(i32), - /// 64bit integer - Long(i64), - /// 16bit integer - Short(i16), - /// 8bit integer - Byte(i8), - /// 32bit floating point - Float(f32), - /// 64bit floating point - Double(f64), - /// utf-8 encoded string. - String(String), - /// true or false value - Boolean(bool), - /// Microsecond precision timestamp, adjusted to UTC. - Timestamp(i64), - /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 - Date(i32), - /// Binary data - Binary(Vec), - /// Decimal value - Decimal(i128, u8, i8), - /// Null value with a given data type. - Null(DataType), - /// Struct value - Struct(Vec, Vec), -} - -impl Scalar { - /// Returns the data type of this scalar. 
- pub fn data_type(&self) -> DataType { - match self { - Self::Integer(_) => DataType::Primitive(PrimitiveType::Integer), - Self::Long(_) => DataType::Primitive(PrimitiveType::Long), - Self::Short(_) => DataType::Primitive(PrimitiveType::Short), - Self::Byte(_) => DataType::Primitive(PrimitiveType::Byte), - Self::Float(_) => DataType::Primitive(PrimitiveType::Float), - Self::Double(_) => DataType::Primitive(PrimitiveType::Double), - Self::String(_) => DataType::Primitive(PrimitiveType::String), - Self::Boolean(_) => DataType::Primitive(PrimitiveType::Boolean), - Self::Timestamp(_) => DataType::Primitive(PrimitiveType::Timestamp), - Self::Date(_) => DataType::Primitive(PrimitiveType::Date), - Self::Binary(_) => DataType::Primitive(PrimitiveType::Binary), - Self::Decimal(_, precision, scale) => DataType::decimal(*precision, *scale), - Self::Null(data_type) => data_type.clone(), - Self::Struct(_, fields) => DataType::struct_type(fields.clone()), - } - } - - /// Returns true if this scalar is null. - pub fn is_null(&self) -> bool { - matches!(self, Self::Null(_)) - } - - /// Serializes this scalar as a string. - pub fn serialize(&self) -> String { - match self { - Self::String(s) => s.to_owned(), - Self::Byte(b) => b.to_string(), - Self::Short(s) => s.to_string(), - Self::Integer(i) => i.to_string(), - Self::Long(l) => l.to_string(), - Self::Float(f) => f.to_string(), - Self::Double(d) => d.to_string(), - Self::Boolean(b) => { - if *b { - "true".to_string() - } else { - "false".to_string() - } - } - Self::Timestamp(ts) => { - let ts = Utc.timestamp_micros(*ts).single().unwrap(); - ts.format("%Y-%m-%d %H:%M:%S%.6f").to_string() - } - Self::Date(days) => { - let date = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_opt(*days as i64 * 24 * 3600, 0).unwrap(), - ); - date.format("%Y-%m-%d").to_string() - } - Self::Decimal(value, _, scale) => match scale.cmp(&0) { - Ordering::Equal => value.to_string(), - Ordering::Greater => { - let scalar_multiple = 10_i128.pow(*scale as u32); - let mut s = String::new(); - s.push_str((value / scalar_multiple).to_string().as_str()); - s.push('.'); - s.push_str(&format!( - "{:0>scale$}", - value % scalar_multiple, - scale = *scale as usize - )); - s - } - Ordering::Less => { - let mut s = value.to_string(); - for _ in 0..(scale.abs()) { - s.push('0'); - } - s - } - }, - Self::Binary(val) => create_escaped_binary_string(val.as_slice()), - Self::Null(_) => "null".to_string(), - Self::Struct(_, _) => todo!("serializing struct values is not yet supported"), - } - } - - /// Serializes this scalar as a string for use in hive partition file names. - pub fn serialize_encoded(&self) -> String { - if self.is_null() { - return NULL_PARTITION_VALUE_DATA_PATH.to_string(); - } - Path::from(self.serialize()).to_string() - } - - /// Create a [`Scalar`] form a row in an arrow array. 
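As a reading aid for the removed serializers: serialize produced the raw partition-value string, while serialize_encoded additionally path-encoded it and mapped nulls to the crate's NULL_PARTITION_VALUE_DATA_PATH marker. Expected behaviour, sketched with illustrative values (Scalar, DataType, PrimitiveType are the types defined in the file being removed):

fn partition_value_examples() {
    assert_eq!(Scalar::Integer(42).serialize(), "42");
    assert_eq!(Scalar::Boolean(true).serialize(), "true");
    // Dates are stored as days since 1970-01-01 and rendered as %Y-%m-%d.
    assert_eq!(Scalar::Date(1).serialize(), "1970-01-02");
    // Null partition values encode to the crate's null-partition marker.
    let null = Scalar::Null(DataType::Primitive(PrimitiveType::Integer));
    assert!(null.is_null());
    assert!(!null.serialize_encoded().is_empty());
}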
- pub fn from_array(arr: &dyn Array, index: usize) -> Option { - use arrow_array::*; - use arrow_schema::DataType::*; - - if arr.len() <= index { - return None; - } - if arr.is_null(index) { - return Some(Self::Null(arr.data_type().try_into().ok()?)); - } - - match arr.data_type() { - Utf8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::String(v.value(index).to_string())), - LargeUtf8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::String(v.value(index).to_string())), - Boolean => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Boolean(v.value(index))), - Binary => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - LargeBinary => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - FixedSizeBinary(_) => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - Int8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Byte(v.value(index))), - Int16 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Short(v.value(index))), - Int32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Integer(v.value(index))), - Int64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Long(v.value(index))), - UInt8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Byte(v.value(index) as i8)), - UInt16 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Short(v.value(index) as i16)), - UInt32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Integer(v.value(index) as i32)), - UInt64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Long(v.value(index) as i64)), - Float32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Float(v.value(index))), - Float64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Double(v.value(index))), - Decimal128(precision, scale) => { - arr.as_any().downcast_ref::().map(|v| { - let value = v.value(index); - Self::Decimal(value, *precision, *scale) - }) - } - Date32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Date(v.value(index))), - // TODO handle timezones when implementing timestamp ntz feature. 
- Timestamp(TimeUnit::Microsecond, None) => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Timestamp(v.value(index))), - Struct(fields) => { - let struct_fields = fields - .iter() - .flat_map(|f| TryFrom::try_from(f.as_ref())) - .collect::>(); - let values = arr - .as_any() - .downcast_ref::() - .and_then(|struct_arr| { - struct_fields - .iter() - .map(|f: &StructField| { - struct_arr - .column_by_name(f.name()) - .and_then(|c| Self::from_array(c.as_ref(), index)) - }) - .collect::>>() - })?; - if struct_fields.len() != values.len() { - return None; - } - Some(Self::Struct(values, struct_fields)) - } - Float16 - | Decimal256(_, _) - | List(_) - | LargeList(_) - | FixedSizeList(_, _) - | Map(_, _) - | Date64 - | Timestamp(_, _) - | Time32(_) - | Time64(_) - | Duration(_) - | Interval(_) - | Dictionary(_, _) - | RunEndEncoded(_, _) - | Union(_, _) - | Null => None, - } - } -} - -impl PartialOrd for Scalar { - fn partial_cmp(&self, other: &Self) -> Option { - use Scalar::*; - match (self, other) { - (Null(_), Null(_)) => Some(Ordering::Equal), - (Integer(a), Integer(b)) => a.partial_cmp(b), - (Long(a), Long(b)) => a.partial_cmp(b), - (Short(a), Short(b)) => a.partial_cmp(b), - (Byte(a), Byte(b)) => a.partial_cmp(b), - (Float(a), Float(b)) => a.partial_cmp(b), - (Double(a), Double(b)) => a.partial_cmp(b), - (String(a), String(b)) => a.partial_cmp(b), - (Boolean(a), Boolean(b)) => a.partial_cmp(b), - (Timestamp(a), Timestamp(b)) => a.partial_cmp(b), - (Date(a), Date(b)) => a.partial_cmp(b), - (Binary(a), Binary(b)) => a.partial_cmp(b), - (Decimal(a, _, _), Decimal(b, _, _)) => a.partial_cmp(b), - (Struct(a, _), Struct(b, _)) => a.partial_cmp(b), - // TODO should we make an assumption about the ordering of nulls? - // rigth now this is only used for internal purposes. 
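Going the other way, the removed Scalar::from_array picked a single row out of an arrow column, mapping null slots to typed nulls and out-of-range indexes to None; roughly (Scalar is the enum defined in the file being removed):

use arrow_array::StringArray;

fn scalar_from_column_example() {
    let col = StringArray::from(vec![Some("a"), None]);
    assert_eq!(
        Scalar::from_array(&col, 0),
        Some(Scalar::String("a".to_string()))
    );
    // Null slot -> typed null; index past the end -> None.
    assert!(matches!(Scalar::from_array(&col, 1), Some(Scalar::Null(_))));
    assert_eq!(Scalar::from_array(&col, 2), None);
}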
- (Null(_), _) => Some(Ordering::Less), - (_, Null(_)) => Some(Ordering::Greater), - _ => None, - } - } -} - -impl Display for Scalar { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Integer(i) => write!(f, "{}", i), - Self::Long(i) => write!(f, "{}", i), - Self::Short(i) => write!(f, "{}", i), - Self::Byte(i) => write!(f, "{}", i), - Self::Float(fl) => write!(f, "{}", fl), - Self::Double(fl) => write!(f, "{}", fl), - Self::String(s) => write!(f, "'{}'", s), - Self::Boolean(b) => write!(f, "{}", b), - Self::Timestamp(ts) => write!(f, "{}", ts), - Self::Date(d) => write!(f, "{}", d), - Self::Binary(b) => write!(f, "{:?}", b), - Self::Decimal(value, _, scale) => match scale.cmp(&0) { - Ordering::Equal => { - write!(f, "{}", value) - } - Ordering::Greater => { - let scalar_multiple = 10_i128.pow(*scale as u32); - write!(f, "{}", value / scalar_multiple)?; - write!(f, ".")?; - write!( - f, - "{:0>scale$}", - value % scalar_multiple, - scale = *scale as usize - ) - } - Ordering::Less => { - write!(f, "{}", value)?; - for _ in 0..(scale.abs()) { - write!(f, "0")?; - } - Ok(()) - } - }, - Self::Null(_) => write!(f, "null"), - Self::Struct(values, fields) => { - write!(f, "{{")?; - for (i, (value, field)) in values.iter().zip(fields.iter()).enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {}", field.name, value)?; - } - write!(f, "}}") - } - } - } -} - -impl From for Scalar { - fn from(i: i32) -> Self { - Self::Integer(i) - } -} - -impl From for Scalar { - fn from(i: i64) -> Self { - Self::Long(i) - } -} - -impl From for Scalar { - fn from(b: bool) -> Self { - Self::Boolean(b) - } -} - -impl From<&str> for Scalar { - fn from(s: &str) -> Self { - Self::String(s.into()) - } -} - -impl From for Scalar { - fn from(value: String) -> Self { - Self::String(value) - } -} - -// TODO: add more From impls - -impl PrimitiveType { - fn data_type(&self) -> DataType { - DataType::Primitive(self.clone()) - } - - /// Parses a string into a scalar value. - pub fn parse_scalar(&self, raw: &str) -> Result { - use PrimitiveType::*; - - lazy_static::lazy_static! { - static ref UNIX_EPOCH: DateTime = DateTime::from_timestamp(0, 0).unwrap(); - } - - if raw.is_empty() || raw == NULL_PARTITION_VALUE_DATA_PATH { - return Ok(Scalar::Null(self.data_type())); - } - - match self { - String => Ok(Scalar::String(raw.to_string())), - Byte => self.str_parse_scalar(raw, Scalar::Byte), - Short => self.str_parse_scalar(raw, Scalar::Short), - Integer => self.str_parse_scalar(raw, Scalar::Integer), - Long => self.str_parse_scalar(raw, Scalar::Long), - Float => self.str_parse_scalar(raw, Scalar::Float), - Double => self.str_parse_scalar(raw, Scalar::Double), - Boolean => { - if raw.eq_ignore_ascii_case("true") { - Ok(Scalar::Boolean(true)) - } else if raw.eq_ignore_ascii_case("false") { - Ok(Scalar::Boolean(false)) - } else { - Err(self.parse_error(raw)) - } - } - Date => { - let date = NaiveDate::parse_from_str(raw, "%Y-%m-%d") - .map_err(|_| self.parse_error(raw))? 
- .and_hms_opt(0, 0, 0) - .ok_or(self.parse_error(raw))?; - let date = Utc.from_utc_datetime(&date); - let days = date.signed_duration_since(*UNIX_EPOCH).num_days() as i32; - Ok(Scalar::Date(days)) - } - Timestamp => { - let timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f") - .map_err(|_| self.parse_error(raw))?; - let timestamp = Utc.from_utc_datetime(×tamp); - let micros = timestamp - .signed_duration_since(*UNIX_EPOCH) - .num_microseconds() - .ok_or(self.parse_error(raw))?; - Ok(Scalar::Timestamp(micros)) - } - Binary => { - let bytes = parse_escaped_binary_string(raw).map_err(|_| self.parse_error(raw))?; - Ok(Scalar::Binary(bytes)) - } - _ => todo!("parsing {:?} is not yet supported", self), - } - } - - fn parse_error(&self, raw: &str) -> Error { - Error::Parse(raw.to_string(), self.data_type()) - } - - fn str_parse_scalar( - &self, - raw: &str, - f: impl FnOnce(T) -> Scalar, - ) -> Result { - match raw.parse() { - Ok(val) => Ok(f(val)), - Err(..) => Err(self.parse_error(raw)), - } - } -} - -fn create_escaped_binary_string(data: &[u8]) -> String { - let mut escaped_string = String::new(); - for &byte in data { - // Convert each byte to its two-digit hexadecimal representation - let hex_representation = format!("{:04X}", byte); - // Append the hexadecimal representation with an escape sequence - escaped_string.push_str("\\u"); - escaped_string.push_str(&hex_representation); - } - escaped_string -} - -fn parse_escaped_binary_string(escaped_string: &str) -> Result, &'static str> { - let mut parsed_bytes = Vec::new(); - let mut chars = escaped_string.chars(); - - while let Some(ch) = chars.next() { - if ch == '\\' { - // Check for the escape sequence "\\u" indicating a hexadecimal value - if chars.next() == Some('u') { - // Read two hexadecimal digits and convert to u8 - if let (Some(digit1), Some(digit2), Some(digit3), Some(digit4)) = - (chars.next(), chars.next(), chars.next(), chars.next()) - { - if let Ok(byte) = - u8::from_str_radix(&format!("{}{}{}{}", digit1, digit2, digit3, digit4), 16) - { - parsed_bytes.push(byte); - } else { - return Err("Error parsing hexadecimal value"); - } - } else { - return Err("Incomplete escape sequence"); - } - } else { - // Unrecognized escape sequence - return Err("Unrecognized escape sequence"); - } - } else { - // Regular character, convert to u8 and push into the result vector - parsed_bytes.push(ch as u8); - } - } - - Ok(parsed_bytes) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_binary_roundtrip() { - let scalar = Scalar::Binary(vec![0, 1, 2, 3, 4, 5]); - let parsed = PrimitiveType::Binary - .parse_scalar(&scalar.serialize()) - .unwrap(); - assert_eq!(scalar, parsed); - } - - #[test] - fn test_decimal_display() { - let s = Scalar::Decimal(123456789, 9, 2); - assert_eq!(s.to_string(), "1234567.89"); - - let s = Scalar::Decimal(123456789, 9, 0); - assert_eq!(s.to_string(), "123456789"); - - let s = Scalar::Decimal(123456789, 9, 9); - assert_eq!(s.to_string(), "0.123456789"); - - let s = Scalar::Decimal(123, 9, -3); - assert_eq!(s.to_string(), "123000"); - } -} diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index 876a09a33c..ce788d6c4d 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -4,12 +4,11 @@ pub mod arrow; pub mod error; -pub mod expressions; pub mod models; +pub mod scalars; mod snapshot; pub use error::*; -pub use expressions::*; pub use models::*; pub use snapshot::*; diff --git a/crates/core/src/kernel/models/actions.rs 
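One more illustration of the removed parser, which is what turned partition-path strings back into typed values and what the binary round-trip test above exercises: empty strings (and the null-partition marker) become typed nulls, dates use %Y-%m-%d, timestamps a microsecond-precision %Y-%m-%d %H:%M:%S%.f. A sketch (PrimitiveType, Scalar, Error are from the removed module):

fn parse_partition_values() -> Result<(), Error> {
    // One day after the UNIX epoch -> Date(1).
    assert_eq!(
        PrimitiveType::Date.parse_scalar("1970-01-02")?,
        Scalar::Date(1)
    );
    // Empty strings parse to a typed null.
    assert!(PrimitiveType::Integer.parse_scalar("")?.is_null());
    // Numeric strings parse into the matching scalar variant.
    assert_eq!(PrimitiveType::Long.parse_scalar("42")?, Scalar::Long(42));
    Ok(())
}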
b/crates/core/src/kernel/models/actions.rs index 28eaa89cc4..962b71b21b 100644 --- a/crates/core/src/kernel/models/actions.rs +++ b/crates/core/src/kernel/models/actions.rs @@ -5,6 +5,8 @@ use std::str::FromStr; // use std::sync::Arc; // use roaring::RoaringTreemap; +use crate::DeltaConfigKey; +use maplit::hashset; use serde::{Deserialize, Serialize}; use tracing::warn; use url::Url; @@ -137,30 +139,243 @@ pub struct Protocol { impl Protocol { /// Create a new protocol action - pub fn new(min_reader_version: i32, min_wrriter_version: i32) -> Self { + pub fn new(min_reader_version: i32, min_writer_version: i32) -> Self { Self { min_reader_version, - min_writer_version: min_wrriter_version, + min_writer_version, reader_features: None, writer_features: None, } } - /// set the reader features in the protocol action + /// set the reader features in the protocol action, automatically bumps min_reader_version pub fn with_reader_features( mut self, reader_features: impl IntoIterator>, ) -> Self { - self.reader_features = Some(reader_features.into_iter().map(|c| c.into()).collect()); + let all_reader_features = reader_features + .into_iter() + .map(Into::into) + .collect::>(); + if !all_reader_features.is_empty() { + self.min_reader_version = 3 + } + self.reader_features = Some(all_reader_features); self } - /// set the writer features in the protocol action + /// set the writer features in the protocol action, automatically bumps min_writer_version pub fn with_writer_features( mut self, writer_features: impl IntoIterator>, ) -> Self { - self.writer_features = Some(writer_features.into_iter().map(|c| c.into()).collect()); + let all_writer_feautures = writer_features + .into_iter() + .map(|c| c.into()) + .collect::>(); + if !all_writer_feautures.is_empty() { + self.min_writer_version = 7 + } + self.writer_features = Some(all_writer_feautures); + self + } + + /// Converts existing properties into features if the reader_version is >=3 or writer_version >=3 + /// only converts features that are "true" + pub fn move_table_properties_into_features( + mut self, + configuration: &HashMap>, + ) -> Protocol { + if self.min_writer_version >= 7 { + let mut converted_writer_features = configuration + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + .collect::>>() + .keys() + .map(|key| (*key).clone().into()) + .filter(|v| !matches!(v, WriterFeatures::Other(_))) + .collect::>(); + + if configuration + .keys() + .any(|v| v.starts_with("delta.constraints.")) + { + converted_writer_features.insert(WriterFeatures::CheckConstraints); + } + + match self.writer_features { + Some(mut features) => { + features.extend(converted_writer_features); + self.writer_features = Some(features); + } + None => self.writer_features = Some(converted_writer_features), + } + } + if self.min_reader_version > 3 { + let converted_reader_features = configuration + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + .map(|(key, _)| (*key).clone().into()) + .filter(|v| !matches!(v, ReaderFeatures::Other(_))) + .collect::>(); + match self.reader_features { + Some(mut features) => { + features.extend(converted_reader_features); + self.reader_features = Some(features); + } + None => self.reader_features = Some(converted_reader_features), + } + } + self + } + /// Will apply the properties to the protocol by either bumping the version or setting + /// features + pub fn 
apply_properties_to_protocol( + mut self, + new_properties: &HashMap, + raise_if_not_exists: bool, + ) -> DeltaResult { + let mut parsed_properties: HashMap = HashMap::new(); + + for (key, value) in new_properties { + if let Ok(parsed_key) = key.parse::() { + parsed_properties.insert(parsed_key, value.to_string()); + } else if raise_if_not_exists { + return Err(Error::Generic(format!( + "Error parsing property '{}':'{}'", + key, value + ))); + } + } + + // Check and update delta.minReaderVersion + if let Some(min_reader_version) = parsed_properties.get(&DeltaConfigKey::MinReaderVersion) { + let new_min_reader_version = min_reader_version.parse::(); + match new_min_reader_version { + Ok(version) => match version { + 1..=3 => { + if version > self.min_reader_version { + self.min_reader_version = version + } + } + _ => { + return Err(Error::Generic(format!( + "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", + min_reader_version + ))) + } + }, + Err(_) => { + return Err(Error::Generic(format!( + "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", + min_reader_version + ))) + } + } + } + + // Check and update delta.minWriterVersion + if let Some(min_writer_version) = parsed_properties.get(&DeltaConfigKey::MinWriterVersion) { + let new_min_writer_version = min_writer_version.parse::(); + match new_min_writer_version { + Ok(version) => match version { + 2..=7 => { + if version > self.min_writer_version { + self.min_writer_version = version + } + } + _ => { + return Err(Error::Generic(format!( + "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", + min_writer_version + ))) + } + }, + Err(_) => { + return Err(Error::Generic(format!( + "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", + min_writer_version + ))) + } + } + } + + // Check enableChangeDataFeed and bump protocol or add writerFeature if writer versions is >=7 + if let Some(enable_cdf) = parsed_properties.get(&DeltaConfigKey::EnableChangeDataFeed) { + let if_enable_cdf = enable_cdf.to_ascii_lowercase().parse::(); + match if_enable_cdf { + Ok(true) => { + if self.min_writer_version >= 7 { + match self.writer_features { + Some(mut features) => { + features.insert(WriterFeatures::ChangeDataFeed); + self.writer_features = Some(features); + } + None => { + self.writer_features = + Some(hashset! {WriterFeatures::ChangeDataFeed}) + } + } + } else if self.min_writer_version <= 3 { + self.min_writer_version = 4 + } + } + Ok(false) => {} + _ => { + return Err(Error::Generic(format!( + "delta.enableChangeDataFeed = '{}' is invalid, valid values are ['true']", + enable_cdf + ))) + } + } + } + + if let Some(enable_dv) = parsed_properties.get(&DeltaConfigKey::EnableDeletionVectors) { + let if_enable_dv = enable_dv.to_ascii_lowercase().parse::(); + match if_enable_dv { + Ok(true) => { + let writer_features = match self.writer_features { + Some(mut features) => { + features.insert(WriterFeatures::DeletionVectors); + features + } + None => hashset! {WriterFeatures::DeletionVectors}, + }; + let reader_features = match self.reader_features { + Some(mut features) => { + features.insert(ReaderFeatures::DeletionVectors); + features + } + None => hashset! 
{ReaderFeatures::DeletionVectors}, + }; + self.min_reader_version = 3; + self.min_writer_version = 7; + self.writer_features = Some(writer_features); + self.reader_features = Some(reader_features); + } + Ok(false) => {} + _ => { + return Err(Error::Generic(format!( + "delta.enableDeletionVectors = '{}' is invalid, valid values are ['true']", + enable_dv + ))) + } + } + } + Ok(self) + } + /// Enable timestamp_ntz in the protocol + pub fn enable_timestamp_ntz(mut self) -> Protocol { + self = self.with_reader_features(vec![ReaderFeatures::TimestampWithoutTimezone]); + self = self.with_writer_features(vec![WriterFeatures::TimestampWithoutTimezone]); self } } @@ -175,7 +390,7 @@ pub enum ReaderFeatures { /// Deletion vectors for merge, update, delete DeletionVectors, /// timestamps without timezone support - #[serde(alias = "timestampNtz")] + #[serde(rename = "timestampNtz")] TimestampWithoutTimezone, /// version 2 of checkpointing V2Checkpoint, @@ -189,7 +404,9 @@ impl From<&parquet::record::Field> for ReaderFeatures { match value { parquet::record::Field::Str(feature) => match feature.as_str() { "columnMapping" => ReaderFeatures::ColumnMapping, - "deletionVectors" => ReaderFeatures::DeletionVectors, + "deletionVectors" | "delta.enableDeletionVectors" => { + ReaderFeatures::DeletionVectors + } "timestampNtz" => ReaderFeatures::TimestampWithoutTimezone, "v2Checkpoint" => ReaderFeatures::V2Checkpoint, f => ReaderFeatures::Other(f.to_string()), @@ -259,7 +476,7 @@ pub enum WriterFeatures { /// Row tracking on tables RowTracking, /// timestamps without timezone support - #[serde(alias = "timestampNtz")] + #[serde(rename = "timestampNtz")] TimestampWithoutTimezone, /// domain specific metadata DomainMetadata, @@ -281,15 +498,15 @@ impl From for WriterFeatures { impl From<&str> for WriterFeatures { fn from(value: &str) -> Self { match value { - "appendOnly" => WriterFeatures::AppendOnly, + "appendOnly" | "delta.appendOnly" => WriterFeatures::AppendOnly, "invariants" => WriterFeatures::Invariants, "checkConstraints" => WriterFeatures::CheckConstraints, - "changeDataFeed" => WriterFeatures::ChangeDataFeed, + "changeDataFeed" | "delta.enableChangeDataFeed" => WriterFeatures::ChangeDataFeed, "generatedColumns" => WriterFeatures::GeneratedColumns, "columnMapping" => WriterFeatures::ColumnMapping, "identityColumns" => WriterFeatures::IdentityColumns, - "deletionVectors" => WriterFeatures::DeletionVectors, - "rowTracking" => WriterFeatures::RowTracking, + "deletionVectors" | "delta.enableDeletionVectors" => WriterFeatures::DeletionVectors, + "rowTracking" | "delta.enableRowTracking" => WriterFeatures::RowTracking, "timestampNtz" => WriterFeatures::TimestampWithoutTimezone, "domainMetadata" => WriterFeatures::DomainMetadata, "v2Checkpoint" => WriterFeatures::V2Checkpoint, @@ -351,7 +568,7 @@ impl From<&parquet::record::Field> for WriterFeatures { } ///Storage type of deletion vector -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq)] pub enum StorageType { /// Stored at relative path derived from a UUID. #[serde(rename = "u")] @@ -657,7 +874,7 @@ pub struct AddCDCFile { /// enable idempotency. #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] -pub struct Txn { +pub struct Transaction { /// A unique identifier for the application performing the transaction. 
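Putting the new builders together: with_reader_features/with_writer_features now move the table onto the (3, 7) table-features protocol as soon as any feature is supplied, while apply_properties_to_protocol lets table properties drive the same state. A small sketch of the intended behaviour, assuming the delta.enableChangeDataFeed key spelling from DeltaConfigKey and that Protocol's version fields remain public as elsewhere in the crate:

use std::collections::HashMap;

fn protocol_examples() -> DeltaResult<()> {
    // Supplying any feature bumps the protocol to reader 3 / writer 7.
    let protocol = Protocol::new(1, 2)
        .with_reader_features([ReaderFeatures::DeletionVectors])
        .with_writer_features([WriterFeatures::DeletionVectors]);
    assert_eq!(protocol.min_reader_version, 3);
    assert_eq!(protocol.min_writer_version, 7);

    // On a legacy (1, 2) protocol, enabling CDF only bumps the writer
    // version to 4 rather than switching to table features.
    let mut props = HashMap::new();
    props.insert(
        "delta.enableChangeDataFeed".to_string(),
        "true".to_string(),
    );
    let protocol = Protocol::new(1, 2).apply_properties_to_protocol(&props, true)?;
    assert_eq!(protocol.min_writer_version, 4);
    Ok(())
}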
pub app_id: String, @@ -669,6 +886,26 @@ pub struct Txn { pub last_updated: Option, } +impl Transaction { + /// Create a new application transactions. See [`Txn`] for details. + pub fn new(app_id: impl ToString, version: i64) -> Self { + Self::new_with_last_update(app_id, version, None) + } + + /// Create a new application transactions. See [`Txn`] for details. + pub fn new_with_last_update( + app_id: impl ToString, + version: i64, + last_updated: Option, + ) -> Self { + Transaction { + app_id: app_id.to_string(), + version, + last_updated, + } + } +} + /// The commitInfo is a fairly flexible action within the delta specification, where arbitrary data can be stored. /// However the reference implementation as well as delta-rs store useful information that may for instance /// allow us to be more permissive in commit conflict resolution. @@ -714,6 +951,10 @@ pub struct CommitInfo { /// Additional provenance information for the commit #[serde(flatten, default)] pub info: HashMap, + + /// User defined metadata + #[serde(skip_serializing_if = "Option::is_none")] + pub user_metadata: Option, } /// The domain metadata action contains a configuration (string) for a named metadata domain @@ -766,7 +1007,7 @@ pub struct Sidecar { pub tags: Option>>, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq)] /// The isolation level applied during transaction pub enum IsolationLevel { /// The strongest isolation level. It ensures that committed write operations diff --git a/crates/core/src/kernel/models/fields.rs b/crates/core/src/kernel/models/fields.rs index fa672aaefc..6c699f0e88 100644 --- a/crates/core/src/kernel/models/fields.rs +++ b/crates/core/src/kernel/models/fields.rs @@ -1,8 +1,8 @@ //! Schema definitions for action types +use delta_kernel::schema::{ArrayType, DataType, MapType, StructField, StructType}; use lazy_static::lazy_static; -use super::schema::{ArrayType, DataType, MapType, StructField, StructType}; use super::ActionType; impl ActionType { diff --git a/crates/core/src/kernel/models/mod.rs b/crates/core/src/kernel/models/mod.rs index eda7e6fb60..a8ee2f8d31 100644 --- a/crates/core/src/kernel/models/mod.rs +++ b/crates/core/src/kernel/models/mod.rs @@ -14,7 +14,7 @@ mod schema; pub use actions::*; pub use schema::*; -#[derive(Debug)] +#[derive(Debug, Hash, PartialEq, Eq, Clone, Serialize, Deserialize)] /// The type of action that was performed on the table pub enum ActionType { /// modify the data in a table by adding individual logical files @@ -49,7 +49,7 @@ pub enum Action { Add(Add), Remove(Remove), Cdc(AddCDCFile), - Txn(Txn), + Txn(Transaction), CommitInfo(CommitInfo), DomainMetadata(DomainMetadata), } @@ -94,8 +94,8 @@ impl From for Action { } } -impl From for Action { - fn from(a: Txn) -> Self { +impl From for Action { + fn from(a: Transaction) -> Self { Self::Txn(a) } } diff --git a/crates/core/src/kernel/models/schema.rs b/crates/core/src/kernel/models/schema.rs index 874bade71d..3a88564f1d 100644 --- a/crates/core/src/kernel/models/schema.rs +++ b/crates/core/src/kernel/models/schema.rs @@ -1,12 +1,11 @@ //! 
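The renamed Transaction action keeps the old Txn shape, and the new constructors make the idempotent-write bookkeeping explicit; for instance (a sketch using the Transaction and Action types defined in this module):

fn txn_action_example() -> Action {
    // Record that application "my-app" has committed its version 5.
    let txn = Transaction::new("my-app", 5);
    assert_eq!(txn.app_id, "my-app");
    assert_eq!(txn.version, 5);
    assert!(txn.last_updated.is_none());
    Action::Txn(txn)
}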
Delta table schema -use std::borrow::Borrow; -use std::fmt::Formatter; -use std::hash::{Hash, Hasher}; use std::sync::Arc; -use std::{collections::HashMap, fmt::Display}; -use serde::{Deserialize, Serialize}; +pub use delta_kernel::schema::{ + ArrayType, ColumnMetadataKey, DataType, MapType, MetadataValue, PrimitiveType, StructField, + StructType, +}; use serde_json::Value; use crate::kernel::error::Error; @@ -17,76 +16,6 @@ pub type Schema = StructType; /// Schema reference type pub type SchemaRef = Arc; -/// A value that can be stored in the metadata of a Delta table schema entity. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] -#[serde(untagged)] -pub enum MetadataValue { - /// A number value - Number(i32), - /// A string value - String(String), - /// A Boolean value - Boolean(bool), -} - -impl From for MetadataValue { - fn from(value: String) -> Self { - Self::String(value) - } -} - -impl From<&String> for MetadataValue { - fn from(value: &String) -> Self { - Self::String(value.clone()) - } -} - -impl From for MetadataValue { - fn from(value: i32) -> Self { - Self::Number(value) - } -} - -impl From for MetadataValue { - fn from(value: bool) -> Self { - Self::Boolean(value) - } -} - -impl From for MetadataValue { - fn from(value: Value) -> Self { - Self::String(value.to_string()) - } -} - -#[derive(Debug)] -#[allow(missing_docs)] -pub enum ColumnMetadataKey { - ColumnMappingId, - ColumnMappingPhysicalName, - GenerationExpression, - IdentityStart, - IdentityStep, - IdentityHighWaterMark, - IdentityAllowExplicitInsert, - Invariants, -} - -impl AsRef for ColumnMetadataKey { - fn as_ref(&self) -> &str { - match self { - Self::ColumnMappingId => "delta.columnMapping.id", - Self::ColumnMappingPhysicalName => "delta.columnMapping.physicalName", - Self::GenerationExpression => "delta.generationExpression", - Self::IdentityAllowExplicitInsert => "delta.identity.allowExplicitInsert", - Self::IdentityHighWaterMark => "delta.identity.highWaterMark", - Self::IdentityStart => "delta.identity.start", - Self::IdentityStep => "delta.identity.step", - Self::Invariants => "delta.invariants", - } - } -} - /// An invariant for a column that is enforced on all writes to a Delta table. #[derive(Eq, PartialEq, Debug, Default, Clone)] pub struct Invariant { @@ -116,154 +45,17 @@ impl DataCheck for Invariant { } } -/// Represents a struct field defined in the Delta table schema. 
-// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Schema-Serialization-Format -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] -pub struct StructField { - /// Name of this (possibly nested) column - pub name: String, - /// The data type of this field - #[serde(rename = "type")] - pub data_type: DataType, - /// Denotes whether this Field can be null - pub nullable: bool, - /// A JSON map containing information about this column - pub metadata: HashMap, -} - -impl Hash for StructField { - fn hash(&self, state: &mut H) { - self.name.hash(state); - } -} - -impl Borrow for StructField { - fn borrow(&self) -> &str { - self.name.as_ref() - } -} - -impl Eq for StructField {} - -impl StructField { - /// Creates a new field - pub fn new(name: impl Into, data_type: impl Into, nullable: bool) -> Self { - Self { - name: name.into(), - data_type: data_type.into(), - nullable, - metadata: HashMap::default(), - } - } - - /// Creates a new field with metadata - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, impl Into)>, - ) -> Self { - self.metadata = metadata - .into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(); - self - } - - /// Get the value of a specific metadata key - pub fn get_config_value(&self, key: &ColumnMetadataKey) -> Option<&MetadataValue> { - self.metadata.get(key.as_ref()) - } - - #[inline] - /// Returns the name of the column - pub fn name(&self) -> &String { - &self.name - } - - #[inline] - /// Returns whether the column is nullable - pub fn is_nullable(&self) -> bool { - self.nullable - } - - /// Returns the physical name of the column - /// Equals the name if column mapping is not enabled on table - pub fn physical_name(&self) -> Result<&str, Error> { - // Even on mapping type id the physical name should be there for partitions - let phys_name = self.get_config_value(&ColumnMetadataKey::ColumnMappingPhysicalName); - match phys_name { - None => Ok(&self.name), - Some(MetadataValue::Boolean(_)) => Ok(&self.name), - Some(MetadataValue::String(s)) => Ok(s), - Some(MetadataValue::Number(_)) => Err(Error::MetadataError( - "Unexpected type for physical name".to_string(), - )), - } - } - - #[inline] - /// Returns the data type of the column - pub const fn data_type(&self) -> &DataType { - &self.data_type - } - - #[inline] - /// Returns the metadata of the column - pub const fn metadata(&self) -> &HashMap { - &self.metadata - } -} - -/// A struct is used to represent both the top-level schema of the table -/// as well as struct columns that contain nested columns. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] -pub struct StructType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element stored in this array - pub fields: Vec, +/// Trait to add convenince functions to struct type +pub trait StructTypeExt { + /// Get all invariants in the schemas + fn get_invariants(&self) -> Result, Error>; } -impl StructType { - /// Creates a new struct type - pub fn new(fields: Vec) -> Self { - Self { - type_name: "struct".into(), - fields, - } - } - - /// Returns an immutable reference of the fields in the struct - pub fn fields(&self) -> &Vec { - &self.fields - } - - /// Find the index of the column with the given name. 
- pub fn index_of(&self, name: &str) -> Result { - let (idx, _) = self - .fields() - .iter() - .enumerate() - .find(|(_, b)| b.name() == name) - .ok_or_else(|| { - let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect(); - Error::Schema(format!( - "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}" - )) - })?; - Ok(idx) - } - - /// Returns a reference of a specific [`StructField`] instance selected by name. - pub fn field_with_name(&self, name: &str) -> Result<&StructField, Error> { - Ok(&self.fields[self.index_of(name)?]) - } - +impl StructTypeExt for StructType { /// Get all invariants in the schemas - pub fn get_invariants(&self) -> Result, Error> { + fn get_invariants(&self) -> Result, Error> { let mut remaining_fields: Vec<(String, StructField)> = self .fields() - .iter() .map(|field| (field.name.clone(), field.clone())) .collect(); let mut invariants: Vec = Vec::new(); @@ -282,7 +74,6 @@ impl StructType { remaining_fields.extend( inner .fields() - .iter() .map(|field| { let new_prefix = add_segment(&field_path, &field.name); (new_prefix, field.clone()) @@ -334,469 +125,12 @@ impl StructType { } } -impl FromIterator for StructType { - fn from_iter>(iter: T) -> Self { - Self { - type_name: "struct".into(), - fields: iter.into_iter().collect(), - } - } -} - -impl<'a> FromIterator<&'a StructField> for StructType { - fn from_iter>(iter: T) -> Self { - Self { - type_name: "struct".into(), - fields: iter.into_iter().cloned().collect(), - } - } -} - -impl From<[StructField; N]> for StructType { - fn from(value: [StructField; N]) -> Self { - Self { - type_name: "struct".into(), - fields: value.to_vec(), - } - } -} - -impl<'a, const N: usize> From<[&'a StructField; N]> for StructType { - fn from(value: [&'a StructField; N]) -> Self { - Self { - type_name: "struct".into(), - fields: value.into_iter().cloned().collect(), - } - } -} - -impl<'a> IntoIterator for &'a StructType { - type Item = &'a StructField; - type IntoIter = std::slice::Iter<'a, StructField>; - - fn into_iter(self) -> Self::IntoIter { - self.fields.iter() - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] -#[serde(rename_all = "camelCase")] -/// An array stores a variable length collection of items of some type. 
-pub struct ArrayType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element stored in this array - pub element_type: DataType, - /// Denoting whether this array can contain one or more null values - pub contains_null: bool, -} - -impl ArrayType { - /// Creates a new array type - pub fn new(element_type: DataType, contains_null: bool) -> Self { - Self { - type_name: "array".into(), - element_type, - contains_null, - } - } - - #[inline] - /// Returns the element type of the array - pub const fn element_type(&self) -> &DataType { - &self.element_type - } - - #[inline] - /// Returns whether the array can contain null values - pub const fn contains_null(&self) -> bool { - self.contains_null - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] -#[serde(rename_all = "camelCase")] -/// A map stores an arbitrary length collection of key-value pairs -pub struct MapType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element used for the key of this map - pub key_type: DataType, - /// The type of element used for the value of this map - pub value_type: DataType, - /// Denoting whether this array can contain one or more null values - #[serde(default = "default_true")] - pub value_contains_null: bool, -} - -impl MapType { - /// Creates a new map type - pub fn new(key_type: DataType, value_type: DataType, value_contains_null: bool) -> Self { - Self { - type_name: "map".into(), - key_type, - value_type, - value_contains_null, - } - } - - #[inline] - /// Returns the key type of the map - pub const fn key_type(&self) -> &DataType { - &self.key_type - } - - #[inline] - /// Returns the value type of the map - pub const fn value_type(&self) -> &DataType { - &self.value_type - } - - #[inline] - /// Returns whether the map can contain null values - pub const fn value_contains_null(&self) -> bool { - self.value_contains_null - } -} - -fn default_true() -> bool { - true -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] -#[serde(rename_all = "camelCase")] -/// Primitive types supported by Delta -pub enum PrimitiveType { - /// UTF-8 encoded string of characters - String, - /// i64: 8-byte signed integer. Range: -9223372036854775808 to 9223372036854775807 - Long, - /// i32: 4-byte signed integer. Range: -2147483648 to 2147483647 - Integer, - /// i16: 2-byte signed integer numbers. Range: -32768 to 32767 - Short, - /// i8: 1-byte signed integer number. Range: -128 to 127 - Byte, - /// f32: 4-byte single-precision floating-point numbers - Float, - /// f64: 8-byte double-precision floating-point numbers - Double, - /// bool: boolean values - Boolean, - /// Binary: uninterpreted binary data - Binary, - /// Date: Calendar date (year, month, day) - Date, - /// Microsecond precision timestamp, adjusted to UTC. 
- Timestamp, - // TODO: timestamp without timezone - #[serde( - serialize_with = "serialize_decimal", - deserialize_with = "deserialize_decimal", - untagged - )] - /// Decimal: arbitrary precision decimal numbers - Decimal(u8, i8), -} - -fn serialize_decimal( - precision: &u8, - scale: &i8, - serializer: S, -) -> Result { - serializer.serialize_str(&format!("decimal({},{})", precision, scale)) -} - -fn deserialize_decimal<'de, D>(deserializer: D) -> Result<(u8, i8), D::Error> -where - D: serde::Deserializer<'de>, -{ - let str_value = String::deserialize(deserializer)?; - if !str_value.starts_with("decimal(") || !str_value.ends_with(')') { - return Err(serde::de::Error::custom(format!( - "Invalid decimal: {}", - str_value - ))); - } - - let mut parts = str_value[8..str_value.len() - 1].split(','); - let precision = parts - .next() - .and_then(|part| part.trim().parse::().ok()) - .ok_or_else(|| { - serde::de::Error::custom(format!("Invalid precision in decimal: {}", str_value)) - })?; - let scale = parts - .next() - .and_then(|part| part.trim().parse::().ok()) - .ok_or_else(|| { - serde::de::Error::custom(format!("Invalid scale in decimal: {}", str_value)) - })?; - - Ok((precision, scale)) -} - -impl Display for PrimitiveType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - PrimitiveType::String => write!(f, "string"), - PrimitiveType::Long => write!(f, "long"), - PrimitiveType::Integer => write!(f, "integer"), - PrimitiveType::Short => write!(f, "short"), - PrimitiveType::Byte => write!(f, "byte"), - PrimitiveType::Float => write!(f, "float"), - PrimitiveType::Double => write!(f, "double"), - PrimitiveType::Boolean => write!(f, "boolean"), - PrimitiveType::Binary => write!(f, "binary"), - PrimitiveType::Date => write!(f, "date"), - PrimitiveType::Timestamp => write!(f, "timestamp"), - PrimitiveType::Decimal(precision, scale) => { - write!(f, "decimal({},{})", precision, scale) - } - } - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] -#[serde(untagged, rename_all = "camelCase")] -/// Top level delta tdatatypes -pub enum DataType { - /// UTF-8 encoded string of characters - Primitive(PrimitiveType), - /// An array stores a variable length collection of items of some type. - Array(Box), - /// A struct is used to represent both the top-level schema of the table as well - /// as struct columns that contain nested columns. 
- Struct(Box), - /// A map stores an arbitrary length collection of key-value pairs - /// with a single keyType and a single valueType - Map(Box), -} - -impl From for DataType { - fn from(map_type: MapType) -> Self { - DataType::Map(Box::new(map_type)) - } -} - -impl From for DataType { - fn from(struct_type: StructType) -> Self { - DataType::Struct(Box::new(struct_type)) - } -} - -impl From for DataType { - fn from(array_type: ArrayType) -> Self { - DataType::Array(Box::new(array_type)) - } -} - -#[allow(missing_docs)] -impl DataType { - pub const STRING: Self = DataType::Primitive(PrimitiveType::String); - pub const LONG: Self = DataType::Primitive(PrimitiveType::Long); - pub const INTEGER: Self = DataType::Primitive(PrimitiveType::Integer); - pub const SHORT: Self = DataType::Primitive(PrimitiveType::Short); - pub const BYTE: Self = DataType::Primitive(PrimitiveType::Byte); - pub const FLOAT: Self = DataType::Primitive(PrimitiveType::Float); - pub const DOUBLE: Self = DataType::Primitive(PrimitiveType::Double); - pub const BOOLEAN: Self = DataType::Primitive(PrimitiveType::Boolean); - pub const BINARY: Self = DataType::Primitive(PrimitiveType::Binary); - pub const DATE: Self = DataType::Primitive(PrimitiveType::Date); - pub const TIMESTAMP: Self = DataType::Primitive(PrimitiveType::Timestamp); - - pub fn decimal(precision: u8, scale: i8) -> Self { - DataType::Primitive(PrimitiveType::Decimal(precision, scale)) - } - - pub fn struct_type(fields: Vec) -> Self { - DataType::Struct(Box::new(StructType::new(fields))) - } -} - -impl Display for DataType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - DataType::Primitive(p) => write!(f, "{}", p), - DataType::Array(a) => write!(f, "array<{}>", a.element_type), - DataType::Struct(s) => { - write!(f, "struct<")?; - for (i, field) in s.fields.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {}", field.name, field.data_type)?; - } - write!(f, ">") - } - DataType::Map(m) => write!(f, "map<{}, {}>", m.key_type, m.value_type), - } - } -} - #[cfg(test)] mod tests { use super::*; use serde_json; use serde_json::json; - #[test] - fn test_serde_data_types() { - let data = r#" - { - "name": "a", - "type": "integer", - "nullable": false, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!( - field.data_type, - DataType::Primitive(PrimitiveType::Integer) - )); - - let data = r#" - { - "name": "c", - "type": { - "type": "array", - "elementType": "integer", - "containsNull": false - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!(field.data_type, DataType::Array(_))); - - let data = r#" - { - "name": "e", - "type": { - "type": "array", - "elementType": { - "type": "struct", - "fields": [ - { - "name": "d", - "type": "integer", - "nullable": false, - "metadata": {} - } - ] - }, - "containsNull": true - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!(field.data_type, DataType::Array(_))); - match field.data_type { - DataType::Array(array) => assert!(matches!(array.element_type, DataType::Struct(_))), - _ => unreachable!(), - } - - let data = r#" - { - "name": "f", - "type": { - "type": "map", - "keyType": "string", - "valueType": "string", - "valueContainsNull": true - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); 
- assert!(matches!(field.data_type, DataType::Map(_))); - } - - #[test] - fn test_roundtrip_decimal() { - let data = r#" - { - "name": "a", - "type": "decimal(10, 2)", - "nullable": false, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!( - field.data_type, - DataType::Primitive(PrimitiveType::Decimal(10, 2)) - )); - - let json_str = serde_json::to_string(&field).unwrap(); - assert_eq!( - json_str, - r#"{"name":"a","type":"decimal(10,2)","nullable":false,"metadata":{}}"# - ); - } - - #[test] - fn test_field_metadata() { - let data = r#" - { - "name": "e", - "type": { - "type": "array", - "elementType": { - "type": "struct", - "fields": [ - { - "name": "d", - "type": "integer", - "nullable": false, - "metadata": { - "delta.columnMapping.id": 5, - "delta.columnMapping.physicalName": "col-a7f4159c-53be-4cb0-b81a-f7e5240cfc49" - } - } - ] - }, - "containsNull": true - }, - "nullable": true, - "metadata": { - "delta.columnMapping.id": 4, - "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" - } - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - - let col_id = field - .get_config_value(&ColumnMetadataKey::ColumnMappingId) - .unwrap(); - assert!(matches!(col_id, MetadataValue::Number(num) if *num == 4)); - let physical_name = field - .get_config_value(&ColumnMetadataKey::ColumnMappingPhysicalName) - .unwrap(); - assert!( - matches!(physical_name, MetadataValue::String(name) if *name == "col-5f422f40-de70-45b2-88ab-1d5c90e94db1") - ); - } - - #[test] - fn test_read_schemas() { - let file = std::fs::File::open("./tests/serde/schema.json").unwrap(); - let schema: Result = serde_json::from_reader(file); - assert!(schema.is_ok()); - - let file = std::fs::File::open("./tests/serde/checkpoint_schema.json").unwrap(); - let schema: Result = serde_json::from_reader(file); - assert!(schema.is_ok()) - } - #[test] fn test_get_invariants() { let schema: StructType = serde_json::from_value(json!({ @@ -864,6 +198,6 @@ mod tests { #[test] fn test_identity_columns() { let buf = r#"{"type":"struct","fields":[{"name":"ID_D_DATE","type":"long","nullable":true,"metadata":{"delta.identity.start":1,"delta.identity.step":1,"delta.identity.allowExplicitInsert":false}},{"name":"TXT_DateKey","type":"string","nullable":true,"metadata":{}}]}"#; - let schema: StructType = serde_json::from_str(buf).expect("Failed to load"); + let _schema: StructType = serde_json::from_str(buf).expect("Failed to load"); } } diff --git a/crates/core/src/kernel/scalars.rs b/crates/core/src/kernel/scalars.rs new file mode 100644 index 0000000000..92c6838234 --- /dev/null +++ b/crates/core/src/kernel/scalars.rs @@ -0,0 +1,233 @@ +//! Auxiliary methods for dealing with kernel scalars +//! +use arrow_array::Array; +use arrow_schema::TimeUnit; +use chrono::{DateTime, TimeZone, Utc}; +use delta_kernel::{ + expressions::{Scalar, StructData}, + schema::StructField, +}; +use object_store::path::Path; +use std::cmp::Ordering; +use urlencoding::encode; + +use crate::NULL_PARTITION_VALUE_DATA_PATH; + +/// Auxiliary methods for dealing with kernel scalars +pub trait ScalarExt: Sized { + /// Serialize to string + fn serialize(&self) -> String; + /// Serialize to string for use in hive partition file names + fn serialize_encoded(&self) -> String; + /// Create a [`Scalar`] from an arrow array row + fn from_array(arr: &dyn Array, index: usize) -> Option; +} + +impl ScalarExt for Scalar { + /// Serializes this scalar as a string. 
+ fn serialize(&self) -> String { + match self { + Self::String(s) => s.to_owned(), + Self::Byte(b) => b.to_string(), + Self::Short(s) => s.to_string(), + Self::Integer(i) => i.to_string(), + Self::Long(l) => l.to_string(), + Self::Float(f) => f.to_string(), + Self::Double(d) => d.to_string(), + Self::Boolean(b) => if *b { "true" } else { "false" }.to_string(), + Self::TimestampNtz(ts) | Self::Timestamp(ts) => { + let ts = Utc.timestamp_micros(*ts).single().unwrap(); + ts.format("%Y-%m-%d %H:%M:%S%.6f").to_string() + } + Self::Date(days) => { + let date = DateTime::from_timestamp(*days as i64 * 24 * 3600, 0).unwrap(); + date.format("%Y-%m-%d").to_string() + } + Self::Decimal(value, _, scale) => match scale.cmp(&0) { + Ordering::Equal => value.to_string(), + Ordering::Greater => { + let scalar_multiple = 10_i128.pow(*scale as u32); + let mut s = String::new(); + s.push_str((value / scalar_multiple).to_string().as_str()); + s.push('.'); + s.push_str(&format!( + "{:0>scale$}", + value % scalar_multiple, + scale = *scale as usize + )); + s + } + Ordering::Less => { + let mut s = value.to_string(); + for _ in 0..*scale { + s.push('0'); + } + s + } + }, + Self::Binary(val) => create_escaped_binary_string(val.as_slice()), + Self::Null(_) => "null".to_string(), + Self::Struct(_) => unimplemented!(), + } + } + + /// Serializes this scalar as a string for use in hive partition file names. + fn serialize_encoded(&self) -> String { + if self.is_null() { + return NULL_PARTITION_VALUE_DATA_PATH.to_string(); + } + encode(Path::from(self.serialize()).as_ref()).to_string() + } + + /// Create a [`Scalar`] form a row in an arrow array. + fn from_array(arr: &dyn Array, index: usize) -> Option { + use arrow_array::*; + use arrow_schema::DataType::*; + + if arr.len() <= index { + return None; + } + if arr.is_null(index) { + return Some(Self::Null(arr.data_type().try_into().ok()?)); + } + + match arr.data_type() { + Utf8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::String(v.value(index).to_string())), + LargeUtf8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::String(v.value(index).to_string())), + Boolean => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Boolean(v.value(index))), + Binary => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + LargeBinary => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + FixedSizeBinary(_) => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + Int8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Byte(v.value(index))), + Int16 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Short(v.value(index))), + Int32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Integer(v.value(index))), + Int64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Long(v.value(index))), + UInt8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Byte(v.value(index) as i8)), + UInt16 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Short(v.value(index) as i16)), + UInt32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Integer(v.value(index) as i32)), + UInt64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Long(v.value(index) as i64)), + Float32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Float(v.value(index))), + Float64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Double(v.value(index))), + Decimal128(precision, scale) => { + arr.as_any().downcast_ref::().map(|v| { + let value = 
v.value(index); + Self::Decimal(value, *precision, *scale as u8) + }) + } + Date32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Date(v.value(index))), + Timestamp(TimeUnit::Microsecond, None) => arr + .as_any() + .downcast_ref::() + .map(|v| Self::TimestampNtz(v.value(index))), + Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.eq_ignore_ascii_case("utc") => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Timestamp(v.clone().value(index))), + Struct(fields) => { + let struct_fields = fields + .iter() + .flat_map(|f| TryFrom::try_from(f.as_ref())) + .collect::>(); + let values = arr + .as_any() + .downcast_ref::() + .and_then(|struct_arr| { + struct_fields + .iter() + .map(|f: &StructField| { + struct_arr + .column_by_name(f.name()) + .and_then(|c| Self::from_array(c.as_ref(), index)) + }) + .collect::>>() + })?; + Some(Self::Struct( + StructData::try_new(struct_fields, values).ok()?, + )) + } + Float16 + | Decimal256(_, _) + | List(_) + | LargeList(_) + | FixedSizeList(_, _) + | Map(_, _) + | Date64 + | Timestamp(_, _) + | Time32(_) + | Time64(_) + | Duration(_) + | Interval(_) + | Dictionary(_, _) + | RunEndEncoded(_, _) + | Union(_, _) + | Utf8View + | BinaryView + | ListView(_) + | LargeListView(_) + | Null => None, + } + } +} + +fn create_escaped_binary_string(data: &[u8]) -> String { + let mut escaped_string = String::new(); + for &byte in data { + // Convert each byte to its two-digit hexadecimal representation + let hex_representation = format!("{:04X}", byte); + // Append the hexadecimal representation with an escape sequence + escaped_string.push_str("\\u"); + escaped_string.push_str(&hex_representation); + } + escaped_string +} diff --git a/crates/core/src/kernel/snapshot/log_data.rs b/crates/core/src/kernel/snapshot/log_data.rs index b874b53421..254616691c 100644 --- a/crates/core/src/kernel/snapshot/log_data.rs +++ b/crates/core/src/kernel/snapshot/log_data.rs @@ -1,16 +1,19 @@ use std::borrow::Cow; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use std::sync::Arc; use arrow_array::{Array, Int32Array, Int64Array, MapArray, RecordBatch, StringArray, StructArray}; -use chrono::{NaiveDateTime, TimeZone, Utc}; +use chrono::{DateTime, Utc}; +use delta_kernel::expressions::Scalar; +use indexmap::IndexMap; use object_store::path::Path; use object_store::ObjectMeta; use percent_encoding::percent_decode_str; +use super::super::scalars::ScalarExt; use crate::kernel::arrow::extract::{extract_and_cast, extract_and_cast_opt}; use crate::kernel::{ - DataType, DeletionVectorDescriptor, Metadata, Remove, Scalar, StructField, StructType, + DataType, DeletionVectorDescriptor, Metadata, Remove, StructField, StructType, }; use crate::{DeltaResult, DeltaTableError}; @@ -19,37 +22,35 @@ const COL_MIN_VALUES: &str = "minValues"; const COL_MAX_VALUES: &str = "maxValues"; const COL_NULL_COUNT: &str = "nullCount"; -pub(crate) type PartitionFields<'a> = Arc>; -pub(crate) type PartitionValues<'a> = BTreeMap<&'a str, Scalar>; +pub(crate) type PartitionFields<'a> = Arc>; +pub(crate) type PartitionValues<'a> = IndexMap<&'a str, Scalar>; pub(crate) trait PartitionsExt { fn hive_partition_path(&self) -> String; } -impl PartitionsExt for BTreeMap<&str, Scalar> { +impl PartitionsExt for IndexMap<&str, Scalar> { fn hive_partition_path(&self) -> String { - let mut fields = self + let fields = self .iter() .map(|(k, v)| { let encoded = v.serialize_encoded(); format!("{k}={encoded}") }) .collect::>(); - fields.reverse(); fields.join("/") } } -impl PartitionsExt for 
BTreeMap { +impl PartitionsExt for IndexMap { fn hive_partition_path(&self) -> String { - let mut fields = self + let fields = self .iter() .map(|(k, v)| { let encoded = v.serialize_encoded(); format!("{k}={encoded}") }) .collect::>(); - fields.reverse(); fields.join("/") } } @@ -121,9 +122,9 @@ impl<'a> DeletionVectorView<'a> { } } -/// A view into the log data representiang a single logical file. +/// A view into the log data representing a single logical file. /// -/// This stuct holds a pointer to a specific row in the log data and provides access to the +/// This struct holds a pointer to a specific row in the log data and provides access to the /// information stored in that row by tracking references to the underlying arrays. /// /// Additionally, references to some table metadata is tracked to provide higher level @@ -179,20 +180,18 @@ impl LogicalFile<'_> { /// Datetime of the last modification time of the file. pub fn modification_datetime(&self) -> DeltaResult> { - Ok(Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_millis(self.modification_time()).ok_or( - DeltaTableError::from(crate::protocol::ProtocolError::InvalidField(format!( - "invalid modification_time: {:?}", - self.modification_time() - ))), - )?, + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), )) } /// The partition values for this logical file. pub fn partition_values(&self) -> DeltaResult> { if self.partition_fields.is_empty() { - return Ok(BTreeMap::new()); + return Ok(IndexMap::new()); } let map_value = self.partition_values.value(self.index); let keys = map_value @@ -237,7 +236,7 @@ impl LogicalFile<'_> { .unwrap_or(Scalar::Null(f.data_type.clone())); Ok((*k, val)) }) - .collect::>>() + .collect::>>() } /// Defines a deletion vector @@ -354,8 +353,17 @@ impl<'a> FileStatsAccessor<'a> { metadata .partition_columns .iter() - .map(|c| Ok((c.as_str(), schema.field_with_name(c.as_str())?))) - .collect::>>()?, + .map(|c| { + Ok(( + c.as_str(), + schema + .field(c.as_str()) + .ok_or(DeltaTableError::PartitionError { + partition: c.clone(), + })?, + )) + }) + .collect::>>()?, ); let deletion_vector = extract_and_cast_opt::(data, "add.deletionVector"); let deletion_vector = deletion_vector.and_then(|dv| { @@ -476,7 +484,7 @@ mod datafusion { use super::*; use crate::kernel::arrow::extract::{extract_and_cast_opt, extract_column}; - // TODO validate this works with "wide and narrow" boulds / stats + // TODO validate this works with "wide and narrow" builds / stats impl FileStatsAccessor<'_> { fn collect_count(&self, name: &str) -> Precision { @@ -550,7 +558,15 @@ mod datafusion { _ => None, }) .collect::>>() - .map(|o| Precision::Exact(ScalarValue::Struct(Some(o), fields.clone()))) + .map(|o| { + let arrays = o + .into_iter() + .map(|sv| sv.to_array()) + .collect::, datafusion_common::DataFusionError>>() + .unwrap(); + let sa = StructArray::new(fields.clone(), arrays, None); + Precision::Exact(ScalarValue::Struct(Arc::new(sa))) + }) .unwrap_or(Precision::Absent); } _ => Precision::Absent, @@ -665,7 +681,6 @@ mod datafusion { let column_statistics = self .schema .fields() - .iter() .map(|f| self.column_stats(f.name())) .collect::>>()?; Some(Statistics { diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 6ad1690db1..69076bd066 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ 
b/crates/core/src/kernel/snapshot/log_segment.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering; -use std::collections::{HashMap, VecDeque}; +use std::collections::VecDeque; use std::sync::Arc; use arrow_array::RecordBatch; @@ -9,34 +9,25 @@ use itertools::Itertools; use lazy_static::lazy_static; use object_store::path::Path; use object_store::{Error as ObjectStoreError, ObjectMeta, ObjectStore}; -use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; +use parquet::arrow::ProjectionMask; use regex::Regex; use serde::{Deserialize, Serialize}; -use serde_json::Value; use tracing::debug; use super::parse; -use crate::kernel::{arrow::json, Action, ActionType, Metadata, Protocol, Schema, StructType}; +use crate::kernel::{arrow::json, ActionType, Metadata, Protocol, Schema, StructType}; use crate::logstore::LogStore; -use crate::operations::transaction::get_commit_bytes; -use crate::protocol::DeltaOperation; +use crate::operations::transaction::CommitData; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; -pub type CommitData = (Vec, DeltaOperation, Option>); - lazy_static! { static ref CHECKPOINT_FILE_PATTERN: Regex = Regex::new(r"\d+\.checkpoint(\.\d+\.\d+)?\.parquet").unwrap(); - static ref DELTA_FILE_PATTERN: Regex = Regex::new(r"\d+\.json").unwrap(); - pub(super) static ref COMMIT_SCHEMA: StructType = StructType::new(vec![ - ActionType::Add.schema_field().clone(), - ActionType::Remove.schema_field().clone(), - ]); - pub(super) static ref CHECKPOINT_SCHEMA: StructType = - StructType::new(vec![ActionType::Add.schema_field().clone(),]); + static ref DELTA_FILE_PATTERN: Regex = Regex::new(r"^\d+\.json$").unwrap(); pub(super) static ref TOMBSTONE_SCHEMA: StructType = StructType::new(vec![ActionType::Remove.schema_field().clone(),]); } @@ -260,19 +251,45 @@ impl LogSegment { pub(super) fn checkpoint_stream( &self, store: Arc, - _read_schema: &Schema, + read_schema: &Schema, config: &DeltaTableConfig, ) -> BoxStream<'_, DeltaResult> { let batch_size = config.log_batch_size; + let read_schema = Arc::new(read_schema.clone()); futures::stream::iter(self.checkpoint_files.clone()) .map(move |meta| { let store = store.clone(); + let read_schema = read_schema.clone(); async move { - let reader = ParquetObjectReader::new(store, meta); - let options = ArrowReaderOptions::new(); //.with_page_index(enable_page_index); - let builder = - ParquetRecordBatchStreamBuilder::new_with_options(reader, options).await?; - builder.with_batch_size(batch_size).build() + let mut reader = ParquetObjectReader::new(store, meta); + let options = ArrowReaderOptions::new(); + let reader_meta = ArrowReaderMetadata::load_async(&mut reader, options).await?; + + // Create projection selecting read_schema fields from parquet file's arrow schema + let projection = reader_meta + .schema() + .fields + .iter() + .enumerate() + .filter_map(|(i, f)| { + if read_schema.fields.contains_key(f.name()) { + Some(i) + } else { + None + } + }) + .collect::>(); + let projection = + ProjectionMask::roots(reader_meta.parquet_schema(), projection); + + // Note: the output batch stream batches have all null value rows for action types not + // present in the projection. When a RowFilter was used to remove null rows, the performance + // got worse when projecting all fields, and was no better when projecting a subset. 
+ // The all null rows are filtered out anyway when the batch stream is consumed. + ParquetRecordBatchStreamBuilder::new_with_metadata(reader, reader_meta) + .with_projection(projection.clone()) + .with_batch_size(batch_size) + .build() } }) .buffered(config.log_buffer_size) @@ -351,10 +368,10 @@ impl LogSegment { let mut decoder = json::get_decoder(Arc::new(read_schema.try_into()?), config)?; let mut commit_data = Vec::new(); - for (actions, operation, app_metadata) in commits { + for commit in commits { self.version += 1; let path = log_path.child(format!("{:020}.json", self.version)); - let bytes = get_commit_bytes(operation, actions, app_metadata.clone())?; + let bytes = commit.get_bytes()?; let meta = ObjectMeta { location: path, size: bytes.len(), @@ -383,13 +400,13 @@ struct CheckpointMetadata { #[allow(unreachable_pub)] // used by acceptance tests (TODO make an fn accessor?) pub version: i64, /// The number of actions that are stored in the checkpoint. - pub(crate) size: i32, + pub(crate) size: i64, /// The number of fragments if the last checkpoint was written in multiple parts. pub(crate) parts: Option, /// The number of bytes of the checkpoint. - pub(crate) size_in_bytes: Option, + pub(crate) size_in_bytes: Option, /// The number of AddFile actions in the checkpoint. - pub(crate) num_of_add_files: Option, + pub(crate) num_of_add_files: Option, /// The schema of the checkpoint file. pub(crate) checkpoint_schema: Option, /// The checksum of the last checkpoint JSON. @@ -449,7 +466,7 @@ async fn list_log_files_with_checkpoint( let checkpoint_files = files .iter() .filter_map(|f| { - if f.location.is_checkpoint_file() { + if f.location.is_checkpoint_file() && f.location.commit_version() == Some(cp.version) { Some(f.clone()) } else { None @@ -457,10 +474,16 @@ async fn list_log_files_with_checkpoint( }) .collect_vec(); - // TODO raise a proper error - assert_eq!(checkpoint_files.len(), cp.parts.unwrap_or(1) as usize); - - Ok((commit_files, checkpoint_files)) + if checkpoint_files.len() != cp.parts.unwrap_or(1) as usize { + let msg = format!( + "Number of checkpoint files '{}' is not equal to number of checkpoint metadata parts '{:?}'", + checkpoint_files.len(), + cp.parts + ); + Err(DeltaTableError::MetadataError(msg)) + } else { + Ok((commit_files, checkpoint_files)) + } } /// List relevant log files. @@ -516,6 +539,15 @@ pub(super) async fn list_log_files( #[cfg(test)] pub(super) mod tests { use deltalake_test::utils::*; + use tokio::task::JoinHandle; + + use crate::{ + checkpoints::{create_checkpoint_for, create_checkpoint_from_table_uri_and_cleanup}, + kernel::{Action, Add, Format, Remove}, + operations::transaction::{CommitBuilder, TableReference}, + protocol::{DeltaOperation, SaveMode}, + DeltaTableBuilder, + }; use super::*; @@ -617,4 +649,215 @@ pub(super) mod tests { Ok(()) } + + pub(crate) async fn concurrent_checkpoint(context: &IntegrationContext) -> TestResult { + context + .load_table(TestTables::LatestNotCheckpointed) + .await?; + let table_to_checkpoint = context + .table_builder(TestTables::LatestNotCheckpointed) + .load() + .await?; + let store = context + .table_builder(TestTables::LatestNotCheckpointed) + .build_storage()? 
+ .object_store(); + let slow_list_store = Arc::new(slow_store::SlowListStore { store }); + + let version = table_to_checkpoint.version(); + let load_task: JoinHandle> = tokio::spawn(async move { + let segment = + LogSegment::try_new(&Path::default(), Some(version), slow_list_store.as_ref()) + .await?; + Ok(segment) + }); + + create_checkpoint_from_table_uri_and_cleanup( + &table_to_checkpoint.table_uri(), + version, + Some(false), + ) + .await?; + + let segment = load_task.await??; + assert_eq!(segment.version, version); + + Ok(()) + } + + mod slow_store { + use std::sync::Arc; + + use futures::stream::BoxStream; + use object_store::{ + path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, + }; + + #[derive(Debug)] + pub(super) struct SlowListStore { + pub store: Arc, + } + + impl std::fmt::Display for SlowListStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "SlowListStore {{ store: {} }}", self.store) + } + } + + #[async_trait::async_trait] + impl object_store::ObjectStore for SlowListStore { + async fn put_opts( + &self, + location: &Path, + bytes: PutPayload, + opts: PutOptions, + ) -> Result { + self.store.put_opts(location, bytes, opts).await + } + async fn put_multipart(&self, location: &Path) -> Result> { + self.store.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOpts, + ) -> Result> { + self.store.put_multipart_opts(location, opts).await + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.store.get_opts(location, options).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + self.store.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + std::thread::sleep(std::time::Duration::from_secs(1)); + self.store.list(prefix) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + self.store.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.store.copy(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.store.copy_if_not_exists(from, to).await + } + } + } + + #[test] + pub fn is_commit_file_only_matches_commits() { + for path in [0, 1, 5, 10, 100, i64::MAX] + .into_iter() + .map(crate::storage::commit_uri_from_version) + { + assert!(path.is_commit_file()); + } + + let not_commits = ["_delta_log/_commit_2132c4fe-4077-476c-b8f5-e77fea04f170.json.tmp"]; + + for not_commit in not_commits { + let path = Path::from(not_commit); + assert!(!path.is_commit_file()); + } + } + + #[tokio::test] + async fn test_checkpoint_stream_parquet_read() { + let metadata = Metadata { + id: "test".to_string(), + format: Format::new("parquet".to_string(), None), + schema_string: r#"{"type":"struct", "fields": []}"#.to_string(), + ..Default::default() + }; + let protocol = Protocol::default(); + + let mut actions = vec![Action::Metadata(metadata), Action::Protocol(protocol)]; + for i in 0..10 { + actions.push(Action::Add(Add { + path: format!("part-{}.parquet", i), + modification_time: chrono::Utc::now().timestamp_millis() as i64, + ..Default::default() + })); + } + + let log_store = DeltaTableBuilder::from_uri("memory:///".to_string()) + .build_storage() + .unwrap(); + let op = DeltaOperation::Write { + mode: SaveMode::Overwrite, + partition_by: None, + predicate: None, + }; 
+ let commit = CommitBuilder::default() + .with_actions(actions) + .build(None, log_store.clone(), op) + .await + .unwrap(); + + let mut actions = Vec::new(); + // remove all but one file + for i in 0..9 { + actions.push(Action::Remove(Remove { + path: format!("part-{}.parquet", i), + deletion_timestamp: Some(chrono::Utc::now().timestamp_millis() as i64), + ..Default::default() + })) + } + + let op = DeltaOperation::Delete { predicate: None }; + let table_data = &commit.snapshot as &dyn TableReference; + let commit = CommitBuilder::default() + .with_actions(actions) + .build(Some(table_data), log_store.clone(), op) + .await + .unwrap(); + + create_checkpoint_for(commit.version, &commit.snapshot, log_store.as_ref()) + .await + .unwrap(); + + let batches = LogSegment::try_new( + &Path::default(), + Some(commit.version), + log_store.object_store().as_ref(), + ) + .await + .unwrap() + .checkpoint_stream( + log_store.object_store(), + &StructType::new(vec![ + ActionType::Metadata.schema_field().clone(), + ActionType::Protocol.schema_field().clone(), + ActionType::Add.schema_field().clone(), + ]), + &Default::default(), + ) + .try_collect::>() + .await + .unwrap(); + + let batch = arrow::compute::concat_batches(&batches[0].schema(), batches.iter()).unwrap(); + + // there are 9 remove action rows but all columns are null + // because the removes are not projected in the schema + // these get filtered out upstream and there was no perf + // benefit when applying a row filter + // in addition there is 1 add, 1 metadata, and 1 protocol row + assert_eq!(batch.num_rows(), 12); + + assert_eq!(batch.schema().fields().len(), 3); + assert!(batch.schema().field_with_name("metaData").is_ok()); + assert!(batch.schema().field_with_name("protocol").is_ok()); + assert!(batch.schema().field_with_name("add").is_ok()); + } } diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index d12018c245..d34b78fbed 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -15,6 +15,7 @@ //! //! 
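A note on the `CommitData` type that the log-segment code above now consumes in place of the old `(actions, operation, app_metadata)` tuple alias: it travels through `advance` as `commit.actions` and is serialized with `commit.get_bytes()`. The following is a minimal sketch of assembling one, based on the four-argument constructor used in the tests in this diff (actions, operation, app metadata, app transactions); the file path, app id and version are hypothetical placeholders, not values from this PR.

use std::collections::HashMap;

use crate::kernel::{Action, Add, Transaction};
use crate::operations::transaction::CommitData;
use crate::protocol::{DeltaOperation, SaveMode};

fn example_commit_payload() -> CommitData {
    // One new data file for this commit (path is a placeholder).
    let add = Action::Add(Add {
        path: "part-00000.parquet".to_string(),
        modification_time: chrono::Utc::now().timestamp_millis(),
        ..Default::default()
    });
    let operation = DeltaOperation::Write {
        mode: SaveMode::Append,
        partition_by: None,
        predicate: None,
    };
    // App-level commit metadata and idempotent app transactions now travel
    // with the commit payload itself rather than as a separate Option argument.
    CommitData::new(
        vec![add],
        operation,
        HashMap::new(),
        vec![Transaction::new("my-etl-app", 42)],
    )
}

When the snapshot's `advance` path further below replays such commits, it reads `commit.actions` directly and writes the JSON produced by `commit.get_bytes()` into the log.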
+use std::collections::{HashMap, HashSet}; use std::io::{BufRead, BufReader, Cursor}; use std::sync::Arc; @@ -25,22 +26,29 @@ use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::ObjectStore; -use self::log_segment::{CommitData, LogSegment, PathExt}; +use self::log_segment::{LogSegment, PathExt}; use self::parse::{read_adds, read_removes}; use self::replay::{LogMapper, LogReplayScanner, ReplayStream}; -use super::{Action, Add, CommitInfo, DataType, Metadata, Protocol, Remove, StructField}; -use crate::kernel::StructType; +use self::visitors::*; +use super::{ + Action, Add, AddCDCFile, CommitInfo, DataType, Metadata, Protocol, Remove, StructField, + Transaction, +}; +use crate::kernel::parse::read_cdf_adds; +use crate::kernel::{ActionType, StructType}; use crate::logstore::LogStore; +use crate::operations::transaction::CommitData; use crate::table::config::TableConfig; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; +pub use self::log_data::*; + mod log_data; mod log_segment; pub(crate) mod parse; mod replay; mod serde; - -pub use log_data::*; +mod visitors; /// A snapshot of a Delta table #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -196,21 +204,39 @@ impl Snapshot { } /// Get the files in the snapshot - pub fn files( + pub fn files<'a>( &self, store: Arc, - ) -> DeltaResult>>> { + visitors: &'a mut Vec>, + ) -> DeltaResult>>> { + let mut schema_actions: HashSet<_> = + visitors.iter().flat_map(|v| v.required_actions()).collect(); + + schema_actions.insert(ActionType::Add); + let checkpoint_stream = self.log_segment.checkpoint_stream( + store.clone(), + &StructType::new( + schema_actions + .iter() + .map(|a| a.schema_field().clone()) + .collect(), + ), + &self.config, + ); + + schema_actions.insert(ActionType::Remove); let log_stream = self.log_segment.commit_stream( store.clone(), - &log_segment::COMMIT_SCHEMA, + &StructType::new( + schema_actions + .iter() + .map(|a| a.schema_field().clone()) + .collect(), + ), &self.config, )?; - let checkpoint_stream = self.log_segment.checkpoint_stream( - store, - &log_segment::CHECKPOINT_SCHEMA, - &self.config, - ); - ReplayStream::try_new(log_stream, checkpoint_stream, &self) + + ReplayStream::try_new(log_stream, checkpoint_stream, self, visitors) } /// Get the commit infos in the snapshot @@ -283,12 +309,14 @@ impl Snapshot { } /// Get the statistics schema of the snapshot - pub fn stats_schema(&self) -> DeltaResult { + pub fn stats_schema(&self, table_schema: Option<&StructType>) -> DeltaResult { + let schema = table_schema.unwrap_or_else(|| self.schema()); + let stats_fields = if let Some(stats_cols) = self.table_config().stats_columns() { stats_cols .iter() - .map(|col| match self.schema().field_with_name(col) { - Ok(field) => match field.data_type() { + .map(|col| match schema.field(col) { + Some(field) => match field.data_type() { DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => { Err(DeltaTableError::Generic(format!( "Stats column {} has unsupported type {}", @@ -310,17 +338,11 @@ impl Snapshot { .collect::, _>>()? 
} else { let num_indexed_cols = self.table_config().num_indexed_cols(); - self.schema() + schema .fields - .iter() + .values() .enumerate() - .filter_map(|(idx, f)| match f.data_type() { - DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => None, - _ if num_indexed_cols < 0 || (idx as i32) < num_indexed_cols => { - Some(StructField::new(f.name(), f.data_type().clone(), true)) - } - _ => None, - }) + .filter_map(|(idx, f)| stats_field(idx, num_indexed_cols, f)) .collect() }; Ok(StructType::new(vec![ @@ -340,6 +362,11 @@ impl Snapshot { #[derive(Debug, Clone, PartialEq)] pub struct EagerSnapshot { snapshot: Snapshot, + // additional actions that should be tracked during log replay. + tracked_actions: HashSet, + + transactions: Option>, + // NOTE: this is a Vec of RecordBatch instead of a single RecordBatch because // we do not yet enforce a consistent schema across all batches we read from the log. files: Vec, @@ -353,9 +380,51 @@ impl EagerSnapshot { config: DeltaTableConfig, version: Option, ) -> DeltaResult { + Self::try_new_with_visitor(table_root, store, config, version, Default::default()).await + } + + /// Create a new [`EagerSnapshot`] instance + pub async fn try_new_with_visitor( + table_root: &Path, + store: Arc, + config: DeltaTableConfig, + version: Option, + tracked_actions: HashSet, + ) -> DeltaResult { + let mut visitors = tracked_actions + .iter() + .flat_map(get_visitor) + .collect::>(); let snapshot = Snapshot::try_new(table_root, store.clone(), config, version).await?; - let files = snapshot.files(store)?.try_collect().await?; - Ok(Self { snapshot, files }) + let files = snapshot.files(store, &mut visitors)?.try_collect().await?; + + let mut sn = Self { + snapshot, + files, + tracked_actions, + transactions: None, + }; + + sn.process_visitors(visitors)?; + + Ok(sn) + } + + fn process_visitors(&mut self, visitors: Vec>) -> DeltaResult<()> { + for visitor in visitors { + if let Some(tv) = visitor + .as_ref() + .as_any() + .downcast_ref::() + { + if self.transactions.is_none() { + self.transactions = Some(tv.app_transaction_version.clone()); + } else { + self.transactions = Some(tv.merge(self.transactions.as_ref().unwrap())); + } + } + } + Ok(()) } #[cfg(test)] @@ -364,16 +433,21 @@ impl EagerSnapshot { let mut files = Vec::new(); let mut scanner = LogReplayScanner::new(); files.push(scanner.process_files_batch(&batch, true)?); - let mapper = LogMapper::try_new(&snapshot)?; + let mapper = LogMapper::try_new(&snapshot, None)?; files = files .into_iter() .map(|b| mapper.map_batch(b)) .collect::>>()?; - Ok(Self { snapshot, files }) + Ok(Self { + snapshot, + files, + tracked_actions: Default::default(), + transactions: None, + }) } /// Update the snapshot to the given version - pub async fn update( + pub async fn update<'a>( &mut self, log_store: Arc, target_version: Option, @@ -381,36 +455,71 @@ impl EagerSnapshot { if Some(self.version()) == target_version { return Ok(()); } + let new_slice = self .snapshot .update_inner(log_store.clone(), target_version) .await?; - if let Some(new_slice) = new_slice { - let files = std::mem::take(&mut self.files); - let log_stream = new_slice.commit_stream( - log_store.object_store().clone(), - &log_segment::COMMIT_SCHEMA, - &self.snapshot.config, - )?; - let checkpoint_stream = if new_slice.checkpoint_files.is_empty() { - futures::stream::iter(files.into_iter().map(Ok)).boxed() - } else { - new_slice - .checkpoint_stream( - log_store.object_store(), - &log_segment::CHECKPOINT_SCHEMA, - &self.snapshot.config, - ) - .boxed() - }; - let 
mapper = LogMapper::try_new(&self.snapshot)?; - let files = ReplayStream::try_new(log_stream, checkpoint_stream, &self.snapshot)? + + if new_slice.is_none() { + return Ok(()); + } + let new_slice = new_slice.unwrap(); + + let mut visitors = self + .tracked_actions + .iter() + .flat_map(get_visitor) + .collect::<Vec<_>>(); + + let mut schema_actions: HashSet<_> = + visitors.iter().flat_map(|v| v.required_actions()).collect(); + let files = std::mem::take(&mut self.files); + + schema_actions.insert(ActionType::Add); + let checkpoint_stream = if new_slice.checkpoint_files.is_empty() { + // NOTE: we don't need to add the visitor-relevant data here, as it is represented in the state already + futures::stream::iter(files.into_iter().map(Ok)).boxed() + } else { + let read_schema = StructType::new( + schema_actions + .iter() + .map(|a| a.schema_field().clone()) + .collect(), + ); + new_slice + .checkpoint_stream( + log_store.object_store(), + &read_schema, + &self.snapshot.config, + ) + .boxed() + }; + + schema_actions.insert(ActionType::Remove); + let read_schema = StructType::new( + schema_actions + .iter() + .map(|a| a.schema_field().clone()) + .collect(), + ); + let log_stream = new_slice.commit_stream( + log_store.object_store().clone(), + &read_schema, + &self.snapshot.config, + )?; + + let mapper = LogMapper::try_new(&self.snapshot, None)?; + + let files = + ReplayStream::try_new(log_stream, checkpoint_stream, &self.snapshot, &mut visitors)? .map(|batch| batch.and_then(|b| mapper.map_batch(b))) .try_collect() .await?; - self.files = files; - } + self.files = files; + self.process_visitors(visitors)?; + Ok(()) } @@ -477,6 +586,22 @@ impl EagerSnapshot { self.log_data().into_iter() } + /// Get an iterator for the CDC files added in this version + pub fn cdc_files(&self) -> DeltaResult<impl Iterator<Item = AddCDCFile> + '_> { + Ok(self.files.iter().flat_map(|b| read_cdf_adds(b)).flatten()) + } + + /// Iterate over all latest app transactions + pub fn transactions(&self) -> DeltaResult<impl Iterator<Item = Transaction> + '_> { + self.transactions + .as_ref() + .map(|t| t.values().cloned()) + .ok_or(DeltaTableError::Generic( + "Transactions are not available. Please enable tracking of transactions."
+ .to_string(), + )) + } + /// Advance the snapshot based on the given commit actions pub fn advance<'a>( &mut self, @@ -487,23 +612,38 @@ impl EagerSnapshot { let mut send = Vec::new(); for commit in commits { if metadata.is_none() { - metadata = commit.0.iter().find_map(|a| match a { + metadata = commit.actions.iter().find_map(|a| match a { Action::Metadata(metadata) => Some(metadata.clone()), _ => None, }); } if protocol.is_none() { - protocol = commit.0.iter().find_map(|a| match a { + protocol = commit.actions.iter().find_map(|a| match a { Action::Protocol(protocol) => Some(protocol.clone()), _ => None, }); } send.push(commit); } + + let mut visitors = self + .tracked_actions + .iter() + .flat_map(get_visitor) + .collect::>(); + let mut schema_actions: HashSet<_> = + visitors.iter().flat_map(|v| v.required_actions()).collect(); + schema_actions.extend([ActionType::Add, ActionType::Remove]); + let read_schema = StructType::new( + schema_actions + .iter() + .map(|a| a.schema_field().clone()) + .collect(), + ); let actions = self.snapshot.log_segment.advance( send, &self.table_root(), - &log_segment::COMMIT_SCHEMA, + &read_schema, &self.snapshot.config, )?; @@ -511,10 +651,20 @@ impl EagerSnapshot { let mut scanner = LogReplayScanner::new(); for batch in actions { - files.push(scanner.process_files_batch(&batch?, true)?); + let batch = batch?; + files.push(scanner.process_files_batch(&batch, true)?); + for visitor in &mut visitors { + visitor.visit_batch(&batch)?; + } } - let mapper = LogMapper::try_new(&self.snapshot)?; + let mapper = if let Some(metadata) = &metadata { + let new_schema: StructType = serde_json::from_str(&metadata.schema_string)?; + LogMapper::try_new(&self.snapshot, Some(&new_schema))? + } else { + LogMapper::try_new(&self.snapshot, None)? 
+ }; + self.files = files .into_iter() .chain( @@ -532,24 +682,44 @@ impl EagerSnapshot { if let Some(protocol) = protocol { self.snapshot.protocol = protocol; } + self.process_visitors(visitors)?; Ok(self.snapshot.version()) } } -fn to_count_field(field: &StructField) -> Option { +fn stats_field(idx: usize, num_indexed_cols: i32, field: &StructField) -> Option { + if !(num_indexed_cols < 0 || (idx as i32) < num_indexed_cols) { + return None; + } match field.data_type() { DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => None, - DataType::Struct(s) => Some(StructField::new( + DataType::Struct(dt_struct) => Some(StructField::new( field.name(), StructType::new( - s.fields() - .iter() - .filter_map(to_count_field) - .collect::>(), + dt_struct + .fields() + .flat_map(|f| stats_field(idx, num_indexed_cols, f)) + .collect(), ), true, )), + DataType::Primitive(_) => Some(StructField::new( + field.name(), + field.data_type.clone(), + true, + )), + } +} + +fn to_count_field(field: &StructField) -> Option { + match field.data_type() { + DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => None, + DataType::Struct(s) => Some(StructField::new( + field.name(), + StructType::new(s.fields().filter_map(to_count_field).collect::>()), + true, + )), _ => Some(StructField::new(field.name(), DataType::LONG, true)), } } @@ -570,12 +740,14 @@ mod datafusion { #[cfg(test)] mod tests { + use std::collections::HashMap; + use chrono::Utc; use deltalake_test::utils::*; use futures::TryStreamExt; use itertools::Itertools; - use super::log_segment::tests::test_log_segment; + use super::log_segment::tests::{concurrent_checkpoint, test_log_segment}; use super::replay::tests::test_log_replay; use super::*; use crate::kernel::Remove; @@ -597,6 +769,13 @@ mod tests { Ok(()) } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_concurrent_checkpoint() -> TestResult { + let context = IntegrationContext::new(Box::::default())?; + concurrent_checkpoint(&context).await?; + Ok(()) + } + async fn test_snapshot(context: &IntegrationContext) -> TestResult { let store = context .table_builder(TestTables::Simple) @@ -630,7 +809,7 @@ mod tests { assert_eq!(tombstones.len(), 31); let batches = snapshot - .files(store.clone())? + .files(store.clone(), &mut vec![])? .try_collect::>() .await?; let expected = [ @@ -660,7 +839,7 @@ mod tests { ) .await?; let batches = snapshot - .files(store.clone())? + .files(store.clone(), &mut vec![])? 
.try_collect::>() .await?; let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); @@ -750,14 +929,17 @@ mod tests { }) .collect_vec(); - let actions = vec![( + let operation = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: None, + predicate: None, + }; + + let actions = vec![CommitData::new( removes, - DeltaOperation::Write { - mode: SaveMode::Append, - partition_by: None, - predicate: None, - }, - None, + operation, + HashMap::new(), + Vec::new(), )]; let new_version = snapshot.advance(&actions)?; diff --git a/crates/core/src/kernel/snapshot/parse.rs b/crates/core/src/kernel/snapshot/parse.rs index 0070880c9b..a3ccef1902 100644 --- a/crates/core/src/kernel/snapshot/parse.rs +++ b/crates/core/src/kernel/snapshot/parse.rs @@ -6,7 +6,7 @@ use arrow_array::{ use percent_encoding::percent_decode_str; use crate::kernel::arrow::extract::{self as ex, ProvidesColumnByName}; -use crate::kernel::{Add, DeletionVectorDescriptor, Metadata, Protocol, Remove}; +use crate::kernel::{Add, AddCDCFile, DeletionVectorDescriptor, Metadata, Protocol, Remove}; use crate::{DeltaResult, DeltaTableError}; pub(super) fn read_metadata(batch: &dyn ProvidesColumnByName) -> DeltaResult> { @@ -134,6 +134,39 @@ pub(super) fn read_adds(array: &dyn ProvidesColumnByName) -> DeltaResult DeltaResult> { + let mut result = Vec::new(); + + if let Some(arr) = ex::extract_and_cast_opt::(array, "cdc") { + let path = ex::extract_and_cast::(arr, "path")?; + let pvs = ex::extract_and_cast_opt::(arr, "partitionValues"); + let size = ex::extract_and_cast::(arr, "size")?; + let data_change = ex::extract_and_cast::(arr, "dataChange")?; + let tags = ex::extract_and_cast_opt::(arr, "tags"); + + for i in 0..arr.len() { + if arr.is_valid(i) { + let path_ = ex::read_str(path, i)?; + let path_ = percent_decode_str(path_) + .decode_utf8() + .map_err(|_| DeltaTableError::Generic("illegal path encoding".into()))? + .to_string(); + result.push(AddCDCFile { + path: path_, + size: ex::read_primitive(size, i)?, + data_change: ex::read_bool(data_change, i)?, + partition_values: pvs + .and_then(|pv| collect_map(&pv.value(i)).map(|m| m.collect())) + .unwrap_or_default(), + tags: tags.and_then(|t| collect_map(&t.value(i)).map(|m| m.collect())), + }); + } + } + } + + Ok(result) +} + pub(super) fn read_removes(array: &dyn ProvidesColumnByName) -> DeltaResult> { let mut result = Vec::new(); diff --git a/crates/core/src/kernel/snapshot/replay.rs b/crates/core/src/kernel/snapshot/replay.rs index 71408b27d5..3efd9584e2 100644 --- a/crates/core/src/kernel/snapshot/replay.rs +++ b/crates/core/src/kernel/snapshot/replay.rs @@ -21,16 +21,20 @@ use tracing::debug; use crate::kernel::arrow::extract::{self as ex, ProvidesColumnByName}; use crate::kernel::arrow::json; +use crate::kernel::StructType; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; +use super::ReplayVisitor; use super::Snapshot; pin_project! { - pub struct ReplayStream { + pub struct ReplayStream<'a, S> { scanner: LogReplayScanner, mapper: Arc, + visitors: &'a mut Vec>, + #[pin] commits: S, @@ -39,9 +43,14 @@ pin_project! 
{ } } -impl ReplayStream { - pub(super) fn try_new(commits: S, checkpoint: S, snapshot: &Snapshot) -> DeltaResult { - let stats_schema = Arc::new((&snapshot.stats_schema()?).try_into()?); +impl<'a, S> ReplayStream<'a, S> { + pub(super) fn try_new( + commits: S, + checkpoint: S, + snapshot: &Snapshot, + visitors: &'a mut Vec>, + ) -> DeltaResult { + let stats_schema = Arc::new((&snapshot.stats_schema(None)?).try_into()?); let mapper = Arc::new(LogMapper { stats_schema, config: snapshot.config.clone(), @@ -50,6 +59,7 @@ impl ReplayStream { commits, checkpoint, mapper, + visitors, scanner: LogReplayScanner::new(), }) } @@ -61,9 +71,12 @@ pub(super) struct LogMapper { } impl LogMapper { - pub(super) fn try_new(snapshot: &Snapshot) -> DeltaResult { + pub(super) fn try_new( + snapshot: &Snapshot, + table_schema: Option<&StructType>, + ) -> DeltaResult { Ok(Self { - stats_schema: Arc::new((&snapshot.stats_schema()?).try_into()?), + stats_schema: Arc::new((&snapshot.stats_schema(table_schema)?).try_into()?), config: snapshot.config.clone(), }) } @@ -79,7 +92,7 @@ fn map_batch( config: &DeltaTableConfig, ) -> DeltaResult { let stats_col = ex::extract_and_cast_opt::(&batch, "add.stats"); - let stats_parsed_col = ex::extract_and_cast_opt::(&batch, "add.stats_parsed"); + let stats_parsed_col = ex::extract_and_cast_opt::(&batch, "add.stats_parsed"); if stats_parsed_col.is_some() { return Ok(batch); } @@ -127,7 +140,7 @@ fn map_batch( Ok(batch) } -impl Stream for ReplayStream +impl<'a, S> Stream for ReplayStream<'a, S> where S: Stream>, { @@ -136,20 +149,34 @@ where fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let this = self.project(); let res = this.commits.poll_next(cx).map(|b| match b { - Some(Ok(batch)) => match this.scanner.process_files_batch(&batch, true) { - Ok(filtered) => Some(this.mapper.map_batch(filtered)), - Err(e) => Some(Err(e)), - }, - Some(Err(e)) => Some(Err(e)), + Some(Ok(batch)) => { + for visitor in this.visitors.iter_mut() { + if let Err(e) = visitor.visit_batch(&batch) { + return Some(Err(e)); + } + } + match this.scanner.process_files_batch(&batch, true) { + Ok(filtered) => Some(this.mapper.map_batch(filtered)), + err => Some(err), + } + } + Some(e) => Some(e), None => None, }); if matches!(res, Poll::Ready(None)) { this.checkpoint.poll_next(cx).map(|b| match b { - Some(Ok(batch)) => match this.scanner.process_files_batch(&batch, false) { - Ok(filtered) => Some(this.mapper.map_batch(filtered)), - Err(e) => Some(Err(e)), - }, - Some(Err(e)) => Some(Err(e)), + Some(Ok(batch)) => { + for visitor in this.visitors.iter_mut() { + if let Err(e) = visitor.visit_batch(&batch) { + return Some(Err(e)); + } + } + match this.scanner.process_files_batch(&batch, false) { + Ok(filtered) => Some(this.mapper.map_batch(filtered)), + err => Some(err), + } + } + Some(e) => Some(e), None => None, }) } else { diff --git a/crates/core/src/kernel/snapshot/serde.rs b/crates/core/src/kernel/snapshot/serde.rs index 5162c4a1fe..dd7403bc28 100644 --- a/crates/core/src/kernel/snapshot/serde.rs +++ b/crates/core/src/kernel/snapshot/serde.rs @@ -1,6 +1,6 @@ use arrow_ipc::reader::FileReader; use arrow_ipc::writer::FileWriter; -use chrono::{TimeZone, Utc}; +use chrono::{DateTime, TimeZone, Utc}; use object_store::ObjectMeta; use serde::de::{self, Deserializer, SeqAccess, Visitor}; use serde::{ser::SerializeSeq, Deserialize, Serialize}; @@ -99,9 +99,8 @@ impl<'de> Visitor<'de> for LogSegmentVisitor { .map(|f| ObjectMeta { location: f.path.into(), size: f.size, - last_modified: 
Utc.from_utc_datetime( - &chrono::NaiveDateTime::from_timestamp_millis(f.last_modified).unwrap(), - ), + last_modified: DateTime::from_timestamp_millis(f.last_modified).unwrap(), + version: None, e_tag: None, }) @@ -126,6 +125,8 @@ impl Serialize for EagerSnapshot { { let mut seq = serializer.serialize_seq(None)?; seq.serialize_element(&self.snapshot)?; + seq.serialize_element(&self.tracked_actions)?; + seq.serialize_element(&self.transactions)?; for batch in self.files.iter() { let mut buffer = vec![]; let mut writer = FileWriter::try_new(&mut buffer, batch.schema().as_ref()) @@ -153,10 +154,15 @@ impl<'de> Visitor<'de> for EagerSnapshotVisitor { where V: SeqAccess<'de>, { - println!("eager: {:?}", "start"); let snapshot = seq .next_element()? .ok_or_else(|| de::Error::invalid_length(0, &self))?; + let tracked_actions = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(1, &self))?; + let transactions = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(2, &self))?; let mut files = Vec::new(); while let Some(elem) = seq.next_element::>()? { let mut reader = @@ -171,7 +177,12 @@ impl<'de> Visitor<'de> for EagerSnapshotVisitor { })?; files.push(rb); } - Ok(EagerSnapshot { snapshot, files }) + Ok(EagerSnapshot { + snapshot, + files, + tracked_actions, + transactions, + }) } } diff --git a/crates/core/src/kernel/snapshot/visitors.rs b/crates/core/src/kernel/snapshot/visitors.rs new file mode 100644 index 0000000000..1b68026a5b --- /dev/null +++ b/crates/core/src/kernel/snapshot/visitors.rs @@ -0,0 +1,192 @@ +//! Log replay visitors. +//! +//! Log replay visitors allow to extract additional actions during log replay. + +use std::collections::HashMap; + +use arrow::compute::{filter_record_batch, is_not_null}; +use arrow_array::{Array, Int64Array, RecordBatch, StringArray, StructArray}; + +use super::ActionType; +use crate::errors::DeltaResult; +use crate::kernel::arrow::extract as ex; +use crate::kernel::Transaction; + +/// Allows hooking into the reading of commit files and checkpoints whenever a table is loaded or updated. 
+pub trait ReplayVisitor: std::fmt::Debug + Send + Sync { + fn as_any(&self) -> &dyn std::any::Any; + + /// Process a batch + fn visit_batch(&mut self, batch: &RecordBatch) -> DeltaResult<()>; + + /// return all relevant actions for the visitor + fn required_actions(&self) -> Vec; +} + +/// Get the relevant visitor for the given action type +pub fn get_visitor(action: &ActionType) -> Option> { + match action { + ActionType::Txn => Some(Box::new(AppTransactionVisitor::new())), + _ => None, + } +} + +#[derive(Debug, Default)] +pub(crate) struct AppTransactionVisitor { + pub(crate) app_transaction_version: HashMap, +} + +impl AppTransactionVisitor { + pub(crate) fn new() -> Self { + Self { + app_transaction_version: HashMap::new(), + } + } +} + +impl AppTransactionVisitor { + pub fn merge(&self, map: &HashMap) -> HashMap { + let mut clone = map.clone(); + for (key, value) in &self.app_transaction_version { + clone.insert(key.clone(), value.clone()); + } + clone + } +} + +impl ReplayVisitor for AppTransactionVisitor { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn visit_batch(&mut self, batch: &arrow_array::RecordBatch) -> DeltaResult<()> { + if batch.column_by_name("txn").is_none() { + return Ok(()); + } + + let txn_col = ex::extract_and_cast::(batch, "txn")?; + let filtered = filter_record_batch(batch, &is_not_null(txn_col)?)?; + let arr = ex::extract_and_cast::(&filtered, "txn")?; + + let id = ex::extract_and_cast::(arr, "appId")?; + let version = ex::extract_and_cast::(arr, "version")?; + let last_updated = ex::extract_and_cast_opt::(arr, "lastUpdated"); + + for idx in 0..id.len() { + if id.is_valid(idx) { + let app_id = ex::read_str(id, idx)?; + if self.app_transaction_version.contains_key(app_id) { + continue; + } + self.app_transaction_version.insert( + app_id.to_owned(), + Transaction { + app_id: app_id.into(), + version: ex::read_primitive(version, idx)?, + last_updated: last_updated.and_then(|arr| ex::read_primitive_opt(arr, idx)), + }, + ); + } + } + + Ok(()) + } + + fn required_actions(&self) -> Vec { + vec![ActionType::Txn] + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Int64Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Fields, Schema}; + use std::sync::Arc; + + #[test] + fn test_app_txn_visitor() { + let fields: Fields = vec![ + Field::new("appId", DataType::Utf8, true), + Field::new("version", DataType::Int64, true), + Field::new("lastUpdated", DataType::Int64, true), + ] + .into(); + let schema = Arc::new(Schema::new(vec![Field::new( + "txn", + DataType::Struct(fields.clone()), + false, + )])); + + let mut data_app = vec![None, Some("my-app"), None]; + let mut data_version = vec![None, Some(1), None]; + let mut data_last_updated = vec![None, Some(123), None]; + let arr = Arc::new(StructArray::new( + fields.clone(), + vec![ + Arc::new(StringArray::from(data_app.clone())), + Arc::new(Int64Array::from(data_version.clone())), + Arc::new(Int64Array::from(data_last_updated.clone())), + ], + None, + )); + + let batch = RecordBatch::try_new(schema.clone(), vec![arr]).unwrap(); + let mut visitor = AppTransactionVisitor::new(); + visitor.visit_batch(&batch).unwrap(); + + let app_txns = visitor.app_transaction_version; + assert_eq!(app_txns.len(), 1); + assert_eq!(app_txns.get("my-app").map(|t| t.version), Some(1)); + assert_eq!( + app_txns.get("my-app").map(|t| t.last_updated), + Some(Some(123)) + ); + + // test that only the first encountered txn ist tacked for every app id. 
+ data_app.extend([None, Some("my-app")]); + data_version.extend([None, Some(2)]); + data_last_updated.extend([None, Some(124)]); + let arr = Arc::new(StructArray::new( + fields.clone(), + vec![ + Arc::new(StringArray::from(data_app.clone())), + Arc::new(Int64Array::from(data_version.clone())), + Arc::new(Int64Array::from(data_last_updated.clone())), + ], + None, + )); + let batch = RecordBatch::try_new(schema.clone(), vec![arr]).unwrap(); + let mut visitor = AppTransactionVisitor::new(); + visitor.visit_batch(&batch).unwrap(); + + let app_txns = visitor.app_transaction_version; + assert_eq!(app_txns.len(), 1); + assert_eq!(app_txns.get("my-app").map(|t| t.version), Some(1)); + assert_eq!( + app_txns.get("my-app").map(|t| t.last_updated), + Some(Some(123)) + ); + + // test that multiple app ids are tracked + data_app.extend([Some("my-other-app")]); + data_version.extend([Some(10)]); + data_last_updated.extend([Some(123)]); + let arr = Arc::new(StructArray::new( + fields.clone(), + vec![ + Arc::new(StringArray::from(data_app.clone())), + Arc::new(Int64Array::from(data_version.clone())), + Arc::new(Int64Array::from(data_last_updated.clone())), + ], + None, + )); + let batch = RecordBatch::try_new(schema.clone(), vec![arr]).unwrap(); + let mut visitor = AppTransactionVisitor::new(); + visitor.visit_batch(&batch).unwrap(); + + let app_txns = visitor.app_transaction_version; + assert_eq!(app_txns.len(), 2); + assert_eq!(app_txns.get("my-other-app").map(|t| t.version), Some(10)); + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index b24faf248e..4ef9fc06fd 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -47,7 +47,7 @@ //! # Querying Delta Tables with Datafusion //! //! Querying from local filesystem: -//! ```ignore +//! ``` //! use std::sync::Arc; //! use datafusion::prelude::SessionContext; //! diff --git a/crates/core/src/logstore/default_logstore.rs b/crates/core/src/logstore/default_logstore.rs index ed463e9947..8fd4f52beb 100644 --- a/crates/core/src/logstore/default_logstore.rs +++ b/crates/core/src/logstore/default_logstore.rs @@ -50,6 +50,14 @@ impl LogStore for DefaultLogStore { super::write_commit_entry(self.storage.as_ref(), version, tmp_commit).await } + async fn abort_commit_entry( + &self, + version: i64, + tmp_commit: &Path, + ) -> Result<(), TransactionError> { + super::abort_commit_entry(self.storage.as_ref(), version, tmp_commit).await + } + async fn get_latest_version(&self, current_version: i64) -> DeltaResult { super::get_latest_version(self, current_version).await } diff --git a/crates/core/src/logstore/mod.rs b/crates/core/src/logstore/mod.rs index e6b4c6e2d4..b8646cdb65 100644 --- a/crates/core/src/logstore/mod.rs +++ b/crates/core/src/logstore/mod.rs @@ -18,7 +18,9 @@ use crate::{ kernel::Action, operations::transaction::TransactionError, protocol::{get_last_checkpoint, ProtocolError}, - storage::{commit_uri_from_version, ObjectStoreRef, StorageOptions}, + storage::{ + commit_uri_from_version, retry_ext::ObjectStoreRetryExt, ObjectStoreRef, StorageOptions, + }, DeltaTableError, }; use bytes::Bytes; @@ -183,6 +185,13 @@ pub trait LogStore: Sync + Send { tmp_commit: &Path, ) -> Result<(), TransactionError>; + /// Abort the commit entry for the given version. + async fn abort_commit_entry( + &self, + version: i64, + tmp_commit: &Path, + ) -> Result<(), TransactionError>; + /// Find latest version currently stored in the delta log. 
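Editor's note on the `ReplayVisitor` trait and `AppTransactionVisitor` shown above: a crate-internal sketch of another visitor may make the hook clearer. The trait bounds and method signatures are copied from the diff; the exact `use` paths depend on how `visitors` is re-exported from the snapshot module and may need adjusting.

```rust
use arrow_array::RecordBatch;

use crate::errors::DeltaResult;
use crate::kernel::snapshot::ReplayVisitor;
use crate::kernel::ActionType;

/// Hypothetical visitor that counts how many replayed batches carried a `remove` column.
#[derive(Debug, Default)]
struct RemoveCountingVisitor {
    batches_with_removes: usize,
}

impl ReplayVisitor for RemoveCountingVisitor {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn visit_batch(&mut self, batch: &RecordBatch) -> DeltaResult<()> {
        // Log replay hands every commit and checkpoint batch to each registered
        // visitor, so we only inspect the column we care about.
        if batch.column_by_name("remove").is_some() {
            self.batches_with_removes += 1;
        }
        Ok(())
    }

    fn required_actions(&self) -> Vec<ActionType> {
        vec![ActionType::Remove]
    }
}
```

Because the visitors are threaded through `ReplayStream` by mutable reference, any state they accumulate can be read back after the `files(...)` stream has been drained.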
async fn get_latest_version(&self, start_version: i64) -> DeltaResult; @@ -449,6 +458,16 @@ pub async fn write_commit_entry( Ok(()) } +/// Default implementation for aborting a commit entry +pub async fn abort_commit_entry( + storage: &dyn ObjectStore, + _version: i64, + tmp_commit: &Path, +) -> Result<(), TransactionError> { + storage.delete_with_retries(tmp_commit, 15).await?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/core/src/operations/add_column.rs b/crates/core/src/operations/add_column.rs new file mode 100644 index 0000000000..028a6e5b2e --- /dev/null +++ b/crates/core/src/operations/add_column.rs @@ -0,0 +1,114 @@ +//! Add a new column to a table + +use delta_kernel::schema::StructType; +use futures::future::BoxFuture; +use itertools::Itertools; + +use super::cast::merge_struct; +use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; + +use crate::kernel::StructField; +use crate::logstore::LogStoreRef; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::{DeltaResult, DeltaTable, DeltaTableError}; + +/// Add new columns and/or nested fields to a table +pub struct AddColumnBuilder { + /// A snapshot of the table's state + snapshot: DeltaTableState, + /// Fields to add/merge into schema + fields: Option>, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Additional information to add to the commit + commit_properties: CommitProperties, +} + +impl super::Operation<()> for AddColumnBuilder {} + +impl AddColumnBuilder { + /// Create a new builder + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + snapshot, + log_store, + fields: None, + commit_properties: CommitProperties::default(), + } + } + + /// Specify the fields to be added + pub fn with_fields(mut self, fields: impl IntoIterator + Clone) -> Self { + self.fields = Some(fields.into_iter().collect()); + self + } + /// Additional metadata to be added to commit info + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; + self + } +} + +impl std::future::IntoFuture for AddColumnBuilder { + type Output = DeltaResult; + + type IntoFuture = BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let mut metadata = this.snapshot.metadata().clone(); + let fields = match this.fields { + Some(v) => v, + None => return Err(DeltaTableError::Generic("No fields provided".to_string())), + }; + + let fields_right = &StructType::new(fields.clone()); + let table_schema = this.snapshot.schema(); + let new_table_schema = merge_struct(table_schema, fields_right)?; + + // TODO(ion): Think of a way how we can simply this checking through the API or centralize some checks. 
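Editor's note on the new `abort_commit_entry` hook above: custom `LogStore` implementations can usually delegate to the module-level default, exactly as `DefaultLogStore` does. A hedged, crate-internal sketch (the surrounding struct and its `storage` field are hypothetical):

```rust
use object_store::path::Path;

use crate::logstore;
use crate::operations::transaction::TransactionError;
use crate::storage::ObjectStoreRef;

/// Hypothetical custom log store that keeps its object store in `storage`.
struct MyLogStore {
    storage: ObjectStoreRef,
}

impl MyLogStore {
    /// Clean up the temporary commit file after a failed commit attempt,
    /// reusing the retry-aware helper added in this change.
    async fn abort_commit_entry(
        &self,
        version: i64,
        tmp_commit: &Path,
    ) -> Result<(), TransactionError> {
        logstore::abort_commit_entry(self.storage.as_ref(), version, tmp_commit).await
    }
}
```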
+ let contains_timestampntz = PROTOCOL.contains_timestampntz(fields.iter()); + let protocol = this.snapshot.protocol(); + + let maybe_new_protocol = if contains_timestampntz { + let updated_protocol = protocol.clone().enable_timestamp_ntz(); + if !(protocol.min_reader_version == 3 && protocol.min_writer_version == 7) { + // Convert existing properties to features since we advanced the protocol to v3,7 + Some( + updated_protocol + .move_table_properties_into_features(&metadata.configuration), + ) + } else { + Some(updated_protocol) + } + } else { + None + }; + + let operation = DeltaOperation::AddColumn { + fields: fields.into_iter().collect_vec(), + }; + + metadata.schema_string = serde_json::to_string(&new_table_schema)?; + + let mut actions = vec![metadata.into()]; + + if let Some(new_protocol) = maybe_new_protocol { + actions.push(new_protocol.into()) + } + + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build(Some(&this.snapshot), this.log_store.clone(), operation) + .await?; + + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) + }) + } +} diff --git a/crates/core/src/operations/cast.rs b/crates/core/src/operations/cast.rs index 6e77552286..68f630239d 100644 --- a/crates/core/src/operations/cast.rs +++ b/crates/core/src/operations/cast.rs @@ -1,39 +1,268 @@ //! Provide common cast functionality for callers //! -use arrow_array::{Array, ArrayRef, RecordBatch, StructArray}; +use crate::kernel::{ + ArrayType, DataType as DeltaDataType, MapType, MetadataValue, StructField, StructType, +}; +use arrow_array::cast::AsArray; +use arrow_array::{ + new_null_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch, + RecordBatchOptions, StructArray, +}; use arrow_cast::{cast_with_options, CastOptions}; -use arrow_schema::{DataType, Fields, SchemaRef as ArrowSchemaRef}; - +use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef as ArrowSchemaRef}; +use std::collections::HashMap; use std::sync::Arc; use crate::DeltaResult; -fn cast_struct( - struct_array: &StructArray, - fields: &Fields, - cast_options: &CastOptions, -) -> Result>, arrow_schema::ArrowError> { - fields - .iter() +fn try_merge_metadata( + left: &mut HashMap, + right: &HashMap, +) -> Result<(), ArrowError> { + for (k, v) in right { + if let Some(vl) = left.get(k) { + if vl != v { + return Err(ArrowError::SchemaError(format!( + "Cannot merge metadata with different values for key {}", + k + ))); + } + } else { + left.insert(k.clone(), v.clone()); + } + } + Ok(()) +} + +pub(crate) fn merge_struct( + left: &StructType, + right: &StructType, +) -> Result { + let mut errors = Vec::new(); + let merged_fields: Result, ArrowError> = left + .fields() .map(|field| { - let col = struct_array.column_by_name(field.name()).unwrap(); - if let (DataType::Struct(_), DataType::Struct(child_fields)) = - (col.data_type(), field.data_type()) - { - let child_struct = StructArray::from(col.into_data()); - let s = cast_struct(&child_struct, child_fields, cast_options)?; - Ok(Arc::new(StructArray::new( - child_fields.clone(), - s, - child_struct.nulls().map(ToOwned::to_owned), - )) as ArrayRef) - } else if is_cast_required(col.data_type(), field.data_type()) { - cast_with_options(col, field.data_type(), cast_options) + let right_field = right.field(field.name()); + if let Some(right_field) = right_field { + let type_or_not = merge_type(field.data_type(), right_field.data_type()); + match type_or_not { + Err(e) => { + errors.push(e.to_string()); + Err(e) + } + 
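Editor's note, as a usage-level illustration of the `AddColumnBuilder` introduced above: assuming the `operations::add_column` module is public and the usual `deltalake_core` re-exports apply (those paths are assumptions, not confirmed by this diff), adding a nullable column might look like the following sketch.

```rust
use deltalake_core::kernel::{DataType, PrimitiveType, StructField};
use deltalake_core::operations::add_column::AddColumnBuilder;
use deltalake_core::{DeltaResult, DeltaTable};

/// Add a nullable string column to an existing table using the new builder.
async fn add_comment_column(table: DeltaTable) -> DeltaResult<DeltaTable> {
    let snapshot = table.snapshot()?.clone();
    let field = StructField::new(
        "comment",
        DataType::Primitive(PrimitiveType::String),
        true, // nullable, so existing data files remain valid without a rewrite
    );
    AddColumnBuilder::new(table.log_store(), snapshot)
        .with_fields(vec![field])
        .await
}
```

Only nullable (or nested, mergeable) additions make sense here; the builder merges the new fields into the current schema via `merge_struct` rather than replacing it.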
Ok(f) => { + let mut new_field = StructField::new( + field.name(), + f, + field.is_nullable() || right_field.is_nullable(), + ); + + new_field.metadata.clone_from(&field.metadata); + try_merge_metadata(&mut new_field.metadata, &right_field.metadata)?; + Ok(new_field) + } + } } else { - Ok(col.clone()) + Ok(field.clone()) } }) - .collect::, _>>() + .collect(); + match merged_fields { + Ok(mut fields) => { + for field in right.fields() { + if !left.field(field.name()).is_some() { + fields.push(field.clone()); + } + } + + Ok(StructType::new(fields)) + } + Err(e) => { + errors.push(e.to_string()); + Err(ArrowError::SchemaError(errors.join("\n"))) + } + } +} + +pub(crate) fn merge_type( + left: &DeltaDataType, + right: &DeltaDataType, +) -> Result { + if left == right { + return Ok(left.clone()); + } + match (left, right) { + (DeltaDataType::Array(a), DeltaDataType::Array(b)) => { + let merged = merge_type(&a.element_type, &b.element_type)?; + Ok(DeltaDataType::Array(Box::new(ArrayType::new( + merged, + a.contains_null() || b.contains_null(), + )))) + } + (DeltaDataType::Map(a), DeltaDataType::Map(b)) => { + let merged_key = merge_type(&a.key_type, &b.key_type)?; + let merged_value = merge_type(&a.value_type, &b.value_type)?; + Ok(DeltaDataType::Map(Box::new(MapType::new( + merged_key, + merged_value, + a.value_contains_null() || b.value_contains_null(), + )))) + } + (DeltaDataType::Struct(a), DeltaDataType::Struct(b)) => { + let merged = merge_struct(a, b)?; + Ok(DeltaDataType::Struct(Box::new(merged))) + } + (a, b) => Err(ArrowError::SchemaError(format!( + "Cannot merge types {} and {}", + a, b + ))), + } +} + +pub(crate) fn merge_schema( + left: ArrowSchemaRef, + right: ArrowSchemaRef, +) -> Result { + let left_delta: StructType = left.try_into()?; + let right_delta: StructType = right.try_into()?; + let merged: StructType = merge_struct(&left_delta, &right_delta)?; + Ok(Arc::new((&merged).try_into()?)) +} + +fn cast_struct( + struct_array: &StructArray, + fields: &Fields, + cast_options: &CastOptions, + add_missing: bool, +) -> Result { + StructArray::try_new( + fields.to_owned(), + fields + .iter() + .map(|field| { + let col_or_not = struct_array.column_by_name(field.name()); + match col_or_not { + None => match add_missing { + true if field.is_nullable() => { + Ok(new_null_array(field.data_type(), struct_array.len())) + } + _ => Err(ArrowError::SchemaError(format!( + "Could not find column {0}", + field.name() + ))), + }, + Some(col) => cast_field(col, field, cast_options, add_missing), + } + }) + .collect::, _>>()?, + struct_array.nulls().map(ToOwned::to_owned), + ) +} + +fn cast_list( + array: &GenericListArray, + field: &FieldRef, + cast_options: &CastOptions, + add_missing: bool, +) -> Result, ArrowError> { + let values = cast_field(array.values(), field, cast_options, add_missing)?; + GenericListArray::::try_new( + field.clone(), + array.offsets().clone(), + values, + array.nulls().cloned(), + ) +} + +fn cast_map( + array: &MapArray, + entries_field: &FieldRef, + sorted: bool, + cast_options: &CastOptions, + add_missing: bool, +) -> Result { + match entries_field.data_type() { + DataType::Struct(entry_fields) => { + let entries = cast_struct(array.entries(), entry_fields, cast_options, add_missing)?; + MapArray::try_new( + entries_field.clone(), + array.offsets().to_owned(), + entries, + array.nulls().cloned(), + sorted, + ) + } + _ => Err(ArrowError::CastError( + "Map entries must be a struct".to_string(), + )), + } +} + +fn cast_field( + col: &ArrayRef, + field: &FieldRef, + 
cast_options: &CastOptions, + add_missing: bool, +) -> Result { + if let (DataType::Struct(_), DataType::Struct(child_fields)) = + (col.data_type(), field.data_type()) + { + let child_struct = StructArray::from(col.into_data()); + Ok(Arc::new(cast_struct( + &child_struct, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef) + } else if let (DataType::List(_), DataType::List(child_fields)) = + (col.data_type(), field.data_type()) + { + Ok(Arc::new(cast_list( + col.as_any() + .downcast_ref::>() + .ok_or(ArrowError::CastError(format!( + "Expected a list for {} but got {}", + field.name(), + col.data_type() + )))?, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef) + } else if let (DataType::LargeList(_), DataType::LargeList(child_fields)) = + (col.data_type(), field.data_type()) + { + Ok(Arc::new(cast_list( + col.as_any() + .downcast_ref::>() + .ok_or(ArrowError::CastError(format!( + "Expected a list for {} but got {}", + field.name(), + col.data_type() + )))?, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef) + } else if let (DataType::Map(_, _), DataType::Map(child_fields, sorted)) = + (col.data_type(), field.data_type()) + { + Ok(Arc::new(cast_map( + col.as_map_opt().ok_or(ArrowError::CastError(format!( + "Expected a map for {} but got {}", + field.name(), + col.data_type() + )))?, + child_fields, + *sorted, + cast_options, + add_missing, + )?) as ArrayRef) + } else if is_cast_required(col.data_type(), field.data_type()) { + cast_with_options(col, field.data_type(), cast_options) + } else { + Ok(col.clone()) + } } fn is_cast_required(a: &DataType, b: &DataType) -> bool { @@ -51,6 +280,7 @@ pub fn cast_record_batch( batch: &RecordBatch, target_schema: ArrowSchemaRef, safe: bool, + add_missing: bool, ) -> DeltaResult { let cast_options = CastOptions { safe, @@ -62,20 +292,109 @@ pub fn cast_record_batch( batch.columns().to_owned(), None, ); - - let columns = cast_struct(&s, target_schema.fields(), &cast_options)?; - Ok(RecordBatch::try_new(target_schema, columns)?) + let struct_array = cast_struct(&s, target_schema.fields(), &cast_options, add_missing)?; + Ok(RecordBatch::try_new_with_options( + target_schema, + struct_array.columns().to_vec(), + &RecordBatchOptions::new().with_row_count(Some(batch.num_rows())), + )?) 
} #[cfg(test)] mod tests { - use crate::operations::cast::{cast_record_batch, is_cast_required}; - use arrow::array::ArrayData; - use arrow_array::{Array, ArrayRef, ListArray, RecordBatch}; - use arrow_buffer::Buffer; - use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; + use std::collections::HashMap; + use std::ops::Deref; use std::sync::Arc; + use arrow::array::types::Int32Type; + use arrow::array::{ + new_empty_array, new_null_array, Array, ArrayData, ArrayRef, AsArray, Int32Array, + ListArray, PrimitiveArray, RecordBatch, StringArray, StructArray, + }; + use arrow::buffer::{Buffer, NullBuffer}; + use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; + use itertools::Itertools; + + use crate::kernel::{ + ArrayType as DeltaArrayType, DataType as DeltaDataType, StructField as DeltaStructField, + StructType as DeltaStructType, + }; + use crate::operations::cast::MetadataValue; + use crate::operations::cast::{cast_record_batch, is_cast_required}; + + #[test] + fn test_merge_schema_with_dict() { + let left_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + )])); + let right_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::LargeUtf8, + true, + )])); + + let result = super::merge_schema(left_schema, right_schema).unwrap(); + assert_eq!(result.fields().len(), 1); + let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); + assert_eq!(delta_type, DeltaDataType::STRING); + assert!(result.fields()[0].is_nullable()); + } + + #[test] + fn test_merge_schema_with_meta() { + let mut left_meta = HashMap::new(); + left_meta.insert("a".to_string(), "a1".to_string()); + let left_schema = DeltaStructType::new(vec![DeltaStructField::new( + "f", + DeltaDataType::STRING, + false, + ) + .with_metadata(left_meta)]); + let mut right_meta = HashMap::new(); + right_meta.insert("b".to_string(), "b2".to_string()); + let right_schema = DeltaStructType::new(vec![DeltaStructField::new( + "f", + DeltaDataType::STRING, + true, + ) + .with_metadata(right_meta)]); + + let result = super::merge_struct(&left_schema, &right_schema).unwrap(); + let fields = result.fields().collect_vec(); + assert_eq!(fields.len(), 1); + let delta_type = fields[0].data_type(); + assert_eq!(delta_type, &DeltaDataType::STRING); + let mut expected_meta = HashMap::new(); + expected_meta.insert("a".to_string(), MetadataValue::String("a1".to_string())); + expected_meta.insert("b".to_string(), MetadataValue::String("b2".to_string())); + assert_eq!(fields[0].metadata(), &expected_meta); + } + + #[test] + fn test_merge_schema_with_nested() { + let left_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + false, + )])); + let right_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))), + true, + )])); + + let result = super::merge_schema(left_schema, right_schema).unwrap(); + assert_eq!(result.fields().len(), 1); + let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); + assert_eq!( + delta_type, + DeltaDataType::Array(Box::new(DeltaArrayType::new(DeltaDataType::STRING, false))) + ); + assert!(result.fields()[0].is_nullable()); + } + #[test] fn test_cast_record_batch_with_list_non_default_item() { let array = Arc::new(make_list_array()) as ArrayRef; @@ -93,7 +412,7 @@ mod tests { )]); let 
target_schema = Arc::new(Schema::new(fields)) as SchemaRef; - let result = cast_record_batch(&record_batch, target_schema, false); + let result = cast_record_batch(&record_batch, target_schema, false, false); let schema = result.unwrap().schema(); let field = schema.column_with_name("list_column").unwrap().1; @@ -142,4 +461,303 @@ mod tests { assert!(is_cast_required(&field1, &field2)); } + + #[test] + fn test_add_missing_null_fields_with_no_missing_fields() { + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec![Some("a"), None, Some("c")])), + ], + ) + .unwrap(); + let result = cast_record_batch(&batch, schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + assert_eq!( + result.column(1).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + } + + #[test] + fn test_add_missing_null_fields_with_missing_beginning() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field2", + DataType::Utf8, + true, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from(vec![ + Some("a"), + None, + Some("c"), + ]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Utf8, true), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + new_null_array(&DataType::Int32, 3) + .deref() + .as_primitive::() + ); + assert_eq!( + result.column(1).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + } + + #[test] + fn test_add_missing_null_fields_with_missing_end() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, true), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from(vec![Some(1), Some(2), Some(3)]) + ); + assert_eq!( + result.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string() + ); + } + + #[test] + fn test_add_missing_null_fields_error_on_missing_non_null() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, false), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true); + assert!(result.is_err()); + } + + #[test] + fn test_add_missing_null_fields_nested_struct_missing() { + 
let nested_fields = Fields::from(vec![Field::new("nested1", DataType::Utf8, true)]); + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Struct(nested_fields.clone()), true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StructArray::new( + nested_fields, + vec![Arc::new(StringArray::from(vec![Some("a"), None, Some("c")])) as ArrayRef], + None, + )), + ], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Struct(Fields::from(vec![ + Field::new("nested1", DataType::Utf8, true), + Field::new("nested2", DataType::Utf8, true), + ])), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let struct_column = result.column(1).deref().as_struct(); + assert_eq!(struct_column.num_columns(), 2); + assert_eq!( + struct_column.column(0).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + assert_eq!( + struct_column.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string() + ); + } + + #[test] + fn test_add_missing_null_fields_nested_struct_missing_non_nullable() { + let nested_fields = Fields::from(vec![Field::new("nested1", DataType::Utf8, false)]); + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Struct(nested_fields.clone()), true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StructArray::new( + nested_fields, + vec![new_null_array(&DataType::Utf8, 3)], + Some(NullBuffer::new_null(3)), + )), + ], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Struct(Fields::from(vec![ + Field::new("nested1", DataType::Utf8, false), + Field::new("nested2", DataType::Utf8, true), + ])), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let struct_column = result.column(1).deref().as_struct(); + assert_eq!(struct_column.num_columns(), 2); + let expected: [Option<&str>; 3] = Default::default(); + assert_eq!( + struct_column.column(0).deref().as_string(), + &StringArray::from(Vec::from(expected)) + ); + assert_eq!( + struct_column.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string(), + ); + } + + #[test] + fn test_add_missing_null_fields_list_missing() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::List(Arc::new(Field::new("nested1", DataType::Utf8, true))), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, 
true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let list_column = result.column(1).deref().as_list::(); + assert_eq!(list_column.len(), 3); + assert_eq!(list_column.value_offsets(), &[0, 0, 0, 0]); + assert_eq!( + list_column.values().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ) + } + + #[test] + fn test_add_missing_null_fields_map_missing() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, true), + Field::new("value", DataType::Utf8, true), + ])), + true, + )), + false, + ), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let map_column = result.column(1).deref().as_map(); + assert_eq!(map_column.len(), 3); + assert_eq!(map_column.offsets().as_ref(), &[0; 4]); + assert_eq!( + map_column.keys().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ); + assert_eq!( + map_column.values().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ); + } } diff --git a/crates/core/src/operations/cdc.rs b/crates/core/src/operations/cdc.rs new file mode 100644 index 0000000000..42a33cbcab --- /dev/null +++ b/crates/core/src/operations/cdc.rs @@ -0,0 +1,417 @@ +//! +//! The CDC module contains private tools for managing CDC files +//! 
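Editor's note, summarizing the `cast.rs` changes above before the CDC module continues: `cast_record_batch` now takes an `add_missing` flag that back-fills absent nullable columns with nulls, and `merge_schema`/`merge_struct` provide the schema-evolution counterpart. A hedged usage sketch, assuming the `operations::cast` module is reachable from outside the crate:

```rust
use std::sync::Arc;

use arrow_array::{Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use deltalake_core::operations::cast::cast_record_batch;
use deltalake_core::DeltaResult;

/// Widen a batch to a target schema, letting `add_missing = true` fill the
/// absent nullable column with nulls (mirrors the tests above).
fn widen_batch() -> DeltaResult<RecordBatch> {
    let source_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        source_schema,
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    // The target schema gained a nullable "comment" column; without
    // add_missing = true this cast would fail with "Could not find column comment".
    let target_schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("comment", DataType::Utf8, true),
    ]));

    cast_record_batch(&batch, target_schema, /* safe */ false, /* add_missing */ true)
}
```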
+ +use crate::table::state::DeltaTableState; +use crate::DeltaResult; + +use arrow::datatypes::{DataType, Field, SchemaRef}; + +use datafusion::prelude::*; +use datafusion_common::ScalarValue; +use std::sync::Arc; +use tracing::log::*; + +/// The CDCTracker is useful for hooking reads/writes in a manner nececessary to create CDC files +/// associated with commits +pub(crate) struct CDCTracker { + pre_dataframe: DataFrame, + post_dataframe: DataFrame, +} + +impl CDCTracker { + /// construct + pub(crate) fn new(pre_dataframe: DataFrame, post_dataframe: DataFrame) -> Self { + Self { + pre_dataframe, + post_dataframe, + } + } + + pub(crate) fn collect(self) -> DeltaResult { + // Collect _all_ the batches for consideration + let pre_df = self.pre_dataframe; + let post_df = self.post_dataframe; + + // There is certainly a better way to do this other than stupidly cloning data for diffing + // purposes, but this is the quickest and easiest way to "diff" the two sets of batches + let preimage = pre_df.clone().except(post_df.clone())?; + let postimage = post_df.except(pre_df)?; + + let preimage = preimage.with_column( + "_change_type", + lit(ScalarValue::Utf8(Some("update_preimage".to_string()))), + )?; + + let postimage = postimage.with_column( + "_change_type", + lit(ScalarValue::Utf8(Some("update_postimage".to_string()))), + )?; + + let final_df = preimage.union(postimage)?; + Ok(final_df) + } +} + +/// +/// Return true if the specified table is capable of writing Change Data files +/// +/// From the Protocol: +/// +/// > For Writer Versions 4 up to 6, all writers must respect the delta.enableChangeDataFeed +/// > configuration flag in the metadata of the table. When delta.enableChangeDataFeed is true, +/// > writers must produce the relevant AddCDCFile's for any operation that changes data, as +/// > specified in Change Data Files. +/// > +/// > For Writer Version 7, all writers must respect the delta.enableChangeDataFeed configuration flag in +/// > the metadata of the table only if the feature changeDataFeed exists in the table protocol's +/// > writerFeatures. +pub(crate) fn should_write_cdc(snapshot: &DeltaTableState) -> DeltaResult { + if let Some(features) = &snapshot.protocol().writer_features { + // Features should only exist at writer version 7 but to avoid cases where + // the Option> can get filled with an empty set, checking for the value + // explicitly + if snapshot.protocol().min_writer_version == 7 + && !features.contains(&crate::kernel::WriterFeatures::ChangeDataFeed) + { + // If the writer feature has not been set, then the table should not have CDC written + // to it. 
Otherwise fallback to the configured table configuration + return Ok(false); + } + } + Ok(snapshot.table_config().enable_change_data_feed()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::kernel::DataType as DeltaDataType; + use crate::kernel::{Action, PrimitiveType, Protocol}; + use crate::operations::DeltaOps; + use crate::{DeltaConfigKey, DeltaTable}; + use arrow::array::{ArrayRef, Int32Array, StructArray}; + use arrow_array::RecordBatch; + use arrow_schema::Schema; + use datafusion::assert_batches_sorted_eq; + use datafusion::datasource::{MemTable, TableProvider}; + + /// A simple test which validates primitive writer version 1 tables should + /// not write Change Data Files + #[tokio::test] + async fn test_should_write_cdc_basic_table() { + let mut table = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .await + .expect("Failed to make a table"); + table.load().await.expect("Failed to reload table"); + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result == false, + "A default table should not create CDC files" + ); + } + + /// + /// This test manually creates a table with writer version 4 that has the configuration sets + /// + #[tokio::test] + async fn test_should_write_cdc_table_with_configuration() { + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result == true, + "A table with the EnableChangeDataFeed should create CDC files" + ); + } + + /// + /// This test creates a writer version 7 table which has a slightly different way of + /// determining whether CDC files should be written or not. 
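Editor's note on `should_write_cdc` above: the protocol rules reduce to a small decision. The following standalone sketch restates that decision with the snapshot fields turned into plain inputs (the real function reads them from `DeltaTableState`):

```rust
/// Standalone restatement of the `should_write_cdc` decision.
fn cdc_enabled(
    min_writer_version: i32,
    writer_features_contain_cdf: Option<bool>, // None = protocol has no writerFeatures
    enable_change_data_feed: bool,             // delta.enableChangeDataFeed
) -> bool {
    if let Some(has_cdf) = writer_features_contain_cdf {
        if min_writer_version == 7 && !has_cdf {
            // Writer version 7 only honours the table property when the
            // changeDataFeed writer feature is present.
            return false;
        }
    }
    // Writer versions 4..=6 (and v7 with the feature) fall back to the table property.
    enable_change_data_feed
}

fn main() {
    assert!(!cdc_enabled(1, None, false));       // plain table
    assert!(cdc_enabled(4, None, true));         // v4 + delta.enableChangeDataFeed
    assert!(!cdc_enabled(7, Some(false), true)); // v7 without the writer feature
    assert!(cdc_enabled(7, Some(true), true));   // v7 with the feature enabled
}
```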
+ #[tokio::test] + async fn test_should_write_cdc_v7_table_no_writer_feature() { + let actions = vec![Action::Protocol(Protocol::new(1, 7))]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result == false, + "A v7 table must not write CDC files unless the writer feature is set" + ); + } + + /// + /// This test creates a writer version 7 table with a writer table feature enabled for CDC and + /// therefore should write CDC files + #[tokio::test] + async fn test_should_write_cdc_v7_table_with_writer_feature() { + let protocol = Protocol::new(1, 7) + .with_writer_features(vec![crate::kernel::WriterFeatures::ChangeDataFeed]); + let actions = vec![Action::Protocol(protocol)]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result, + "A v7 table must not write CDC files unless the writer feature is set" + ); + } + + #[tokio::test] + async fn test_sanity_check() { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table_provider: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let source_df = ctx.read_table(table_provider).unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)]))], + ) + .unwrap(); + let table_provider_updated: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![updated_batch]]).unwrap()); + let updated_df = ctx.read_table(table_provider_updated).unwrap(); + + let tracker = CDCTracker::new(source_df, updated_df); + + match tracker.collect() { + Ok(df) => { + let batches = &df.collect().await.unwrap(); + let _ = arrow::util::pretty::print_batches(batches); + assert_eq!(batches.len(), 2); + assert_batches_sorted_eq! 
{[ + "+-------+------------------+", + "| value | _change_type |", + "+-------+------------------+", + "| 2 | update_preimage |", + "| 12 | update_postimage |", + "+-------+------------------+", + ], &batches } + } + Err(err) => { + println!("err: {err:#?}"); + panic!("Should have never reached this assertion"); + } + } + } + + #[tokio::test] + async fn test_sanity_check_with_pure_df() { + let nested_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("lat", DataType::Int32, true), + Field::new("long", DataType::Int32, true), + ])); + let schema = Arc::new(Schema::new(vec![ + Field::new("value", DataType::Int32, true), + Field::new( + "nested", + DataType::Struct(nested_schema.fields.clone()), + true, + ), + ])); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let _ = arrow::util::pretty::print_batches(&vec![batch.clone()]); + let _ = arrow::util::pretty::print_batches(&vec![updated_batch.clone()]); + + let ctx = SessionContext::new(); + let before = ctx.read_batch(batch).expect("Failed to make DataFrame"); + let after = ctx + .read_batch(updated_batch) + .expect("Failed to make DataFrame"); + + let diff = before + .except(after) + .expect("Failed to except") + .collect() + .await + .expect("Failed to diff"); + assert_eq!(diff.len(), 1); + } + + #[tokio::test] + async fn test_sanity_check_with_struct() { + let ctx = SessionContext::new(); + let nested_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("lat", DataType::Int32, true), + Field::new("long", DataType::Int32, true), + ])); + let schema = Arc::new(Schema::new(vec![ + Field::new("value", DataType::Int32, true), + Field::new( + "nested", + DataType::Struct(nested_schema.fields.clone()), + true, + ), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let table_provider: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let source_df = 
ctx.read_table(table_provider).unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let table_provider_updated: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![updated_batch]]).unwrap()); + let updated_df = ctx.read_table(table_provider_updated).unwrap(); + + let tracker = CDCTracker::new(source_df, updated_df); + + match tracker.collect() { + Ok(df) => { + let batches = &df.collect().await.unwrap(); + let _ = arrow::util::pretty::print_batches(&batches); + assert_eq!(batches.len(), 2); + assert_batches_sorted_eq! {[ + "+-------+--------------------------+------------------+", + "| value | nested | _change_type |", + "+-------+--------------------------+------------------+", + "| 12 | {id: 2, lat: 2, long: 2} | update_postimage |", + "| 2 | {id: 2, lat: 2, long: 2} | update_preimage |", + "+-------+--------------------------+------------------+", + ], &batches } + } + Err(err) => { + println!("err: {err:#?}"); + panic!("Should have never reached this assertion"); + } + } + } +} diff --git a/crates/core/src/operations/constraints.rs b/crates/core/src/operations/constraints.rs index 9bf5f2d22c..e5d356f81c 100644 --- a/crates/core/src/operations/constraints.rs +++ b/crates/core/src/operations/constraints.rs @@ -1,9 +1,7 @@ //! 
Add a check constraint to a table -use std::collections::HashMap; use std::sync::Arc; -use chrono::Utc; use datafusion::execution::context::SessionState; use datafusion::execution::{SendableRecordBatchStream, TaskContext}; use datafusion::physical_plan::ExecutionPlan; @@ -11,22 +9,21 @@ use datafusion::prelude::SessionContext; use datafusion_common::ToDFSchema; use futures::future::BoxFuture; use futures::StreamExt; -use serde_json::json; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::{ register_store, DeltaDataChecker, DeltaScanBuilder, DeltaSessionContext, }; -use crate::kernel::{CommitInfo, IsolationLevel, Protocol}; +use crate::kernel::{Protocol, WriterFeatures}; use crate::logstore::LogStoreRef; use crate::operations::datafusion_utils::Expression; -use crate::operations::transaction::commit; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::table::Constraint; use crate::{DeltaResult, DeltaTable, DeltaTableError}; use super::datafusion_utils::into_expr; +use super::transaction::{CommitBuilder, CommitProperties}; /// Build a constraint to add to a table pub struct ConstraintBuilder { @@ -40,10 +37,12 @@ pub struct ConstraintBuilder { log_store: LogStoreRef, /// Datafusion session state relevant for executing the input plan state: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, } +impl super::Operation<()> for ConstraintBuilder {} + impl ConstraintBuilder { /// Create a new builder pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -53,7 +52,7 @@ impl ConstraintBuilder { snapshot, log_store, state: None, - app_metadata: None, + commit_properties: CommitProperties::default(), } } @@ -75,11 +74,8 @@ impl ConstraintBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } } @@ -90,7 +86,7 @@ impl std::future::IntoFuture for ConstraintBuilder { type IntoFuture = BoxFuture<'static, Self::Output>; fn into_future(self) -> Self::IntoFuture { - let mut this = self; + let this = self; Box::pin(async move { let name = match this.name { @@ -132,7 +128,7 @@ impl std::future::IntoFuture for ConstraintBuilder { let plan: Arc = Arc::new(scan); let mut tasks = vec![]; - for p in 0..plan.output_partitioning().partition_count() { + for p in 0..plan.properties().output_partitioning().partition_count() { let inner_plan = plan.clone(); let inner_checker = checker.clone(); let task_ctx = Arc::new(TaskContext::from(&state)); @@ -177,48 +173,35 @@ impl std::future::IntoFuture for ConstraintBuilder { 3 }, reader_features: old_protocol.reader_features.clone(), - writer_features: old_protocol.writer_features.clone(), + writer_features: if old_protocol.min_writer_version < 7 { + old_protocol.writer_features.clone() + } else { + let current_features = old_protocol.writer_features.clone(); + if let Some(mut features) = current_features { + features.insert(WriterFeatures::CheckConstraints); + Some(features) + } else { + current_features + } + }, }; - let operational_parameters = HashMap::from_iter([ - ("name".to_string(), json!(&name)), - ("expr".to_string(), json!(&expr_str)), - ]); - - let operations = 
DeltaOperation::AddConstraint { + let operation = DeltaOperation::AddConstraint { name: name.clone(), expr: expr_str.clone(), }; - let app_metadata = match this.app_metadata { - Some(metadata) => metadata, - None => HashMap::default(), - }; - - let commit_info = CommitInfo { - timestamp: Some(Utc::now().timestamp_millis()), - operation: Some(operations.name().to_string()), - operation_parameters: Some(operational_parameters), - read_version: Some(this.snapshot.version()), - isolation_level: Some(IsolationLevel::Serializable), - is_blind_append: Some(false), - info: app_metadata, - ..Default::default() - }; + let actions = vec![metadata.into(), protocol.into()]; - let actions = vec![commit_info.into(), metadata.into(), protocol.into()]; - - let version = commit( - this.log_store.as_ref(), - &actions, - operations.clone(), - Some(&this.snapshot), - None, - ) - .await?; + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build(Some(&this.snapshot), this.log_store.clone(), operation) + .await?; - this.snapshot.merge(actions, &operations, version)?; - Ok(DeltaTable::new_with_state(this.log_store, this.snapshot)) + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) }) } } diff --git a/crates/core/src/operations/convert_to_delta.rs b/crates/core/src/operations/convert_to_delta.rs index 1ed4e1cee6..a51d353b20 100644 --- a/crates/core/src/operations/convert_to_delta.rs +++ b/crates/core/src/operations/convert_to_delta.rs @@ -1,33 +1,32 @@ //! Command for converting a Parquet table to a Delta table in place // https://github.com/delta-io/delta/blob/1d5dd774111395b0c4dc1a69c94abc169b1c83b6/spark/src/main/scala/org/apache/spark/sql/delta/commands/ConvertToDeltaCommand.scala +use std::collections::{HashMap, HashSet}; +use std::num::TryFromIntError; +use std::str::{FromStr, Utf8Error}; +use std::sync::Arc; +use arrow::{datatypes::Schema as ArrowSchema, error::ArrowError}; +use futures::future::{self, BoxFuture}; +use futures::TryStreamExt; +use indexmap::IndexMap; +use itertools::Itertools; +use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; +use parquet::errors::ParquetError; +use percent_encoding::percent_decode_str; +use serde_json::{Map, Value}; +use tracing::debug; + +use crate::operations::get_num_idx_cols_and_stats_columns; use crate::{ - kernel::{Add, DataType, Schema, StructField}, + kernel::{scalars::ScalarExt, Add, DataType, Schema, StructField}, logstore::{LogStore, LogStoreRef}, operations::create::CreateBuilder, protocol::SaveMode, table::builder::ensure_table_uri, table::config::DeltaConfigKey, + writer::stats::stats_from_parquet_metadata, DeltaResult, DeltaTable, DeltaTableError, ObjectStoreError, NULL_PARTITION_VALUE_DATA_PATH, }; -use arrow::{datatypes::Schema as ArrowSchema, error::ArrowError}; -use futures::{ - future::{self, BoxFuture}, - TryStreamExt, -}; -use parquet::{ - arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}, - errors::ParquetError, -}; -use percent_encoding::percent_decode_str; -use serde_json::{Map, Value}; -use std::{ - collections::{HashMap, HashSet}, - num::TryFromIntError, - str::{FromStr, Utf8Error}, - sync::Arc, -}; -use tracing::debug; /// Error converting a Parquet table to a Delta table #[derive(Debug, thiserror::Error)] @@ -49,7 +48,7 @@ enum Error { #[error("The schema of partition columns must be provided to convert a Parquet table to a Delta table")] MissingPartitionSchema, #[error("Partition column provided by the user does not 
exist in the parquet files")] - PartitionColumnNotExist(HashSet), + PartitionColumnNotExist, #[error("The given location is already a delta table location")] DeltaTableAlready, #[error("Location must be provided to convert a Parquet table to a Delta table")] @@ -101,7 +100,7 @@ pub struct ConvertToDeltaBuilder { log_store: Option, location: Option, storage_options: Option>, - partition_schema: HashSet, + partition_schema: HashMap, partition_strategy: PartitionStrategy, mode: SaveMode, name: Option, @@ -116,6 +115,8 @@ impl Default for ConvertToDeltaBuilder { } } +impl super::Operation<()> for ConvertToDeltaBuilder {} + impl ConvertToDeltaBuilder { /// Create a new [`ConvertToDeltaBuilder`] pub fn new() -> Self { @@ -164,7 +165,10 @@ impl ConvertToDeltaBuilder { mut self, partition_schema: impl IntoIterator, ) -> Self { - self.partition_schema = HashSet::from_iter(partition_schema); + self.partition_schema = partition_schema + .into_iter() + .map(|f| (f.name.clone(), f)) + .collect(); self } @@ -226,7 +230,7 @@ impl ConvertToDeltaBuilder { } /// Consume self into CreateBuilder with corresponding add actions, schemas and operation meta - async fn into_create_builder(mut self) -> Result { + async fn into_create_builder(self) -> Result { // Use the specified log store. If a log store is not provided, create a new store from the specified path. // Return an error if neither log store nor path is provided let log_store = if let Some(log_store) = self.log_store { @@ -270,11 +274,17 @@ impl ConvertToDeltaBuilder { // Iterate over the parquet files. Parse partition columns, generate add actions and collect parquet file schemas let mut arrow_schemas = Vec::new(); let mut actions = Vec::new(); + // partition columns that were defined by caller and are expected to apply on this table + let mut expected_partitions: HashMap = self.partition_schema.clone(); // A HashSet of all unique partition columns in a Parquet table let mut partition_columns = HashSet::new(); // A vector of StructField of all unique partition columns in a Parquet table let mut partition_schema_fields = HashMap::new(); + // Obtain settings on which columns to skip collecting stats on if any + let (num_indexed_cols, stats_columns) = + get_num_idx_cols_and_stats_columns(None, self.configuration.clone()); + for file in files { // A HashMap from partition column to value for this parquet file only let mut partition_values = HashMap::new(); @@ -290,7 +300,7 @@ impl ConvertToDeltaBuilder { .ok_or(Error::MissingPartitionSchema)?; if partition_columns.insert(key.to_string()) { - if let Some(schema) = self.partition_schema.take(key) { + if let Some(schema) = expected_partitions.remove(key) { partition_schema_fields.insert(key.to_string(), schema); } else { // Return an error if the schema of a partition column is not provided by user @@ -301,12 +311,14 @@ impl ConvertToDeltaBuilder { // Safety: we just checked that the key is present in the map let field = partition_schema_fields.get(key).unwrap(); let scalar = if value == NULL_PARTITION_VALUE_DATA_PATH { - Ok(crate::kernel::Scalar::Null(field.data_type().clone())) + Ok(delta_kernel::expressions::Scalar::Null( + field.data_type().clone(), + )) } else { let decoded = percent_decode_str(value).decode_utf8()?; match field.data_type() { DataType::Primitive(p) => p.parse_scalar(decoded.as_ref()), - _ => Err(crate::kernel::Error::Generic(format!( + _ => Err(delta_kernel::Error::Generic(format!( "Exprected primitive type, found: {:?}", field.data_type() ))), @@ -319,6 +331,24 @@ impl 
ConvertToDeltaBuilder { subpath = iter.next(); } + let batch_builder = ParquetRecordBatchStreamBuilder::new(ParquetObjectReader::new( + object_store.clone(), + file.clone(), + )) + .await?; + + // Fetch the stats + let parquet_metadata = batch_builder.metadata(); + let stats = stats_from_parquet_metadata( + &IndexMap::from_iter(partition_values.clone().into_iter()), + parquet_metadata.as_ref(), + num_indexed_cols, + &stats_columns, + ) + .map_err(|e| Error::DeltaTable(e.into()))?; + let stats_string = + serde_json::to_string(&stats).map_err(|e| Error::DeltaTable(e.into()))?; + actions.push( Add { path: percent_decode_str(file.location.as_ref()) @@ -340,19 +370,13 @@ impl ConvertToDeltaBuilder { .collect(), modification_time: file.last_modified.timestamp_millis(), data_change: true, + stats: Some(stats_string), ..Default::default() } .into(), ); - let mut arrow_schema = ParquetRecordBatchStreamBuilder::new(ParquetObjectReader::new( - object_store.clone(), - file, - )) - .await? - .schema() - .as_ref() - .clone(); + let mut arrow_schema = batch_builder.schema().as_ref().clone(); // Arrow schema of Parquet files may have conflicting metatdata // Since Arrow schema metadata is not used to generate Delta table schema, we set the metadata field to an empty HashMap @@ -360,27 +384,21 @@ impl ConvertToDeltaBuilder { arrow_schemas.push(arrow_schema); } - if !self.partition_schema.is_empty() { + if !expected_partitions.is_empty() { // Partition column provided by the user does not exist in the parquet files - return Err(Error::PartitionColumnNotExist(self.partition_schema)); + return Err(Error::PartitionColumnNotExist); } // Merge parquet file schemas // This step is needed because timestamp will not be preserved when copying files in S3. We can't use the schema of the latest parqeut file as Delta table's schema - let mut schema_fields = Schema::try_from(&ArrowSchema::try_merge(arrow_schemas)?)? 
- .fields() - .clone(); - schema_fields.append( - &mut partition_schema_fields - .values() - .cloned() - .collect::>(), - ); + let schema = Schema::try_from(&ArrowSchema::try_merge(arrow_schemas)?)?; + let mut schema_fields = schema.fields().collect_vec(); + schema_fields.append(&mut partition_schema_fields.values().collect::>()); // Generate CreateBuilder with corresponding add actions, schemas and operation meta let mut builder = CreateBuilder::new() .with_log_store(log_store) - .with_columns(schema_fields) + .with_columns(schema_fields.into_iter().cloned()) .with_partition_columns(partition_columns.into_iter()) .with_actions(actions) .with_save_mode(self.mode) @@ -419,17 +437,20 @@ impl std::future::IntoFuture for ConvertToDeltaBuilder { #[cfg(test)] mod tests { + use std::fs; + + use delta_kernel::expressions::Scalar; + use itertools::Itertools; + use pretty_assertions::assert_eq; + use tempfile::tempdir; + use super::*; use crate::{ - kernel::{DataType, PrimitiveType, Scalar}, + kernel::{DataType, PrimitiveType}, open_table, storage::StorageOptions, Path, }; - use itertools::Itertools; - use pretty_assertions::assert_eq; - use std::fs; - use tempfile::tempdir; fn schema_field(key: &str, primitive: PrimitiveType, nullable: bool) -> StructField { StructField::new(key.to_string(), DataType::Primitive(primitive), nullable) @@ -535,7 +556,8 @@ mod tests { .get_schema() .expect("Failed to get schema") .fields() - .clone(); + .cloned() + .collect_vec(); schema_fields.sort_by(|a, b| a.name().cmp(b.name())); assert_eq!( schema_fields, expected_schema, @@ -575,6 +597,16 @@ mod tests { "part-00000-d22c627d-9655-4153-9527-f8995620fa42-c000.snappy.parquet" ); + let Some(Scalar::Struct(data)) = action.min_values() else { + panic!("Missing min values"); + }; + assert_eq!(data.values(), vec![Scalar::Date(18628), Scalar::Integer(1)]); + + let Some(Scalar::Struct(data)) = action.max_values() else { + panic!("Missing max values"); + }; + assert_eq!(data.values(), vec![Scalar::Date(18632), Scalar::Integer(5)]); + assert_delta_table( table, path, diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index bbf11e3705..63b6995f9b 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -4,12 +4,16 @@ use std::collections::HashMap; use std::sync::Arc; +use delta_kernel::schema::MetadataValue; use futures::future::BoxFuture; +use maplit::hashset; use serde_json::Value; -use super::transaction::{commit, PROTOCOL}; +use super::transaction::{CommitBuilder, TableReference, PROTOCOL}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, DataType, Metadata, Protocol, StructField, StructType}; +use crate::kernel::{ + Action, DataType, Metadata, Protocol, ReaderFeatures, StructField, StructType, WriterFeatures, +}; use crate::logstore::{LogStore, LogStoreRef}; use crate::protocol::{DeltaOperation, SaveMode}; use crate::table::builder::ensure_table_uri; @@ -56,8 +60,11 @@ pub struct CreateBuilder { log_store: Option, configuration: HashMap>, metadata: Option>, + raise_if_key_not_exists: bool, } +impl super::Operation<()> for CreateBuilder {} + impl Default for CreateBuilder { fn default() -> Self { Self::new() @@ -79,6 +86,7 @@ impl CreateBuilder { log_store: None, configuration: Default::default(), metadata: Default::default(), + raise_if_key_not_exists: true, } } @@ -118,7 +126,24 @@ impl CreateBuilder { ) -> Self { let mut field = StructField::new(name.into(), data_type, nullable); if let Some(meta) = metadata { - 
field = field.with_metadata(meta); + field = field.with_metadata(meta.iter().map(|(k, v)| { + ( + k, + if let Value::Number(n) = v { + n.as_i64().map_or_else( + || MetadataValue::String(v.to_string()), + |i| { + i32::try_from(i) + .ok() + .map(MetadataValue::Number) + .unwrap_or_else(|| MetadataValue::String(v.to_string())) + }, + ) + } else { + MetadataValue::String(v.to_string()) + }, + ) + })); }; self.columns.push(field); self @@ -188,6 +213,12 @@ impl CreateBuilder { self } + /// Specify whether to raise an error if the table properties in the configuration are not DeltaConfigKeys + pub fn with_raise_if_key_not_exists(mut self, raise_if_key_not_exists: bool) -> Self { + self.raise_if_key_not_exists = raise_if_key_not_exists; + self + } + /// Specify additional actions to be added to the commit. /// /// This method is mainly meant for internal use. Manually adding inconsistent @@ -233,8 +264,27 @@ impl CreateBuilder { ) }; + let configuration = self.configuration; + let contains_timestampntz = PROTOCOL.contains_timestampntz(self.columns.iter()); // TODO configure more permissive versions based on configuration. Also how should this ideally be handled? // We set the lowest protocol we can, and if subsequent writes use newer features we update metadata? + + let current_protocol = if contains_timestampntz { + Protocol { + min_reader_version: 3, + min_writer_version: 7, + writer_features: Some(hashset! {WriterFeatures::TimestampWithoutTimezone}), + reader_features: Some(hashset! {ReaderFeatures::TimestampWithoutTimezone}), + } + } else { + Protocol { + min_reader_version: PROTOCOL.default_reader_version(), + min_writer_version: PROTOCOL.default_writer_version(), + reader_features: None, + writer_features: None, + } + }; + let protocol = self .actions .iter() @@ -243,17 +293,22 @@ impl CreateBuilder { Action::Protocol(p) => p.clone(), _ => unreachable!(), }) - .unwrap_or_else(|| Protocol { - min_reader_version: PROTOCOL.default_reader_version(), - min_writer_version: PROTOCOL.default_writer_version(), - writer_features: None, - reader_features: None, - }); + .unwrap_or_else(|| current_protocol); + + let protocol = protocol.apply_properties_to_protocol( + &configuration + .iter() + .map(|(k, v)| (k.clone(), v.clone().unwrap())) + .collect::>(), + self.raise_if_key_not_exists, + )?; + + let protocol = protocol.move_table_properties_into_features(&configuration); let mut metadata = Metadata::try_new( StructType::new(self.columns), self.partition_columns.unwrap_or_default(), - self.configuration, + configuration, )? .with_created_time(chrono::Utc::now().timestamp_millis()); if let Some(name) = self.name { @@ -264,13 +319,14 @@ impl CreateBuilder { } let operation = DeltaOperation::Create { - mode: self.mode.clone(), + mode: self.mode, metadata: metadata.clone(), location: storage_url, protocol: protocol.clone(), }; let mut actions = vec![Action::Protocol(protocol), Action::Metadata(metadata)]; + actions.extend( self.actions .into_iter() @@ -288,9 +344,9 @@ impl std::future::IntoFuture for CreateBuilder { fn into_future(self) -> Self::IntoFuture { let this = self; Box::pin(async move { - let mode = this.mode.clone(); - let app_metadata = this.metadata.clone(); - let (mut table, actions, operation) = this.into_table_and_actions()?; + let mode = this.mode; + let app_metadata = this.metadata.clone().unwrap_or_default(); + let (mut table, mut actions, operation) = this.into_table_and_actions()?; let log_store = table.log_store(); let table_state = if log_store.is_delta_table_location().await? 
{ @@ -303,6 +359,12 @@ impl std::future::IntoFuture for CreateBuilder { } SaveMode::Overwrite => { table.load().await?; + let remove_actions = table + .snapshot()? + .log_data() + .into_iter() + .map(|p| p.remove_action(true).into()); + actions.extend(remove_actions); Some(table.snapshot()?) } } @@ -310,15 +372,16 @@ impl std::future::IntoFuture for CreateBuilder { None }; - let version = commit( - table.log_store.as_ref(), - &actions, - operation, - table_state, - app_metadata, - ) - .await?; - + let version = CommitBuilder::default() + .with_actions(actions) + .with_app_metadata(app_metadata) + .build( + table_state.map(|f| f as &dyn TableReference), + table.log_store.clone(), + operation, + ) + .await? + .version(); table.load_version(version).await?; Ok(table) @@ -331,7 +394,7 @@ mod tests { use super::*; use crate::operations::DeltaOps; use crate::table::config::DeltaConfigKey; - use crate::writer::test_utils::get_delta_schema; + use crate::writer::test_utils::{get_delta_schema, get_record_batch}; use tempfile::TempDir; #[tokio::test] @@ -340,7 +403,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -360,7 +423,7 @@ mod tests { .await .unwrap() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -378,7 +441,7 @@ mod tests { ); let table = CreateBuilder::new() .with_location(format!("./{relative_path}")) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -389,7 +452,7 @@ mod tests { let schema = get_delta_schema(); let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -412,7 +475,7 @@ mod tests { }; let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_actions(vec![Action::Protocol(protocol)]) .await .unwrap(); @@ -421,7 +484,7 @@ mod tests { let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_configuration_property(DeltaConfigKey::AppendOnly, Some("true")) .await .unwrap(); @@ -444,7 +507,7 @@ mod tests { let schema = get_delta_schema(); let table = CreateBuilder::new() .with_location(tmp_dir.path().to_str().unwrap()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -455,7 +518,7 @@ mod tests { // Check an error is raised when a table exists at location let table = CreateBuilder::new() .with_log_store(log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::ErrorIfExists) .await; assert!(table.is_err()); @@ -463,7 +526,7 @@ mod tests { // Check current table is returned when ignore option is chosen. 
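// (Save-mode behaviour exercised below: `Ignore` hands back the existing table untouched,
// while `Overwrite` now also emits `remove` actions for the files of the previous table,
// as covered by `test_create_or_replace_existing_table`.)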
let table = CreateBuilder::new() .with_log_store(log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -472,10 +535,98 @@ mod tests { // Check table is overwritten let table = CreateBuilder::new() .with_log_store(log_store) - .with_columns(schema.fields().iter().cloned()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Overwrite) .await .unwrap(); assert_ne!(table.metadata().unwrap().id, first_id) } + + #[tokio::test] + async fn test_create_or_replace_existing_table() { + let batch = get_record_batch(None, false); + let schema = get_delta_schema(); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 1); + + let mut table = DeltaOps(table) + .create() + .with_columns(schema.fields().cloned()) + .with_save_mode(SaveMode::Overwrite) + .await + .unwrap(); + table.load().await.unwrap(); + assert_eq!(table.version(), 1); + // Checks if files got removed after overwrite + assert_eq!(table.get_files_count(), 0); + } + + #[tokio::test] + async fn test_create_or_replace_existing_table_partitioned() { + let batch = get_record_batch(None, false); + let schema = get_delta_schema(); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 1); + + let mut table = DeltaOps(table) + .create() + .with_columns(schema.fields().cloned()) + .with_save_mode(SaveMode::Overwrite) + .with_partition_columns(vec!["id"]) + .await + .unwrap(); + table.load().await.unwrap(); + assert_eq!(table.version(), 1); + // Checks if files got removed after overwrite + assert_eq!(table.get_files_count(), 0); + } + + #[tokio::test] + async fn test_create_table_metadata_raise_if_key_not_exists() { + let schema = get_delta_schema(); + let config: HashMap> = + vec![("key".to_string(), Some("value".to_string()))] + .into_iter() + .collect(); + + // Fail to create table with unknown Delta key + let table = CreateBuilder::new() + .with_location("memory://") + .with_columns(schema.fields().cloned()) + .with_configuration(config.clone()) + .await; + assert!(table.is_err()); + + // Succeed in creating table with unknown Delta key since we set raise_if_key_not_exists to false + let table = CreateBuilder::new() + .with_location("memory://") + .with_columns(schema.fields().cloned()) + .with_raise_if_key_not_exists(false) + .with_configuration(config) + .await; + assert!(table.is_ok()); + + // Ensure the non-Delta key was set correctly + let value = table + .unwrap() + .metadata() + .unwrap() + .configuration + .get("key") + .unwrap() + .as_ref() + .unwrap() + .clone(); + assert_eq!(String::from("value"), value); + } } diff --git a/crates/core/src/operations/delete.rs b/crates/core/src/operations/delete.rs index 2e3e99bde2..692c1b303b 100644 --- a/crates/core/src/operations/delete.rs +++ b/crates/core/src/operations/delete.rs @@ -17,34 +17,47 @@ //! .await?; //! 
```` -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; - +use crate::delta_datafusion::logical::MetricObserver; +use crate::delta_datafusion::physical::{find_metric_node, get_metric, MetricObserverExec}; +use crate::delta_datafusion::planner::DeltaPlanner; use crate::logstore::LogStoreRef; +use async_trait::async_trait; +use datafusion::dataframe::DataFrame; +use datafusion::datasource::provider_as_source; +use datafusion::error::Result as DataFusionResult; use datafusion::execution::context::{SessionContext, SessionState}; -use datafusion::physical_expr::create_physical_expr; -use datafusion::physical_plan::filter::FilterExec; +use datafusion::physical_plan::metrics::MetricBuilder; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; use datafusion::prelude::Expr; -use datafusion_common::scalar::ScalarValue; -use datafusion_common::DFSchema; +use datafusion_common::ScalarValue; +use datafusion_expr::{lit, Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode}; + use futures::future::BoxFuture; +use std::sync::Arc; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + use parquet::file::properties::WriterProperties; use serde::Serialize; -use serde_json::Value; +use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; -use super::transaction::PROTOCOL; +use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; + use crate::delta_datafusion::expr::fmt_expr_to_sql; -use crate::delta_datafusion::{find_files, register_store, DeltaScanBuilder, DeltaSessionContext}; +use crate::delta_datafusion::{ + find_files, register_store, DataFusionMixins, DeltaScanConfigBuilder, DeltaSessionContext, + DeltaTableProvider, +}; use crate::errors::DeltaResult; use crate::kernel::{Action, Add, Remove}; -use crate::operations::transaction::commit; -use crate::operations::write::write_execution_plan; +use crate::operations::write::{write_execution_plan, write_execution_plan_cdc, WriterStatsConfig}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use crate::DeltaTable; +use crate::{DeltaTable, DeltaTableError}; + +const SOURCE_COUNT_ID: &str = "delete_source_count"; +const SOURCE_COUNT_METRIC: &str = "num_source_rows"; /// Delete Records from the Delta Table. 
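/// Additional commit metadata is attached through [`CommitProperties`] (for example
/// `DeleteBuilder::new(log_store, snapshot).with_commit_properties(CommitProperties::default())`)
/// rather than the removed `with_metadata` helper.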
/// See this module's documentation for more information @@ -59,8 +72,8 @@ pub struct DeleteBuilder { state: Option, /// Properties passed to underlying parquet writer for when files are rewritten writer_properties: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Commit properties and configuration + commit_properties: CommitProperties, } #[derive(Default, Debug, Serialize)] @@ -71,17 +84,19 @@ pub struct DeleteMetrics { /// Number of files removed pub num_removed_files: usize, /// Number of rows removed - pub num_deleted_rows: Option, + pub num_deleted_rows: usize, /// Number of rows copied in the process of deleting files - pub num_copied_rows: Option, + pub num_copied_rows: usize, /// Time taken to execute the entire operation - pub execution_time_ms: u128, + pub execution_time_ms: u64, /// Time taken to scan the file for matches - pub scan_time_ms: u128, + pub scan_time_ms: u64, /// Time taken to rewrite the matched files - pub rewrite_time_ms: u128, + pub rewrite_time_ms: u64, } +impl super::Operation<()> for DeleteBuilder {} + impl DeleteBuilder { /// Create a new [`DeleteBuilder`] pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -90,7 +105,7 @@ impl DeleteBuilder { snapshot, log_store, state: None, - app_metadata: None, + commit_properties: CommitProperties::default(), writer_properties: None, } } @@ -107,12 +122,9 @@ impl DeleteBuilder { self } - /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + /// Additonal information to write to the commit + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -123,96 +135,189 @@ impl DeleteBuilder { } } +#[derive(Clone)] +struct DeleteMetricExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for DeleteMetricExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DataFusionResult>> { + if let Some(metric_observer) = node.as_any().downcast_ref::() { + if metric_observer.id.eq(SOURCE_COUNT_ID) { + return Ok(Some(MetricObserverExec::try_new( + SOURCE_COUNT_ID.into(), + physical_inputs, + |batch, metrics| { + MetricBuilder::new(metrics) + .global_counter(SOURCE_COUNT_METRIC) + .add(batch.num_rows()); + }, + )?)); + } + } + Ok(None) + } +} + +#[allow(clippy::too_many_arguments)] async fn excute_non_empty_expr( snapshot: &DeltaTableState, log_store: LogStoreRef, state: &SessionState, expression: &Expr, - metrics: &mut DeleteMetrics, rewrite: &[Add], + metrics: &mut DeleteMetrics, writer_properties: Option, -) -> DeltaResult> { + partition_scan: bool, +) -> DeltaResult> { // For each identified file perform a parquet scan + filter + limit (1) + count. // If returned count is not zero then append the file to be rewritten and removed from the log. Otherwise do nothing to the file. 
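// (In this revision the candidate files are no longer probed with a separate scan + count:
// they are exposed through a `DeltaTableProvider`, wrapped in a `MetricObserver` that counts
// the source rows, rows NOT matching the predicate are rewritten via `write_execution_plan`,
// and, when change data feed is enabled, the matching rows are additionally written with
// `_change_type = "delete"` through `write_execution_plan_cdc`.)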
+ let mut actions: Vec = Vec::new(); + let table_partition_cols = snapshot.metadata().partition_columns.clone(); - let input_schema = snapshot.input_schema()?; - let input_dfschema: DFSchema = input_schema.clone().as_ref().clone().try_into()?; + let delete_planner = DeltaPlanner:: { + extension_planner: DeleteMetricExtensionPlanner {}, + }; - let table_partition_cols = snapshot.metadata().partition_columns.clone(); + let state = state.clone().with_query_planner(Arc::new(delete_planner)); + + let scan_config = DeltaScanConfigBuilder::default() + .with_file_column(false) + .with_schema(snapshot.input_schema()?) + .build(snapshot)?; + + let target_provider = Arc::new( + DeltaTableProvider::try_new(snapshot.clone(), log_store.clone(), scan_config.clone())? + .with_files(rewrite.to_vec()), + ); + let target_provider = provider_as_source(target_provider); + let source = LogicalPlanBuilder::scan("target", target_provider.clone(), None)?.build()?; + + let source = LogicalPlan::Extension(Extension { + node: Arc::new(MetricObserver { + id: "delete_source_count".into(), + input: source, + enable_pushdown: false, + }), + }); + + let df = DataFrame::new(state.clone(), source); + + let writer_stats_config = WriterStatsConfig::new( + snapshot.table_config().num_indexed_cols(), + snapshot + .table_config() + .stats_columns() + .map(|v| v.iter().map(|v| v.to_string()).collect::>()), + ); + + if !partition_scan { + // Apply the negation of the filter and rewrite files + let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + + let filter = df + .clone() + .filter(negated_expression)? + .create_physical_plan() + .await?; - let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), state) - .with_files(rewrite) - .build() + let add_actions: Vec = write_execution_plan( + Some(snapshot), + state.clone(), + filter.clone(), + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties.clone(), + false, + None, + writer_stats_config.clone(), + None, + ) .await?; - let scan = Arc::new(scan); - - // Apply the negation of the filter and rewrite files - let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); - - let predicate_expr = create_physical_expr( - &negated_expression, - &input_dfschema, - state.execution_props(), - )?; - let filter: Arc = - Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); - - let add_actions = write_execution_plan( - Some(snapshot), - state.clone(), - filter.clone(), - table_partition_cols.clone(), - log_store.object_store(), - Some(snapshot.table_config().target_file_size() as usize), - None, - writer_properties, - false, - false, - ) - .await?; - - let read_records = scan.parquet_scan.metrics().and_then(|m| m.output_rows()); - let filter_records = filter.metrics().and_then(|m| m.output_rows()); - metrics.num_copied_rows = filter_records; - metrics.num_deleted_rows = read_records - .zip(filter_records) - .map(|(read, filter)| read - filter); - - Ok(add_actions) + + actions.extend(add_actions); + + let source_count = find_metric_node(SOURCE_COUNT_ID, &filter).ok_or_else(|| { + DeltaTableError::Generic("Unable to locate expected metric node".into()) + })?; + let source_count_metrics = source_count.metrics().unwrap(); + let read_records = get_metric(&source_count_metrics, SOURCE_COUNT_METRIC); + let filter_records = filter.metrics().and_then(|m| m.output_rows()).unwrap_or(0); + + metrics.num_copied_rows = filter_records; + 
metrics.num_deleted_rows = read_records - filter_records; + } + + // CDC logic, simply filters data with predicate and adds the _change_type="delete" as literal column + if let Ok(true) = should_write_cdc(snapshot) { + // Create CDC scan + let change_type_lit = lit(ScalarValue::Utf8(Some("delete".to_string()))); + let cdc_filter = df + .filter(expression.clone())? + .with_column("_change_type", change_type_lit)? + .create_physical_plan() + .await?; + + use crate::operations::write::write_execution_plan_cdc; + let cdc_actions = write_execution_plan_cdc( + Some(snapshot), + state.clone(), + cdc_filter, + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + false, + writer_stats_config, + None, + ) + .await?; + actions.extend(cdc_actions) + } + + Ok(actions) } async fn execute( predicate: Option, log_store: LogStoreRef, - snapshot: &DeltaTableState, + snapshot: DeltaTableState, state: SessionState, writer_properties: Option, - app_metadata: Option>, -) -> DeltaResult<((Vec, i64, Option), DeleteMetrics)> { + mut commit_properties: CommitProperties, +) -> DeltaResult<(DeltaTableState, DeleteMetrics)> { let exec_start = Instant::now(); let mut metrics = DeleteMetrics::default(); let scan_start = Instant::now(); - let candidates = find_files(snapshot, log_store.clone(), &state, predicate.clone()).await?; - metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_micros(); + let candidates = find_files(&snapshot, log_store.clone(), &state, predicate.clone()).await?; + metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_millis() as u64; let predicate = predicate.unwrap_or(Expr::Literal(ScalarValue::Boolean(Some(true)))); - let add = if candidates.partition_scan { - Vec::new() - } else { + let mut actions = { let write_start = Instant::now(); let add = excute_non_empty_expr( - snapshot, + &snapshot, log_store.clone(), &state, &predicate, - &mut metrics, &candidates.candidates, + &mut metrics, writer_properties, + candidates.partition_scan, ) .await?; - metrics.rewrite_time_ms = Instant::now().duration_since(write_start).as_millis(); + metrics.rewrite_time_ms = Instant::now().duration_since(write_start).as_millis() as u64; add }; let remove = candidates.candidates; @@ -222,8 +327,6 @@ async fn execute( .unwrap() .as_millis() as i64; - let mut actions: Vec = add.into_iter().map(Action::Add).collect(); - let mut version = snapshot.version(); metrics.num_removed_files = remove.len(); metrics.num_added_files = actions.len(); @@ -242,35 +345,29 @@ async fn execute( })) } - metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_micros(); + metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_millis() as u64; - let mut app_metadata = match app_metadata { - Some(meta) => meta, - None => HashMap::new(), - }; - - app_metadata.insert("readVersion".to_owned(), snapshot.version().into()); - - if let Ok(map) = serde_json::to_value(&metrics) { - app_metadata.insert("operationMetrics".to_owned(), map); - } + commit_properties + .app_metadata + .insert("readVersion".to_owned(), snapshot.version().into()); + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(&metrics)?, + ); // Do not make a commit when there are zero updates to the state let operation = DeltaOperation::Delete { predicate: Some(fmt_expr_to_sql(&predicate)?), }; - if !actions.is_empty() { - version = commit( - log_store.as_ref(), - &actions, - 
operation.clone(), - Some(snapshot), - Some(app_metadata), - ) - .await?; + if actions.is_empty() { + return Ok((snapshot.clone(), metrics)); } - let op = (!actions.is_empty()).then_some(operation); - Ok(((actions, version, op), metrics)) + + let commit = CommitBuilder::from(commit_properties) + .with_actions(actions) + .build(Some(&snapshot), log_store, operation) + .await?; + Ok((commit.snapshot(), metrics)) } impl std::future::IntoFuture for DeleteBuilder { @@ -278,12 +375,11 @@ impl std::future::IntoFuture for DeleteBuilder { type IntoFuture = BoxFuture<'static, Self::Output>; fn into_future(self) -> Self::IntoFuture { - let mut this = self; + let this = self; Box::pin(async move { - PROTOCOL.check_append_only(&this.snapshot)?; - - PROTOCOL.can_write_to(&this.snapshot)?; + PROTOCOL.check_append_only(&this.snapshot.snapshot)?; + PROTOCOL.can_write_to(&this.snapshot.snapshot)?; let state = this.state.unwrap_or_else(|| { let session: SessionContext = DeltaSessionContext::default().into(); @@ -304,28 +400,29 @@ impl std::future::IntoFuture for DeleteBuilder { None => None, }; - let ((actions, version, operation), metrics) = execute( + let (new_snapshot, metrics) = execute( predicate, this.log_store.clone(), - &this.snapshot, + this.snapshot, state, this.writer_properties, - this.app_metadata, + this.commit_properties, ) .await?; - if let Some(op) = &operation { - this.snapshot.merge(actions, op, version)?; - } - - let table = DeltaTable::new_with_state(this.log_store, this.snapshot); - Ok((table, metrics)) + Ok(( + DeltaTable::new_with_state(this.log_store, new_snapshot), + metrics, + )) }) } } #[cfg(test)] mod tests { + use crate::delta_datafusion::cdf::DeltaCdfScan; + use crate::kernel::DataType as DeltaDataType; + use crate::operations::collect_sendable_stream; use crate::operations::DeltaOps; use crate::protocol::*; use crate::writer::test_utils::datafusion::get_data; @@ -339,11 +436,15 @@ mod tests { use arrow::datatypes::{Field, Schema}; use arrow::record_batch::RecordBatch; use arrow_array::ArrayRef; + use arrow_array::StringArray; use arrow_array::StructArray; use arrow_buffer::NullBuffer; + use arrow_schema::DataType; use arrow_schema::Fields; use datafusion::assert_batches_sorted_eq; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; + use delta_kernel::schema::PrimitiveType; use serde_json::json; use std::sync::Arc; @@ -352,7 +453,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -407,8 +508,8 @@ mod tests { assert_eq!(table.get_files_count(), 0); assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; @@ -418,16 +519,13 @@ mod tests { // serde_json::to_value(&metrics).unwrap() // ); - // rewrite is not required - assert_eq!(metrics.rewrite_time_ms, 0); - // Deletes with no changes to state must not commit let (table, metrics) = DeltaOps(table).delete().await.unwrap(); assert_eq!(table.version(), 2); assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 0); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + 
assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); } #[tokio::test] @@ -498,8 +596,8 @@ mod tests { assert_eq!(metrics.num_added_files, 1); assert_eq!(metrics.num_removed_files, 1); assert!(metrics.scan_time_ms > 0); - assert_eq!(metrics.num_deleted_rows, Some(1)); - assert_eq!(metrics.num_copied_rows, Some(3)); + assert_eq!(metrics.num_deleted_rows, 1); + assert_eq!(metrics.num_copied_rows, 3); let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; @@ -653,10 +751,9 @@ mod tests { assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); assert!(metrics.scan_time_ms > 0); - assert_eq!(metrics.rewrite_time_ms, 0); let expected = vec![ "+----+-------+------------+", @@ -715,8 +812,8 @@ mod tests { assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, Some(1)); - assert_eq!(metrics.num_copied_rows, Some(0)); + assert_eq!(metrics.num_deleted_rows, 1); + assert_eq!(metrics.num_copied_rows, 0); assert!(metrics.scan_time_ms > 0); let expected = [ @@ -799,4 +896,174 @@ mod tests { .await; assert!(res.is_err()); } + + #[tokio::test] + async fn test_delete_cdc_enabled() { + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .delete() + .with_predicate(col("value").eq(lit(2))) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! 
{[ + "+-------+--------------+-----------------+", + "| value | _change_type | _commit_version |", + "+-------+--------------+-----------------+", + "| 1 | insert | 1 |", + "| 2 | delete | 2 |", + "| 2 | insert | 1 |", + "| 3 | insert | 1 |", + "+-------+--------------+-----------------+", + ], &batches } + } + + #[tokio::test] + async fn test_delete_cdc_enabled_partitioned() { + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "year", + DeltaDataType::Primitive(PrimitiveType::String), + true, + None, + ) + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_partition_columns(vec!["year"]) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![ + Field::new("year", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020"), + Some("2024"), + ])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .delete() + .with_predicate(col("value").eq(lit(2))) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! {[ + "+-------+--------------+-----------------+------+", + "| value | _change_type | _commit_version | year |", + "+-------+--------------+-----------------+------+", + "| 1 | insert | 1 | 2020 |", + "| 2 | delete | 2 | 2020 |", + "| 2 | insert | 1 | 2020 |", + "| 3 | insert | 1 | 2024 |", + "+-------+--------------+-----------------+------+", + ], &batches } + } + + async fn collect_batches( + num_partitions: usize, + stream: DeltaCdfScan, + ctx: SessionContext, + ) -> Result, Box> { + let mut batches = vec![]; + for p in 0..num_partitions { + let data: Vec = + collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?; + batches.extend_from_slice(&data); + } + Ok(batches) + } } diff --git a/crates/core/src/operations/drop_constraints.rs b/crates/core/src/operations/drop_constraints.rs new file mode 100644 index 0000000000..0941b99552 --- /dev/null +++ b/crates/core/src/operations/drop_constraints.rs @@ -0,0 +1,183 @@ +//! 
Drop a constraint from a table + +use futures::future::BoxFuture; + +use super::transaction::{CommitBuilder, CommitProperties}; +use crate::kernel::Action; +use crate::logstore::LogStoreRef; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::DeltaTable; +use crate::{DeltaResult, DeltaTableError}; + +/// Remove constraints from the table +pub struct DropConstraintBuilder { + /// A snapshot of the table's state + snapshot: DeltaTableState, + /// Name of the constraint + name: Option, + /// Raise if constraint doesn't exist + raise_if_not_exists: bool, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Additional information to add to the commit + commit_properties: CommitProperties, +} + +impl super::Operation<()> for DropConstraintBuilder {} + +impl DropConstraintBuilder { + /// Create a new builder + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + name: None, + raise_if_not_exists: true, + snapshot, + log_store, + commit_properties: CommitProperties::default(), + } + } + + /// Specify the constraint to be removed + pub fn with_constraint>(mut self, name: S) -> Self { + self.name = Some(name.into()); + self + } + + /// Specify if you want to raise if the constraint does not exist + pub fn with_raise_if_not_exists(mut self, raise: bool) -> Self { + self.raise_if_not_exists = raise; + self + } + + /// Additional metadata to be added to commit info + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; + self + } +} + +impl std::future::IntoFuture for DropConstraintBuilder { + type Output = DeltaResult; + + type IntoFuture = BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let name = this + .name + .ok_or(DeltaTableError::Generic("No name provided".to_string()))?; + + let mut metadata = this.snapshot.metadata().clone(); + let configuration_key = format!("delta.constraints.{}", name); + + if metadata.configuration.remove(&configuration_key).is_none() { + if this.raise_if_not_exists { + return Err(DeltaTableError::Generic(format!( + "Constraint with name: {} doesn't exists", + name + ))); + } + return Ok(DeltaTable::new_with_state(this.log_store, this.snapshot)); + } + let operation = DeltaOperation::DropConstraint { name: name.clone() }; + + let actions = vec![Action::Metadata(metadata)]; + + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build(Some(&this.snapshot), this.log_store.clone(), operation) + .await?; + + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) + }) + } +} + +#[cfg(feature = "datafusion")] +#[cfg(test)] +mod tests { + use crate::writer::test_utils::{create_bare_table, get_record_batch}; + use crate::{DeltaOps, DeltaResult, DeltaTable}; + + async fn get_constraint_op_params(table: &mut DeltaTable) -> String { + let commit_info = table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + + last_commit + .operation_parameters + .as_ref() + .unwrap() + .get("name") + .unwrap() + .as_str() + .unwrap() + .to_owned() + } + + #[tokio::test] + async fn drop_valid_constraint() -> DeltaResult<()> { + let batch = get_record_batch(None, false); + let write = DeltaOps(create_bare_table()) + .write(vec![batch.clone()]) + .await?; + let table = DeltaOps(write); + + let table = table + .add_constraint() + .with_constraint("id", "value < 1000") + 
.await?; + + let mut table = DeltaOps(table) + .drop_constraints() + .with_constraint("id") + .await?; + + let expected_name = "id"; + assert_eq!(get_constraint_op_params(&mut table).await, expected_name); + assert_eq!(table.metadata().unwrap().configuration.get("id"), None); + Ok(()) + } + + #[tokio::test] + async fn drop_invalid_constraint_not_existing() -> DeltaResult<()> { + let batch = get_record_batch(None, false); + let write = DeltaOps(create_bare_table()) + .write(vec![batch.clone()]) + .await?; + + let table = DeltaOps(write) + .drop_constraints() + .with_constraint("not_existing") + .await; + assert!(table.is_err()); + + Ok(()) + } + + #[tokio::test] + async fn drop_invalid_constraint_ignore() -> DeltaResult<()> { + let batch = get_record_batch(None, false); + let write = DeltaOps(create_bare_table()) + .write(vec![batch.clone()]) + .await?; + + let version = write.version(); + + let table = DeltaOps(write) + .drop_constraints() + .with_constraint("not_existing") + .with_raise_if_not_exists(false) + .await?; + + let version_after = table.version(); + + assert_eq!(version, version_after); + Ok(()) + } +} diff --git a/crates/core/src/operations/filesystem_check.rs b/crates/core/src/operations/filesystem_check.rs index 923f0aea54..44fa84d29a 100644 --- a/crates/core/src/operations/filesystem_check.rs +++ b/crates/core/src/operations/filesystem_check.rs @@ -27,11 +27,13 @@ use url::{ParseError, Url}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; -use crate::operations::transaction::commit; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::DeltaTable; +use super::transaction::CommitBuilder; +use super::transaction::CommitProperties; + /// Audit the Delta Table's active files with the underlying file system. /// See this module's documentation for more information #[derive(Debug)] @@ -42,8 +44,8 @@ pub struct FileSystemCheckBuilder { log_store: LogStoreRef, /// Don't remove actions to the table log. Just determine which files can be removed dry_run: bool, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Commit properties and configuration + commit_properties: CommitProperties, } /// Details of the FSCK operation including which files were removed from the log @@ -73,6 +75,8 @@ fn is_absolute_path(path: &str) -> DeltaResult { } } +impl super::Operation<()> for FileSystemCheckBuilder {} + impl FileSystemCheckBuilder { /// Create a new [`FileSystemCheckBuilder`] pub fn new(log_store: LogStoreRef, state: DeltaTableState) -> Self { @@ -80,7 +84,7 @@ impl FileSystemCheckBuilder { snapshot: state, log_store, dry_run: false, - app_metadata: None, + commit_properties: CommitProperties::default(), } } @@ -90,12 +94,9 @@ impl FileSystemCheckBuilder { self } - /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + /// Additonal information to write to the commit + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -104,7 +105,7 @@ impl FileSystemCheckBuilder { HashMap::with_capacity(self.snapshot.file_actions()?.len()); let log_store = self.log_store.clone(); - for active in self.snapshot.file_actions()? { + for active in self.snapshot.file_actions_iter()? { if is_absolute_path(&active.path)? 
{ return Err(DeltaTableError::Generic( "Filesystem check does not support absolute paths".to_string(), @@ -141,7 +142,7 @@ impl FileSystemCheckPlan { pub async fn execute( self, snapshot: &DeltaTableState, - app_metadata: Option>, + mut commit_properties: CommitProperties, ) -> DeltaResult { if self.files_to_remove.is_empty() { return Ok(FileSystemCheckMetrics { @@ -175,25 +176,22 @@ impl FileSystemCheckPlan { files_removed: removed_file_paths, }; - let mut app_metadata = match app_metadata { - Some(meta) => meta, - None => HashMap::new(), - }; - - app_metadata.insert("readVersion".to_owned(), snapshot.version().into()); - if let Ok(map) = serde_json::to_value(&metrics) { - app_metadata.insert("operationMetrics".to_owned(), map); - } - - commit( - self.log_store.as_ref(), - &actions, - DeltaOperation::FileSystemCheck {}, - Some(snapshot), - // TODO pass through metadata - Some(app_metadata), - ) - .await?; + commit_properties + .app_metadata + .insert("readVersion".to_owned(), snapshot.version().into()); + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(&metrics)?, + ); + + CommitBuilder::from(commit_properties) + .with_actions(actions) + .build( + Some(snapshot), + self.log_store.clone(), + DeltaOperation::FileSystemCheck {}, + ) + .await?; Ok(metrics) } @@ -218,7 +216,7 @@ impl std::future::IntoFuture for FileSystemCheckBuilder { )); } - let metrics = plan.execute(&this.snapshot, this.app_metadata).await?; + let metrics = plan.execute(&this.snapshot, this.commit_properties).await?; let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); table.update().await?; Ok((table, metrics)) diff --git a/crates/core/src/operations/load.rs b/crates/core/src/operations/load.rs index 2eac151052..4bf439cd0d 100644 --- a/crates/core/src/operations/load.rs +++ b/crates/core/src/operations/load.rs @@ -7,6 +7,7 @@ use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use futures::future::BoxFuture; use super::transaction::PROTOCOL; +use crate::delta_datafusion::DataFusionMixins; use crate::errors::{DeltaResult, DeltaTableError}; use crate::logstore::LogStoreRef; use crate::table::state::DeltaTableState; @@ -22,6 +23,8 @@ pub struct LoadBuilder { columns: Option>, } +impl super::Operation<()> for LoadBuilder {} + impl LoadBuilder { /// Create a new [`LoadBuilder`] pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -47,7 +50,7 @@ impl std::future::IntoFuture for LoadBuilder { let this = self; Box::pin(async move { - PROTOCOL.can_read_from(&this.snapshot)?; + PROTOCOL.can_read_from(&this.snapshot.snapshot)?; let table = DeltaTable::new_with_state(this.log_store, this.snapshot); let schema = table.snapshot()?.arrow_schema()?; diff --git a/crates/core/src/operations/load_cdf.rs b/crates/core/src/operations/load_cdf.rs new file mode 100644 index 0000000000..57542ab668 --- /dev/null +++ b/crates/core/src/operations/load_cdf.rs @@ -0,0 +1,547 @@ +//! 
Module for reading the change datafeed of delta tables + +use datafusion_physical_expr::{ + expressions::{self}, + PhysicalExpr, +}; +use std::sync::Arc; +use std::time::SystemTime; + +use arrow_schema::{ArrowError, Field}; +use chrono::{DateTime, Utc}; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::physical_plan::FileScanConfig; +use datafusion::physical_plan::projection::ProjectionExec; +use datafusion::physical_plan::union::UnionExec; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::prelude::SessionContext; +use datafusion_common::{ScalarValue, Statistics}; +use tracing::log; + +use crate::delta_datafusion::cdf::*; +use crate::delta_datafusion::{register_store, DataFusionMixins}; +use crate::errors::DeltaResult; +use crate::kernel::{Action, Add, AddCDCFile, CommitInfo}; +use crate::logstore::{get_actions, LogStoreRef}; +use crate::table::state::DeltaTableState; +use crate::DeltaTableError; + +/// Builder for create a read of change data feeds for delta tables +#[derive(Clone)] +pub struct CdfLoadBuilder { + /// A snapshot of the to-be-loaded table's state + snapshot: DeltaTableState, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Columns to project + columns: Option>, + /// Version to read from + starting_version: i64, + /// Version to stop reading at + ending_version: Option, + /// Starting timestamp of commits to accept + starting_timestamp: Option>, + /// Ending timestamp of commits to accept + ending_timestamp: Option>, + /// Provided Datafusion context + ctx: SessionContext, +} + +impl CdfLoadBuilder { + /// Create a new [`LoadBuilder`] + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + snapshot, + log_store, + columns: None, + starting_version: 0, + ending_version: None, + starting_timestamp: None, + ending_timestamp: None, + ctx: SessionContext::new(), + } + } + + /// Version to start at (version 0 if not provided) + pub fn with_starting_version(mut self, starting_version: i64) -> Self { + self.starting_version = starting_version; + self + } + + /// Version (inclusive) to end at + pub fn with_ending_version(mut self, ending_version: i64) -> Self { + self.ending_version = Some(ending_version); + self + } + + /// Provide a datafusion session context + pub fn with_session_ctx(mut self, ctx: SessionContext) -> Self { + self.ctx = ctx; + self + } + + /// Timestamp (inclusive) to end at + pub fn with_ending_timestamp(mut self, timestamp: DateTime) -> Self { + self.ending_timestamp = Some(timestamp); + self + } + + /// Timestamp to start from + pub fn with_starting_timestamp(mut self, timestamp: DateTime) -> Self { + self.starting_timestamp = Some(timestamp); + self + } + + /// Columns to select + pub fn with_columns(mut self, columns: Vec) -> Self { + self.columns = Some(columns); + self + } + + /// This is a rust version of https://github.com/delta-io/delta/blob/master/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala#L418 + /// Which iterates through versions of the delta table collects the relevant actions / commit info and returns those + /// groupings for later use. The scala implementation has a lot more edge case handling and read schema checking (and just error checking in general) + /// than I have right now. I plan to extend the checks once we have a stable state of the initial implementation. 
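/// In short: for every commit version in the requested range this returns two groupings,
/// versions carrying `cdc` actions (whose change files are read directly) and versions
/// without them (whose data-changing `add` actions are replayed and later labelled as
/// `insert`), while commits outside the optional timestamp window are skipped.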
+ async fn determine_files_to_read( + &self, + ) -> DeltaResult<(Vec>, Vec>)> { + let start = self.starting_version; + let end = self + .ending_version + .unwrap_or(self.log_store.get_latest_version(start).await?); + + if end < start { + return Err(DeltaTableError::ChangeDataInvalidVersionRange { start, end }); + } + + let starting_timestamp = self.starting_timestamp.unwrap_or(DateTime::UNIX_EPOCH); + let ending_timestamp = self + .ending_timestamp + .unwrap_or(DateTime::from(SystemTime::now())); + + log::debug!( + "starting timestamp = {:?}, ending timestamp = {:?}", + &starting_timestamp, + &ending_timestamp + ); + log::debug!("starting version = {}, ending version = {:?}", start, end); + + let mut change_files = vec![]; + let mut add_files = vec![]; + + for version in start..=end { + let snapshot_bytes = self + .log_store + .read_commit_entry(version) + .await? + .ok_or(DeltaTableError::InvalidVersion(version))?; + let version_actions = get_actions(version, snapshot_bytes).await?; + + let mut ts = 0; + let mut cdc_actions = vec![]; + + if self.starting_timestamp.is_some() || self.ending_timestamp.is_some() { + let version_commit = version_actions + .iter() + .find(|a| matches!(a, Action::CommitInfo(_))); + if let Some(Action::CommitInfo(CommitInfo { + timestamp: Some(t), .. + })) = version_commit + { + if starting_timestamp.timestamp_millis() > *t + || *t > ending_timestamp.timestamp_millis() + { + log::debug!("Version: {} skipped, due to commit timestamp", version); + continue; + } + } + } + + for action in &version_actions { + match action { + Action::Cdc(f) => cdc_actions.push(f.clone()), + Action::Metadata(md) => { + log::info!("Metadata: {:?}", &md); + if let Some(Some(key)) = &md.configuration.get("delta.enableChangeDataFeed") + { + let key = key.to_lowercase(); + // Check here to ensure the CDC function is enabled for the first version of the read + // and check in subsequent versions only that it was not disabled. + if (version == start && key != "true") || key == "false" { + return Err(DeltaTableError::ChangeDataNotRecorded { + version, + start, + end, + }); + } + } else if version == start { + return Err(DeltaTableError::ChangeDataNotEnabled { version }); + }; + } + Action::CommitInfo(ci) => { + ts = ci.timestamp.unwrap_or(0); + } + _ => {} + } + } + + if !cdc_actions.is_empty() { + log::debug!( + "Located {} cdf actions for version: {}", + cdc_actions.len(), + version + ); + change_files.push(CdcDataSpec::new(version, ts, cdc_actions)) + } else { + let add_actions = version_actions + .iter() + .filter_map(|a| match a { + Action::Add(a) if a.data_change => Some(a.clone()), + _ => None, + }) + .collect::>(); + + if !add_actions.is_empty() { + log::debug!( + "Located {} cdf actions for version: {}", + add_actions.len(), + version + ); + add_files.push(CdcDataSpec::new(version, ts, add_actions)); + } + } + } + + Ok((change_files, add_files)) + } + + #[inline] + fn get_add_action_type() -> Option { + Some(ScalarValue::Utf8(Some(String::from("insert")))) + } + + /// Executes the scan + pub async fn build(&self) -> DeltaResult { + let (cdc, add) = self.determine_files_to_read().await?; + register_store( + self.log_store.clone(), + self.ctx.state().runtime_env().clone(), + ); + + let partition_values = self.snapshot.metadata().partition_columns.clone(); + let schema = self.snapshot.input_schema()?; + let schema_fields: Vec = self + .snapshot + .input_schema()? 
+            .flattened_fields()
+            .into_iter()
+            .filter(|f| !partition_values.contains(f.name()))
+            .cloned()
+            .collect();
+
+        let this_partition_values = partition_values
+            .iter()
+            .map(|name| schema.field_with_name(name).map(|f| f.to_owned()))
+            .collect::<Result<Vec<Field>, ArrowError>>()?;
+
+        // Setup for the Read Schemas of each kind of file, CDC files include commit action type so they need a slightly
+        // different schema than standard add file reads
+        let cdc_file_schema = create_cdc_schema(schema_fields.clone(), true);
+        let add_file_schema = create_cdc_schema(schema_fields, false);
+
+        // Set up the mapping of partition columns to be projected into the final output batch
+        // cdc for example has timestamp, version, and any table partitions mapped here.
+        // add on the other hand has action type, timestamp, version and any additional table partitions because adds do
+        // not include their actions
+        let mut cdc_partition_cols = CDC_PARTITION_SCHEMA.clone();
+        let mut add_partition_cols = ADD_PARTITION_SCHEMA.clone();
+        cdc_partition_cols.extend_from_slice(&this_partition_values);
+        add_partition_cols.extend_from_slice(&this_partition_values);
+
+        // Set up the partition to physical file mapping, this is a mostly unmodified version of what is done in load
+        let cdc_file_groups =
+            create_partition_values(schema.clone(), cdc, &partition_values, None)?;
+        let add_file_groups = create_partition_values(
+            schema.clone(),
+            add,
+            &partition_values,
+            Self::get_add_action_type(),
+        )?;
+
+        // Create the parquet scans for each associated type of file. I am not sure when we would use removes yet, but
+        // they would be here if / when they are necessary
+        let cdc_scan = ParquetFormat::new()
+            .create_physical_plan(
+                &self.ctx.state(),
+                FileScanConfig {
+                    object_store_url: self.log_store.object_store_url(),
+                    file_schema: cdc_file_schema.clone(),
+                    file_groups: cdc_file_groups.into_values().collect(),
+                    statistics: Statistics::new_unknown(&cdc_file_schema),
+                    projection: None,
+                    limit: None,
+                    table_partition_cols: cdc_partition_cols,
+                    output_ordering: vec![],
+                },
+                None,
+            )
+            .await?;
+
+        let add_scan = ParquetFormat::new()
+            .create_physical_plan(
+                &self.ctx.state(),
+                FileScanConfig {
+                    object_store_url: self.log_store.object_store_url(),
+                    file_schema: add_file_schema.clone(),
+                    file_groups: add_file_groups.into_values().collect(),
+                    statistics: Statistics::new_unknown(&add_file_schema),
+                    projection: None,
+                    limit: None,
+                    table_partition_cols: add_partition_cols,
+                    output_ordering: vec![],
+                },
+                None,
+            )
+            .await?;
+
+        // The output batches are then unioned to create a single output. Coalesce partitions is only here for the time
+        // being for development. I plan to parallelize the reads once the base idea is correct.
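Editor's note: as a rough illustration of the schema bookkeeping above, the CDF output ends up as the table's data fields followed by the CDF metadata columns and any partition columns. The field types below are an approximation inferred from the expected test output later in this file, not taken from the CDC_PARTITION_SCHEMA / ADD_PARTITION_SCHEMA constants themselves.

```rust
// Illustrative only: approximate shape of the combined CDF read schema.
use arrow_schema::{DataType, Field, Schema, TimeUnit};

fn approx_cdf_output_schema(data_fields: Vec<Field>, partition_fields: Vec<Field>) -> Schema {
    let mut fields = data_fields;
    // `_change_type` is read from CDC files directly; for plain add files it is
    // injected as the constant partition value "insert" (see get_add_action_type).
    fields.push(Field::new("_change_type", DataType::Utf8, true));
    fields.push(Field::new("_commit_version", DataType::Int64, true));
    fields.push(Field::new(
        "_commit_timestamp",
        DataType::Timestamp(TimeUnit::Millisecond, None),
        true,
    ));
    fields.extend(partition_fields);
    Schema::new(fields)
}
```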
+        let mut union_scan: Arc<dyn ExecutionPlan> =
+            Arc::new(UnionExec::new(vec![cdc_scan, add_scan]));
+
+        if let Some(columns) = &self.columns {
+            let expressions: Vec<(Arc<dyn PhysicalExpr>, String)> = union_scan
+                .schema()
+                .fields()
+                .into_iter()
+                .enumerate()
+                .map(|(idx, field)| -> (Arc<dyn PhysicalExpr>, String) {
+                    let field_name = field.name();
+                    let expr = Arc::new(expressions::Column::new(field_name, idx));
+                    (expr, field_name.to_owned())
+                })
+                .filter(|(_, field_name)| columns.contains(field_name))
+                .collect();
+            union_scan = Arc::new(ProjectionExec::try_new(expressions, union_scan)?);
+        }
+        Ok(DeltaCdfScan::new(union_scan))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::error::Error;
+    use std::str::FromStr;
+
+    use arrow_array::RecordBatch;
+    use chrono::NaiveDateTime;
+    use datafusion::physical_plan::ExecutionPlan;
+    use datafusion::prelude::SessionContext;
+    use datafusion_common::assert_batches_sorted_eq;
+
+    use crate::delta_datafusion::cdf::DeltaCdfScan;
+    use crate::operations::collect_sendable_stream;
+    use crate::writer::test_utils::TestResult;
+    use crate::DeltaOps;
+
+    async fn collect_batches(
+        num_partitions: usize,
+        stream: DeltaCdfScan,
+        ctx: SessionContext,
+    ) -> Result<Vec<RecordBatch>, Box<dyn Error>> {
+        let mut batches = vec![];
+        for p in 0..num_partitions {
+            let data: Vec<RecordBatch> =
+                collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?;
+            batches.extend_from_slice(&data);
+        }
+        Ok(batches)
+    }
+
+    #[tokio::test]
+    async fn test_load_local() -> TestResult {
+        let ctx = SessionContext::new();
+        let table = DeltaOps::try_from_uri("../test/tests/data/cdf-table")
+            .await?
+            .load_cdf()
+            .with_session_ctx(ctx.clone())
+            .with_starting_version(0)
+            .build()
+            .await?;
+
+        let batches = collect_batches(
+            table.properties().output_partitioning().partition_count(),
+            table,
+            ctx,
+        )
+        .await?;
+        assert_batches_sorted_eq!
{ + ["+----+--------+------------------+-----------------+-------------------------+------------+", + "| id | name | _change_type | _commit_version | _commit_timestamp | birthday |", + "+----+--------+------------------+-----------------+-------------------------+------------+", + "| 7 | Dennis | delete | 3 | 2024-01-06T16:44:59.570 | 2023-12-29 |", + "| 3 | Dave | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 4 | Kate | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 2 | Bob | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 7 | Dennis | update_preimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-24 |", + "| 5 | Emily | update_preimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-24 |", + "| 6 | Carl | update_preimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-24 |", + "| 7 | Dennis | update_postimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-29 |", + "| 5 | Emily | update_postimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-29 |", + "| 6 | Carl | update_postimage | 2 | 2023-12-29T21:41:33.785 | 2023-12-29 |", + "| 3 | Dave | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 4 | Kate | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 2 | Bob | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 2 | Bob | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 3 | Dave | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 4 | Kate | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 5 | Emily | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "| 6 | Carl | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "| 7 | Dennis | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "| 1 | Steve | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-22 |", + "| 8 | Claire | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "| 9 | Ada | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "| 10 | Borb | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "+----+--------+------------------+-----------------+-------------------------+------------+" + ], &batches } + Ok(()) + } + + #[tokio::test] + async fn test_load_local_datetime() -> TestResult { + let ctx = SessionContext::new(); + let starting_timestamp = NaiveDateTime::from_str("2023-12-22T17:10:21.675").unwrap(); + let table = DeltaOps::try_from_uri("../test/tests/data/cdf-table") + .await? + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_ending_timestamp(starting_timestamp.and_utc()) + .build() + .await?; + + let batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await?; + + assert_batches_sorted_eq! 
{ + ["+----+--------+------------------+-----------------+-------------------------+------------+", + "| id | name | _change_type | _commit_version | _commit_timestamp | birthday |", + "+----+--------+------------------+-----------------+-------------------------+------------+", + "| 3 | Dave | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 4 | Kate | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 2 | Bob | update_preimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-23 |", + "| 3 | Dave | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 4 | Kate | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 2 | Bob | update_postimage | 1 | 2023-12-22T17:10:21.675 | 2023-12-22 |", + "| 2 | Bob | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 3 | Dave | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 4 | Kate | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-23 |", + "| 8 | Claire | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "| 9 | Ada | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "| 10 | Borb | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-25 |", + "| 1 | Steve | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-22 |", + "| 5 | Emily | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "| 6 | Carl | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "| 7 | Dennis | insert | 0 | 2023-12-22T17:10:18.828 | 2023-12-24 |", + "+----+--------+------------------+-----------------+-------------------------+------------+" + ], + &batches + } + Ok(()) + } + + #[tokio::test] + async fn test_load_local_non_partitioned() -> TestResult { + let ctx = SessionContext::new(); + let table = DeltaOps::try_from_uri("../test/tests/data/cdf-table-non-partitioned") + .await? + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await?; + + let batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await?; + + assert_batches_sorted_eq! 
{ + ["+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+-------------------------+", + "| id | name | birthday | long_field | boolean_field | double_field | smallint_field | _change_type | _commit_version | _commit_timestamp |", + "+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+-------------------------+", + "| 7 | Dennis | 2024-04-14 | 6 | true | 3.14 | 1 | delete | 3 | 2024-04-14T15:58:32.495 |", + "| 3 | Dave | 2024-04-15 | 2 | true | 3.14 | 1 | update_preimage | 1 | 2024-04-14T15:58:29.393 |", + "| 3 | Dave | 2024-04-14 | 2 | true | 3.14 | 1 | update_postimage | 1 | 2024-04-14T15:58:29.393 |", + "| 4 | Kate | 2024-04-15 | 3 | true | 3.14 | 1 | update_preimage | 1 | 2024-04-14T15:58:29.393 |", + "| 4 | Kate | 2024-04-14 | 3 | true | 3.14 | 1 | update_postimage | 1 | 2024-04-14T15:58:29.393 |", + "| 2 | Bob | 2024-04-15 | 1 | true | 3.14 | 1 | update_preimage | 1 | 2024-04-14T15:58:29.393 |", + "| 2 | Bob | 2024-04-14 | 1 | true | 3.14 | 1 | update_postimage | 1 | 2024-04-14T15:58:29.393 |", + "| 7 | Dennis | 2024-04-16 | 6 | true | 3.14 | 1 | update_preimage | 2 | 2024-04-14T15:58:31.257 |", + "| 7 | Dennis | 2024-04-14 | 6 | true | 3.14 | 1 | update_postimage | 2 | 2024-04-14T15:58:31.257 |", + "| 5 | Emily | 2024-04-16 | 4 | true | 3.14 | 1 | update_preimage | 2 | 2024-04-14T15:58:31.257 |", + "| 5 | Emily | 2024-04-14 | 4 | true | 3.14 | 1 | update_postimage | 2 | 2024-04-14T15:58:31.257 |", + "| 6 | Carl | 2024-04-16 | 5 | true | 3.14 | 1 | update_preimage | 2 | 2024-04-14T15:58:31.257 |", + "| 6 | Carl | 2024-04-14 | 5 | true | 3.14 | 1 | update_postimage | 2 | 2024-04-14T15:58:31.257 |", + "| 1 | Alex | 2024-04-14 | 1 | true | 3.14 | 1 | insert | 4 | 2024-04-14T15:58:33.444 |", + "| 2 | Alan | 2024-04-15 | 1 | true | 3.14 | 1 | insert | 4 | 2024-04-14T15:58:33.444 |", + "| 1 | Steve | 2024-04-14 | 1 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 2 | Bob | 2024-04-15 | 1 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 3 | Dave | 2024-04-15 | 2 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 4 | Kate | 2024-04-15 | 3 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 5 | Emily | 2024-04-16 | 4 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 6 | Carl | 2024-04-16 | 5 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 7 | Dennis | 2024-04-16 | 6 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 8 | Claire | 2024-04-17 | 7 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 9 | Ada | 2024-04-17 | 8 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "| 10 | Borb | 2024-04-17 | 99999999999999999 | true | 3.14 | 1 | insert | 0 | 2024-04-14T15:58:26.249 |", + "+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+-------------------------+"], + &batches + } + Ok(()) + } + + #[tokio::test] + async fn test_load_bad_version_range() -> TestResult { + let table = DeltaOps::try_from_uri("../test/tests/data/cdf-table-non-partitioned") + .await? + .load_cdf() + .with_starting_version(4) + .with_ending_version(1) + .build() + .await; + + assert!(table.is_err()); + assert!(matches!( + table.unwrap_err(), + DeltaTableError::ChangeDataInvalidVersionRange { .. 
} + )); + + Ok(()) + } + + #[tokio::test] + async fn test_load_non_cdf() -> TestResult { + let table = DeltaOps::try_from_uri("../test/tests/data/simple_table") + .await? + .load_cdf() + .with_starting_version(0) + .build() + .await; + + assert!(table.is_err()); + assert!(matches!( + table.unwrap_err(), + DeltaTableError::ChangeDataNotEnabled { .. } + )); + + Ok(()) + } +} diff --git a/crates/core/src/operations/merge/barrier.rs b/crates/core/src/operations/merge/barrier.rs index f1df28c4a4..e9b2f8fd00 100644 --- a/crates/core/src/operations/merge/barrier.rs +++ b/crates/core/src/operations/merge/barrier.rs @@ -6,7 +6,7 @@ //! To determine if a file contains zero changes, the input stream is //! exhausted. Afterwards, records are then dropped. //! -//! Bookkeeping is maintained to determine which files have modifications so +//! Bookkeeping is maintained to determine which files have modifications, so //! they can be removed from the delta log. use std::{ @@ -67,6 +67,10 @@ impl MergeBarrierExec { } impl ExecutionPlan for MergeBarrierExec { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn std::any::Any { self } @@ -75,26 +79,27 @@ impl ExecutionPlan for MergeBarrierExec { self.input.schema() } - fn output_partitioning(&self) -> datafusion_physical_expr::Partitioning { - self.input.output_partitioning() + fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + self.input.properties() } fn required_input_distribution(&self) -> Vec { vec![Distribution::HashPartitioned(vec![self.expr.clone()]); 1] } - fn output_ordering(&self) -> Option<&[datafusion_physical_expr::PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - vec![self.input.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.input] } fn with_new_children( - self: std::sync::Arc, - children: Vec>, - ) -> datafusion_common::Result> { + self: Arc, + children: Vec>, + ) -> datafusion_common::Result> { + if children.len() != 1 { + return Err(DataFusionError::Plan( + "MergeBarrierExec wrong number of children".to_string(), + )); + } Ok(Arc::new(MergeBarrierExec::new( children[0].clone(), self.file_column.clone(), @@ -105,7 +110,7 @@ impl ExecutionPlan for MergeBarrierExec { fn execute( &self, partition: usize, - context: std::sync::Arc, + context: Arc, ) -> datafusion_common::Result { let input = self.input.execute(partition, context)?; Ok(Box::pin(MergeBarrierStream::new( @@ -421,11 +426,20 @@ impl UserDefinedLogicalNodeCore for MergeBarrier { exprs: &[datafusion_expr::Expr], inputs: &[datafusion_expr::LogicalPlan], ) -> Self { - MergeBarrier { + self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + exprs: Vec, + inputs: Vec, + ) -> DataFusionResult { + Ok(MergeBarrier { input: inputs[0].clone(), file_column: self.file_column.clone(), expr: exprs[0].clone(), - } + }) } } diff --git a/crates/core/src/operations/merge/mod.rs b/crates/core/src/operations/merge/mod.rs index b1f89c4c12..ea54e4e211 100644 --- a/crates/core/src/operations/merge/mod.rs +++ b/crates/core/src/operations/merge/mod.rs @@ -35,38 +35,39 @@ use std::time::Instant; use async_trait::async_trait; use datafusion::datasource::provider_as_source; use datafusion::error::Result as DataFusionResult; -use datafusion::execution::context::{QueryPlanner, SessionConfig}; +use datafusion::execution::context::SessionConfig; use datafusion::logical_expr::build_join_schema; -use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, 
PhysicalPlanner}; +use datafusion::physical_plan::metrics::MetricBuilder; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; use datafusion::{ execution::context::SessionState, - physical_plan::{ - metrics::{MetricBuilder, MetricsSet}, - ExecutionPlan, - }, + physical_plan::ExecutionPlan, prelude::{DataFrame, SessionContext}, }; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Column, DFSchema, ScalarValue, TableReference}; use datafusion_expr::expr::Placeholder; -use datafusion_expr::{col, conditional_expressions::CaseBuilder, lit, when, Expr, JoinType}; use datafusion_expr::{ - BinaryExpr, Distinct, Extension, Filter, LogicalPlan, LogicalPlanBuilder, Operator, Projection, + col, conditional_expressions::CaseBuilder, lit, max, min, when, Between, Expr, JoinType, +}; +use datafusion_expr::{ + Aggregate, BinaryExpr, Extension, LogicalPlan, LogicalPlanBuilder, Operator, UserDefinedLogicalNode, UNNAMED_TABLE, }; +use either::{Left, Right}; use futures::future::BoxFuture; use itertools::Itertools; use parquet::file::properties::WriterProperties; use serde::Serialize; -use serde_json::Value; use self::barrier::{MergeBarrier, MergeBarrierExec}; use super::datafusion_utils::{into_expr, maybe_into_expr, Expression}; -use super::transaction::{commit, PROTOCOL}; +use super::transaction::{CommitProperties, PROTOCOL}; use crate::delta_datafusion::expr::{fmt_expr_to_sql, parse_predicate_expression}; use crate::delta_datafusion::logical::MetricObserver; -use crate::delta_datafusion::physical::{find_metric_node, MetricObserverExec}; +use crate::delta_datafusion::physical::{find_metric_node, get_metric, MetricObserverExec}; +use crate::delta_datafusion::planner::DeltaPlanner; use crate::delta_datafusion::{ execute_plan_to_batch, register_store, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionConfig, DeltaTableProvider, @@ -74,7 +75,8 @@ use crate::delta_datafusion::{ use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::operations::merge::barrier::find_barrier_node; -use crate::operations::write::write_execution_plan; +use crate::operations::transaction::CommitBuilder; +use crate::operations::write::{write_execution_plan, WriterStatsConfig}; use crate::protocol::{DeltaOperation, MergePredicate}; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableError}; @@ -126,13 +128,15 @@ pub struct MergeBuilder { state: Option, /// Properties passed to underlying parquet writer for when files are rewritten writer_properties: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, /// safe_cast determines how data types that do not match the underlying table are handled /// By default an error is returned safe_cast: bool, } +impl super::Operation<()> for MergeBuilder {} + impl MergeBuilder { /// Create a new [`MergeBuilder`] pub fn new>( @@ -150,7 +154,7 @@ impl MergeBuilder { source_alias: None, target_alias: None, state: None, - app_metadata: None, + commit_properties: CommitProperties::default(), writer_properties: None, match_operations: Vec::new(), not_match_operations: Vec::new(), @@ -163,10 +167,10 @@ impl MergeBuilder { /// /// The update expressions can specify both source and target columns. 
/// - /// Multiple match clasues can be specified and their predicates are + /// Multiple match clauses can be specified and their predicates are /// evaluated to determine if the corresponding operation are performed. - /// Only the first clause that results in an satisfy predicate is executed. - /// Ther order of match clauses matter. + /// Only the first clause that results in a satisfy predicate is executed. + /// The order of match clauses matter. /// /// #Example /// ```rust ignore @@ -201,10 +205,10 @@ impl MergeBuilder { /// Delete a target record when it matches with a source record /// - /// Multiple match clasues can be specified and their predicates are + /// Multiple match clauses can be specified and their predicates are /// evaluated to determine if the corresponding operation are performed. - /// Only the first clause that results in an satisfy predicate is executed. - /// Ther order of match clauses matter. + /// Only the first clause that results in a satisfy predicate is executed. + /// The order of match clauses matter. /// /// #Example /// ```rust ignore @@ -234,10 +238,10 @@ impl MergeBuilder { /// Insert a source record when it does not match with a target record /// - /// Multiple not match clasues can be specified and their predicates are + /// Multiple not match clauses can be specified and their predicates are /// evaluated to determine if the corresponding operation are performed. - /// Only the first clause that results in an satisfy predicate is executed. - /// Ther order of not match clauses matter. + /// Only the first clause that results in a satisfy predicate is executed. + /// The order of not match clauses matter. /// /// #Example /// ```rust ignore @@ -269,10 +273,10 @@ impl MergeBuilder { /// /// The update expressions can specify only target columns. /// - /// Multiple source not match clasues can be specified and their predicates + /// Multiple source not match clauses can be specified and their predicates /// are evaluated to determine if the corresponding operation are performed. - /// Only the first clause that results in an satisfy predicate is executed. - /// Ther order of source not match clauses matter. + /// Only the first clause that results in a satisfy predicate is executed. + /// The order of source not match clauses matter. /// /// #Example /// ```rust ignore @@ -301,10 +305,10 @@ impl MergeBuilder { /// Delete a target record when it does not match with a source record /// - /// Multiple source not match clasues can be specified and their predicates - /// are evaluated to determine if the corresponding operation are performed. - /// Only the first clause that results in an satisfy predicate is executed. - /// Ther order of source not match clauses matter. + /// Multiple source "not match" clauses can be specified and their predicates + /// are evaluated to determine if the corresponding operations are performed. + /// Only the first clause that results in a satisfy predicate is executed. + /// The order of source "not match" clauses matter. 
/// /// #Example /// ```rust ignore @@ -351,11 +355,8 @@ impl MergeBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -503,7 +504,7 @@ impl MergeOperation { relation: Some(TableReference::Bare { table }), name, } => { - if table.eq(alias) { + if table.as_ref() == alias { Column { relation: Some(r), name, @@ -574,7 +575,7 @@ pub struct MergeMetrics { /// Time taken to rewrite the matched files pub rewrite_time_ms: u64, } - +#[derive(Clone)] struct MergeMetricExtensionPlanner {} #[async_trait] @@ -667,13 +668,22 @@ impl ExtensionPlanner for MergeMetricExtensionPlanner { } } -/// Takes the predicate provided and does two things: +struct PredicatePlaceholder { + expr: Expr, + alias: String, + is_aggregate: bool, +} + +/// Takes the predicate provided and does three things: /// -/// 1. for any relations between a source column and a target column, if the target column is a -/// partition column, then replace source with a placeholder matching the name of the partition +/// 1. for any relations between a source column and a partition target column, +/// replace source with a placeholder matching the name of the partition /// columns /// -/// 2. for any other relation with a source column, remove them. +/// 2. for any is equal relations between a source column and a non-partition target column, +/// replace source with is between expression with min(source_column) and max(source_column) placeholders +/// +/// 3. for any other relation with a source column, remove them. /// /// For example, for the predicate: /// @@ -681,21 +691,17 @@ impl ExtensionPlanner for MergeMetricExtensionPlanner { /// /// where `date` is a partition column, would result in the expr: /// -/// `$date = target.date and frob > 42` +/// `$date_0 = target.date and target.id between $id_1_min and $id_1_max and frob > 42` /// /// This leaves us with a predicate that we can push into delta scan after expanding it out to -/// a conjunction between the disinct partitions in the source input. +/// a conjunction between the distinct partitions in the source input. 
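Editor's note: to make the transformation described in the doc comment above concrete, here is a small sketch of the expression shape `generalize_filter` produces for a non-partition equality such as `source.id = target.id`. It is patterned on the `test_generalize_filter_with_dynamic_target_range_references` test added further down in this patch; the function name below is only for illustration.

```rust
// Sketch: the generalized form of `source.id = target.id` when `id` is not a
// partition column. The placeholders are later filled from MIN(source.id) /
// MAX(source.id) aggregated over the source DataFrame.
use datafusion_common::Column;
use datafusion_expr::expr::Placeholder;
use datafusion_expr::{col, Expr};

fn expected_generalized_filter() -> Expr {
    let low = Expr::Placeholder(Placeholder {
        id: "id_0_min".to_owned(),
        data_type: None,
    });
    let high = Expr::Placeholder(Placeholder {
        id: "id_0_max".to_owned(),
        data_type: None,
    });
    // target.id BETWEEN $id_0_min AND $id_0_max
    col(Column::new(Some("target"), "id")).between(low, high)
}
```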
/// -/// TODO: A futher improvement here might be for non-partition columns to be replaced with min/max -/// checks, so the above example could become: -/// -/// `$date = target.date and target.id between 12345 and 99999 and frob > 42` fn generalize_filter( predicate: Expr, partition_columns: &Vec, source_name: &TableReference, target_name: &TableReference, - placeholders: &mut HashMap, + placeholders: &mut Vec, ) -> Option { #[derive(Debug)] enum ReferenceTableCheck { @@ -705,10 +711,7 @@ fn generalize_filter( } impl ReferenceTableCheck { fn has_reference(&self) -> bool { - match self { - ReferenceTableCheck::HasReference(_) => true, - _ => false, - } + matches!(self, ReferenceTableCheck::HasReference(_)) } } fn references_table(expr: &Expr, table: &TableReference) -> ReferenceTableCheck { @@ -735,36 +738,101 @@ fn generalize_filter( ReferenceTableCheck::Unknown } } - Expr::IsNull(inner) => references_table(&inner, table), + Expr::IsNull(inner) => references_table(inner, table), Expr::Literal(_) => ReferenceTableCheck::NoReference, _ => ReferenceTableCheck::Unknown, }; res } + fn construct_placeholder( + binary: BinaryExpr, + source_left: bool, + is_partition_column: bool, + column_name: String, + placeholders: &mut Vec, + ) -> Option { + if is_partition_column { + let placeholder_name = format!("{column_name}_{}", placeholders.len()); + let placeholder = Expr::Placeholder(Placeholder { + id: placeholder_name.clone(), + data_type: None, + }); + + let (left, right, source_expr): (Box, Box, Expr) = if source_left { + (placeholder.into(), binary.clone().right, *binary.left) + } else { + (binary.clone().left, placeholder.into(), *binary.right) + }; + + let replaced = Expr::BinaryExpr(BinaryExpr { + left, + op: binary.op, + right, + }); + + placeholders.push(PredicatePlaceholder { + expr: source_expr, + alias: placeholder_name, + is_aggregate: false, + }); + + Some(replaced) + } else { + match binary.op { + Operator::Eq => { + let name_min = format!("{column_name}_{}_min", placeholders.len()); + let placeholder_min = Expr::Placeholder(Placeholder { + id: name_min.clone(), + data_type: None, + }); + let name_max = format!("{column_name}_{}_max", placeholders.len()); + let placeholder_max = Expr::Placeholder(Placeholder { + id: name_max.clone(), + data_type: None, + }); + let (source_expr, target_expr) = if source_left { + (*binary.left, *binary.right) + } else { + (*binary.right, *binary.left) + }; + let replaced = Expr::Between(Between { + expr: target_expr.into(), + negated: false, + low: placeholder_min.into(), + high: placeholder_max.into(), + }); + + placeholders.push(PredicatePlaceholder { + expr: min(source_expr.clone()), + alias: name_min, + is_aggregate: true, + }); + placeholders.push(PredicatePlaceholder { + expr: max(source_expr), + alias: name_max, + is_aggregate: true, + }); + Some(replaced) + } + _ => None, + } + } + } + match predicate { Expr::BinaryExpr(binary) => { if references_table(&binary.right, source_name).has_reference() { if let ReferenceTableCheck::HasReference(left_target) = references_table(&binary.left, target_name) { - if partition_columns.contains(&left_target) { - let placeholder_name = format!("{left_target}_{}", placeholders.len()); - - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { - id: placeholder_name.clone(), - data_type: None, - }); - let replaced = Expr::BinaryExpr(BinaryExpr { - left: binary.left, - op: binary.op, - right: placeholder.into(), - }); - - placeholders.insert(placeholder_name, *binary.right); - - return 
Some(replaced); - } + return construct_placeholder( + binary, + false, + partition_columns.contains(&left_target), + left_target, + placeholders, + ); } return None; } @@ -772,23 +840,13 @@ fn generalize_filter( if let ReferenceTableCheck::HasReference(right_target) = references_table(&binary.right, target_name) { - if partition_columns.contains(&right_target) { - let placeholder_name = format!("{right_target}_{}", placeholders.len()); - - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { - id: placeholder_name.clone(), - data_type: None, - }); - let replaced = Expr::BinaryExpr(BinaryExpr { - right: binary.right, - op: binary.op, - left: placeholder.into(), - }); - - placeholders.insert(placeholder_name, *binary.left); - - return Some(replaced); - } + return construct_placeholder( + binary, + true, + partition_columns.contains(&right_target), + right_target, + placeholders, + ); } return None; } @@ -808,7 +866,7 @@ fn generalize_filter( placeholders, ); - let res = match (left, right) { + match (left, right) { (None, None) => None, (None, Some(one_side)) | (Some(one_side), None) => { // in the case of an AND clause, it's safe to generalize the filter down to just one side of the AND. @@ -828,19 +886,22 @@ fn generalize_filter( right: r.into(), }) .into(), - }; - res + } } other => match references_table(&other, source_name) { ReferenceTableCheck::HasReference(col) => { let placeholder_name = format!("{col}_{}", placeholders.len()); - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { + let placeholder = Expr::Placeholder(Placeholder { id: placeholder_name.clone(), data_type: None, }); - placeholders.insert(placeholder_name, other); + placeholders.push(PredicatePlaceholder { + expr: other, + alias: placeholder_name, + is_aggregate: true, + }); Some(placeholder) } @@ -855,11 +916,12 @@ fn replace_placeholders(expr: Expr, placeholders: &HashMap) Expr::Placeholder(Placeholder { id, .. 
}) => { let value = placeholders[&id].clone(); // Replace the placeholder with the value - Ok(Transformed::Yes(Expr::Literal(value))) + Ok(Transformed::yes(Expr::Literal(value))) } - _ => Ok(Transformed::No(expr)), + _ => Ok(Transformed::no(expr)), }) .unwrap() + .data } async fn try_construct_early_filter( @@ -867,17 +929,13 @@ async fn try_construct_early_filter( table_snapshot: &DeltaTableState, session_state: &SessionState, source: &LogicalPlan, - source_name: &TableReference<'_>, - target_name: &TableReference<'_>, + source_name: &TableReference, + target_name: &TableReference, ) -> DeltaResult> { let table_metadata = table_snapshot.metadata(); let partition_columns = &table_metadata.partition_columns; - if partition_columns.is_empty() { - return Ok(None); - } - - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); match generalize_filter( join_predicate, @@ -889,35 +947,34 @@ async fn try_construct_early_filter( None => Ok(None), Some(filter) => { if placeholders.is_empty() { - // if we haven't recognised any partition-based predicates in the join predicate, return our reduced filter + // if we haven't recognised any source predicates in the join predicate, return our filter with static only predicates Ok(Some(filter)) } else { - // if we have some recognised partitions, then discover the distinct set of partitions in the source data and - // make a new filter, which expands out the placeholders for each distinct partition (and then OR these together) - let distinct_partitions = LogicalPlan::Distinct(Distinct::All( - LogicalPlan::Projection(Projection::try_new( - placeholders - .into_iter() - .map(|(alias, expr)| expr.alias(alias)) - .collect_vec(), - source.clone().into(), - )?) - .into(), - )); - + // if we have some filters, which depend on the source df, then collect the placeholders values from the source data + // We aggregate the distinct values for partitions with the group_columns and stats(min, max) for dynamic filter as agg_columns + // Can be translated into `SELECT partition1 as part1_0, min(id) as id_1_min, max(id) as id_1_max FROM source GROUP BY partition1` + let (agg_columns, group_columns) = placeholders.into_iter().partition_map(|p| { + if p.is_aggregate { + Left(p.expr.alias(p.alias)) + } else { + Right(p.expr.alias(p.alias)) + } + }); + let distinct_partitions = LogicalPlan::Aggregate(Aggregate::try_new( + source.clone().into(), + group_columns, + agg_columns, + )?); let execution_plan = session_state .create_physical_plan(&distinct_partitions) .await?; - let items = execute_plan_to_batch(session_state, execution_plan).await?; - let placeholder_names = items .schema() .fields() .iter() .map(|f| f.name().to_owned()) .collect_vec(); - let expr = (0..items.num_rows()) .map(|i| { let replacements = placeholder_names @@ -933,7 +990,6 @@ async fn try_construct_early_filter( .collect::>>()? 
.into_iter() .reduce(Expr::or); - Ok(expr) } } @@ -945,21 +1001,26 @@ async fn execute( predicate: Expression, source: DataFrame, log_store: LogStoreRef, - snapshot: &DeltaTableState, + snapshot: DeltaTableState, state: SessionState, writer_properties: Option, - app_metadata: Option>, + mut commit_properties: CommitProperties, safe_cast: bool, source_alias: Option, target_alias: Option, match_operations: Vec, not_match_target_operations: Vec, not_match_source_operations: Vec, -) -> DeltaResult<((Vec, i64, Option), MergeMetrics)> { +) -> DeltaResult<(DeltaTableState, MergeMetrics)> { let mut metrics = MergeMetrics::default(); let exec_start = Instant::now(); let current_metadata = snapshot.metadata(); + let merge_planner = DeltaPlanner:: { + extension_planner: MergeMetricExtensionPlanner {}, + }; + + let state = state.with_query_planner(Arc::new(merge_planner)); // TODO: Given the join predicate, remove any expression that involve the // source table and keep expressions that only involve the target table. @@ -998,19 +1059,18 @@ async fn execute( let scan_config = DeltaScanConfigBuilder::default() .with_file_column(true) - .build(snapshot)?; - - let file_column = Arc::new(scan_config.file_column_name.clone().unwrap()); + .with_parquet_pushdown(false) + .build(&snapshot)?; let target_provider = Arc::new(DeltaTableProvider::try_new( snapshot.clone(), log_store.clone(), - scan_config, + scan_config.clone(), )?); let target_provider = provider_as_source(target_provider); - - let target = LogicalPlanBuilder::scan(target_name.clone(), target_provider, None)?.build()?; + let target = + LogicalPlanBuilder::scan(target_name.clone(), target_provider.clone(), None)?.build()?; let source_schema = source.schema(); let target_schema = target.schema(); @@ -1020,31 +1080,42 @@ async fn execute( Expression::String(s) => parse_predicate_expression(&join_schema_df, s, &state)?, }; - let state = state.with_query_planner(Arc::new(MergePlanner {})); - - let target = { - // Attempt to construct an early filter that we can apply to the Add action list and the delta scan. - // In the case where there are partition columns in the join predicate, we can scan the source table - // to get the distinct list of partitions affected and constrain the search to those. - - if !not_match_source_operations.is_empty() { - // It's only worth trying to create an early filter where there are no `when_not_matched_source` operators, since - // that implies a full scan - target - } else if let Some(filter) = try_construct_early_filter( + // Attempt to construct an early filter that we can apply to the Add action list and the delta scan. + // In the case where there are partition columns in the join predicate, we can scan the source table + // to get the distinct list of partitions affected and constrain the search to those. + let target_subset_filter = if !not_match_source_operations.is_empty() { + // It's only worth trying to create an early filter where there are no `when_not_matched_source` operators, since + // that implies a full scan + None + } else { + try_construct_early_filter( predicate.clone(), - snapshot, + &snapshot, &state, &source, &source_name, &target_name, ) .await? - { - LogicalPlan::Filter(Filter::try_new(filter, target.into())?) - } else { - target + }; + + let file_column = Arc::new(scan_config.file_column_name.clone().unwrap()); + // Need to manually push this filter into the scan... 
We want to PRUNE files not FILTER RECORDS + let target = match target_subset_filter.clone() { + Some(filter) => { + let filter = match &target_alias { + Some(alias) => remove_table_alias(filter, alias), + None => filter, + }; + LogicalPlanBuilder::scan_with_filters( + target_name.clone(), + target_provider, + None, + vec![filter], + )? + .build()? } + None => LogicalPlanBuilder::scan(target_name.clone(), target_provider, None)?.build()?, }; let source = DataFrame::new(state.clone(), source); @@ -1188,7 +1259,7 @@ async fn execute( let projection = join.with_column(OPERATION_COLUMN, case)?; - let mut new_columns = projection; + let mut new_columns = vec![]; let mut write_projection = Vec::new(); for delta_field in snapshot.schema().fields() { @@ -1223,11 +1294,9 @@ async fn execute( .end()?; let name = "__delta_rs_c_".to_owned() + delta_field.name(); - write_projection.push( - Expr::Column(Column::from_qualified_name_ignore_case(name.clone())) - .alias(delta_field.name()), - ); - new_columns = new_columns.with_column(&name, case)?; + write_projection + .push(Expr::Column(Column::from_name(name.clone())).alias(delta_field.name())); + new_columns.push((name, case)); } let mut insert_when = Vec::with_capacity(ops.len()); @@ -1303,18 +1372,40 @@ async fn execute( .end() } - new_columns = new_columns.with_column(DELETE_COLUMN, build_case(delete_when, delete_then)?)?; - new_columns = - new_columns.with_column(TARGET_INSERT_COLUMN, build_case(insert_when, insert_then)?)?; - new_columns = - new_columns.with_column(TARGET_UPDATE_COLUMN, build_case(update_when, update_then)?)?; - new_columns = new_columns.with_column( - TARGET_DELETE_COLUMN, + new_columns.push(( + DELETE_COLUMN.to_owned(), + build_case(delete_when, delete_then)?, + )); + new_columns.push(( + TARGET_INSERT_COLUMN.to_owned(), + build_case(insert_when, insert_then)?, + )); + new_columns.push(( + TARGET_UPDATE_COLUMN.to_owned(), + build_case(update_when, update_then)?, + )); + new_columns.push(( + TARGET_DELETE_COLUMN.to_owned(), build_case(target_delete_when, target_delete_then)?, - )?; - new_columns = new_columns.with_column(TARGET_COPY_COLUMN, build_case(copy_when, copy_then)?)?; - - let new_columns = new_columns.into_unoptimized_plan(); + )); + new_columns.push(( + TARGET_COPY_COLUMN.to_owned(), + build_case(copy_when, copy_then)?, + )); + + let new_columns = { + let plan = projection.into_unoptimized_plan(); + let mut fields: Vec = plan + .schema() + .columns() + .iter() + .map(|f| col(f.clone())) + .collect(); + + fields.extend(new_columns.into_iter().map(|(name, ex)| ex.alias(name))); + + LogicalPlanBuilder::from(plan).project(fields)?.build()? 
+ }; let distrbute_expr = col(file_column.as_str()); @@ -1350,9 +1441,17 @@ async fn execute( // write projected records let table_partition_cols = current_metadata.partition_columns.clone(); + let writer_stats_config = WriterStatsConfig::new( + snapshot.table_config().num_indexed_cols(), + snapshot + .table_config() + .stats_columns() + .map(|v| v.iter().map(|v| v.to_string()).collect::>()), + ); + let rewrite_start = Instant::now(); let add_actions = write_execution_plan( - Some(snapshot), + Some(&snapshot), state.clone(), write, table_partition_cols.clone(), @@ -1361,13 +1460,15 @@ async fn execute( None, writer_properties, safe_cast, - false, + None, + writer_stats_config, + None, ) .await?; metrics.rewrite_time_ms = Instant::now().duration_since(rewrite_start).as_millis() as u64; - let mut actions: Vec = add_actions.into_iter().map(Action::Add).collect(); + let mut actions: Vec = add_actions.clone(); metrics.num_target_files_added = actions.len(); let survivors = barrier @@ -1386,13 +1487,8 @@ async fn execute( } } - let mut version = snapshot.version(); - let source_count_metrics = source_count.metrics().unwrap(); let target_count_metrics = op_count.metrics().unwrap(); - fn get_metric(metrics: &MetricsSet, name: &str) -> usize { - metrics.sum_by_name(name).map(|m| m.as_usize()).unwrap_or(0) - } metrics.num_source_rows = get_metric(&source_count_metrics, SOURCE_COUNT_METRIC); metrics.num_target_rows_inserted = get_metric(&target_count_metrics, TARGET_INSERTED_METRIC); @@ -1405,55 +1501,58 @@ async fn execute( metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_millis() as u64; - let mut app_metadata = match app_metadata { - Some(meta) => meta, - None => HashMap::new(), - }; - + let app_metadata = &mut commit_properties.app_metadata; app_metadata.insert("readVersion".to_owned(), snapshot.version().into()); - if let Ok(map) = serde_json::to_value(&metrics) { app_metadata.insert("operationMetrics".to_owned(), map); } + // Predicate will be used for conflict detection + let commit_predicate = match target_subset_filter { + None => None, // No predicate means it's a full table merge + Some(some_filter) => { + let predict_expr = match &target_alias { + None => some_filter, + Some(alias) => remove_table_alias(some_filter, alias), + }; + Some(fmt_expr_to_sql(&predict_expr)?) + } + }; + // Do not make a commit when there are zero updates to the state let operation = DeltaOperation::Merge { - predicate: Some(fmt_expr_to_sql(&predicate)?), + predicate: commit_predicate, + merge_predicate: Some(fmt_expr_to_sql(&predicate)?), matched_predicates: match_operations, not_matched_predicates: not_match_target_operations, not_matched_by_source_predicates: not_match_source_operations, }; - if !actions.is_empty() { - version = commit( - log_store.as_ref(), - &actions, - operation.clone(), - Some(snapshot), - Some(app_metadata), - ) - .await?; + + if actions.is_empty() { + return Ok((snapshot, metrics)); } - let op = (!actions.is_empty()).then_some(operation); - Ok(((actions, version, op), metrics)) -} -// TODO: Abstract MergePlanner into DeltaPlanner to support other delta operations in the future. 
-struct MergePlanner {} + let commit = CommitBuilder::from(commit_properties) + .with_actions(actions) + .build(Some(&snapshot), log_store.clone(), operation) + .await?; + Ok((commit.snapshot(), metrics)) +} -#[async_trait] -impl QueryPlanner for MergePlanner { - async fn create_physical_plan( - &self, - logical_plan: &LogicalPlan, - session_state: &SessionState, - ) -> DataFusionResult> { - let planner = Arc::new(Box::new(DefaultPhysicalPlanner::with_extension_planners( - vec![Arc::new(MergeMetricExtensionPlanner {})], - ))); - planner - .create_physical_plan(logical_plan, session_state) - .await - } +fn remove_table_alias(expr: Expr, table_alias: &str) -> Expr { + expr.transform(&|expr| match expr { + Expr::Column(c) => match c.relation { + Some(rel) if rel.table() == table_alias => Ok(Transformed::yes(Expr::Column( + Column::new_unqualified(c.name), + ))), + _ => Ok(Transformed::no(Expr::Column(Column::new( + c.relation, c.name, + )))), + }, + _ => Ok(Transformed::no(expr)), + }) + .unwrap() + .data } impl std::future::IntoFuture for MergeBuilder { @@ -1461,10 +1560,10 @@ impl std::future::IntoFuture for MergeBuilder { type IntoFuture = BoxFuture<'static, Self::Output>; fn into_future(self) -> Self::IntoFuture { - let mut this = self; + let this = self; Box::pin(async move { - PROTOCOL.can_write_to(&this.snapshot)?; + PROTOCOL.can_write_to(&this.snapshot.snapshot)?; let state = this.state.unwrap_or_else(|| { let config: SessionConfig = DeltaSessionConfig::default().into(); @@ -1476,14 +1575,14 @@ impl std::future::IntoFuture for MergeBuilder { session.state() }); - let ((actions, version, operation), metrics) = execute( + let (snapshot, metrics) = execute( this.predicate, this.source, this.log_store.clone(), - &this.snapshot, + this.snapshot, state, this.writer_properties, - this.app_metadata, + this.commit_properties, this.safe_cast, this.source_alias, this.target_alias, @@ -1493,12 +1592,10 @@ impl std::future::IntoFuture for MergeBuilder { ) .await?; - if let Some(op) = &operation { - this.snapshot.merge(actions, op, version)?; - } - let table = DeltaTable::new_with_state(this.log_store, this.snapshot); - - Ok((table, metrics)) + Ok(( + DeltaTable::new_with_state(this.log_store, snapshot), + metrics, + )) }) } } @@ -1536,8 +1633,8 @@ mod tests { use datafusion_expr::LogicalPlanBuilder; use datafusion_expr::Operator; use itertools::Itertools; + use regex::Regex; use serde_json::json; - use std::collections::HashMap; use std::ops::Neg; use std::sync::Arc; @@ -1548,7 +1645,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -1556,6 +1653,7 @@ mod tests { table } + // TODO(ion): property keys are not passed through or translated as table features.. 
fix this as well #[tokio::test] async fn test_merge_when_delta_table_is_append_only() { let schema = get_arrow_schema(&None); @@ -1685,7 +1783,8 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert!(!parameters.contains_key("predicate")); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["matchedPredicates"], json!(r#"[{"actionType":"update"}]"#) @@ -1737,7 +1836,8 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert!(!parameters.contains_key("predicate")); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["matchedPredicates"], json!(r#"[{"actionType":"update"}]"#) @@ -1941,6 +2041,15 @@ mod tests { assert_eq!(metrics.num_output_rows, 6); assert_eq!(metrics.num_source_rows, 3); + let commit_info = table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + assert!(!parameters.contains_key("predicate")); + assert_eq!( + parameters["mergePredicate"], + "target.id = source.id AND target.modified = '2021-02-02'" + ); + let expected = vec![ "+----+-------+------------+", "| id | value | modified |", @@ -1957,6 +2066,64 @@ mod tests { assert_batches_sorted_eq!(&expected, &actual); } + #[tokio::test] + async fn test_merge_partition_filtered() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + let table = write_data(table, &schema).await; + assert_eq!(table.version(), 1); + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-02", + "2021-02-02", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + let (table, _metrics) = DeltaOps(table) + .merge( + source, + col("target.id") + .eq(col("source.id")) + .and(col("target.modified").eq(lit("2021-02-02"))), + ) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("value", col("source.value")) + .update("modified", col("source.modified")) + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", col("source.id")) + .set("value", col("source.value")) + .set("modified", col("source.modified")) + }) + .unwrap() + .await + .unwrap(); + assert_eq!(table.version(), 2); + let commit_info = table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + assert_eq!( + parameters["predicate"], + "id BETWEEN 'B' AND 'C' AND modified = '2021-02-02'" + ); + assert_eq!( + parameters["mergePredicate"], + "target.id = source.id AND target.modified = '2021-02-02'" + ); + } + #[tokio::test] async fn test_merge_partitions_skipping() { /* Validate the join predicate can be used for skipping partitions */ @@ -2014,6 +2181,13 @@ mod tests { assert_eq!(metrics.num_output_rows, 3); assert_eq!(metrics.num_source_rows, 3); + let commit_info = 
table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + let predicate = parameters["predicate"].as_str().unwrap(); + let re = Regex::new(r"^id = '(C|X|B)' OR id = '(C|X|B)' OR id = '(C|X|B)'$").unwrap(); + assert!(re.is_match(predicate)); + let expected = vec![ "+-------+------------+----+", "| value | modified | id |", @@ -2084,7 +2258,8 @@ mod tests { extra_info["operationMetrics"], serde_json::to_value(&metrics).unwrap() ); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert_eq!(parameters["predicate"], "id BETWEEN 'B' AND 'X'"); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["matchedPredicates"], json!(r#"[{"actionType":"delete"}]"#) @@ -2148,7 +2323,7 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["matchedPredicates"], json!(r#"[{"actionType":"delete","predicate":"source.value <= 10"}]"#) @@ -2217,7 +2392,8 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert!(!parameters.contains_key("predicate")); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["notMatchedBySourcePredicates"], json!(r#"[{"actionType":"delete"}]"#) @@ -2281,7 +2457,7 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("target.id = source.id")); + assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["notMatchedBySourcePredicates"], json!(r#"[{"actionType":"delete","predicate":"target.modified > '2021-02-01'"}]"#) @@ -2360,6 +2536,15 @@ mod tests { assert_eq!(metrics.num_output_rows, 3); assert_eq!(metrics.num_source_rows, 3); + let commit_info = table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + + assert_eq!( + parameters["predicate"], + json!("id BETWEEN 'B' AND 'X' AND modified = '2021-02-02'") + ); + let expected = vec![ "+----+-------+------------+", "| id | value | modified |", @@ -2462,7 +2647,7 @@ mod tests { let parsed_filter = col(Column::new(source.clone().into(), "id")) .eq(col(Column::new(target.clone().into(), "id"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2494,7 +2679,7 @@ mod tests { let parsed_filter = (source_id.clone().eq(target_id.clone())) .or(source_id.clone().is_null().and(target_id.clone().is_null())); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2517,9 +2702,9 @@ mod tests { }) .and(target_id.clone().is_null())); - assert!(placeholders.len() == 2); + assert_eq!(placeholders.len(), 2); - let captured_expressions = placeholders.values().collect_vec(); + let captured_expressions = 
placeholders.into_iter().map(|p| p.expr).collect_vec(); assert!(captured_expressions.contains(&&source_id)); assert!(captured_expressions.contains(&&source_id.is_null())); @@ -2538,7 +2723,7 @@ mod tests { .neg() .eq(col(Column::new(target.clone().into(), "id"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2558,12 +2743,13 @@ mod tests { assert_eq!(generalized, expected_filter); assert_eq!(placeholders.len(), 1); - - let placeholder_expr = &placeholders["id_0"]; + let placeholder_expr = placeholders.get(0).unwrap(); let expected_placeholder = col(Column::new(source.clone().into(), "id")).neg(); - assert_eq!(placeholder_expr, &expected_placeholder); + assert_eq!(placeholder_expr.expr, expected_placeholder); + assert_eq!(placeholder_expr.alias, "id_0"); + assert_eq!(placeholder_expr.is_aggregate, false); } #[tokio::test] @@ -2576,7 +2762,7 @@ mod tests { .eq(col(Column::new(target.clone().into(), "id"))) .and(col(Column::new(target.clone().into(), "id")).eq(lit("C"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2598,6 +2784,38 @@ mod tests { assert_eq!(generalized, expected_filter); } + #[tokio::test] + async fn test_generalize_filter_with_dynamic_target_range_references() { + let source = TableReference::parse_str("source"); + let target = TableReference::parse_str("target"); + + let parsed_filter = col(Column::new(source.clone().into(), "id")) + .eq(col(Column::new(target.clone().into(), "id"))); + + let mut placeholders = Vec::default(); + + let generalized = generalize_filter( + parsed_filter, + &vec!["other".to_owned()], + &source, + &target, + &mut placeholders, + ) + .unwrap(); + let expected_filter_l = Expr::Placeholder(Placeholder { + id: "id_0_min".to_owned(), + data_type: None, + }); + let expected_filter_h = Expr::Placeholder(Placeholder { + id: "id_0_max".to_owned(), + data_type: None, + }); + let expected_filter = col(Column::new(target.clone().into(), "id")) + .between(expected_filter_l, expected_filter_h); + + assert_eq!(generalized, expected_filter); + } + #[tokio::test] async fn test_generalize_filter_removes_source_references() { let source = TableReference::parse_str("source"); @@ -2607,7 +2825,7 @@ mod tests { .eq(col(Column::new(target.clone().into(), "id"))) .and(col(Column::new(source.clone().into(), "id")).eq(lit("C"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2724,4 +2942,479 @@ mod tests { assert_eq!(split_pred, expected_pred_parts); } + + #[tokio::test] + async fn test_try_construct_early_filter_with_range() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source = LogicalPlanBuilder::scan( + source_name.clone(), + 
provider_as_source(source.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("B".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_partition_and_range() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })) + .and( + col(Column { + relation: Some(source_name.clone()), + name: "modified".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + })), + ); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("B".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ) + .and( + Expr::Literal(ScalarValue::Utf8(Some("2023-07-04".to_string()))).eq(col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + })), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_merge_pushdowns() { + //See https://github.com/delta-io/delta-rs/issues/2158 + let schema = vec![ + StructField::new( + "id".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + StructField::new( + "cost".to_string(), + DataType::Primitive(PrimitiveType::Float), + true, + ), + StructField::new( + "month".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + ]; + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", ArrowDataType::Utf8, true), + Field::new("cost", ArrowDataType::Float32, true), + Field::new("month", ArrowDataType::Utf8, true), + ])); + + let table = DeltaOps::new_in_memory() + .create() + .with_columns(schema) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + 
Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B"])), + Arc::new(arrow::array::Float32Array::from(vec![Some(10.15), None])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::Append) + .await + .unwrap(); + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 1); + + let batch = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B"])), + Arc::new(arrow::array::Float32Array::from(vec![ + Some(12.15), + Some(11.15), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let (table, _metrics) = DeltaOps(table) + .merge(source, "target.id = source.id and target.cost is null") + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|insert| { + insert + .update("id", "target.id") + .update("cost", "source.cost") + .update("month", "target.month") + }) + .unwrap() + .await + .unwrap(); + + let expected = vec![ + "+----+-------+------------+", + "| id | cost | month |", + "+----+-------+------------+", + "| A | 10.15 | 2023-07-04 |", + "| B | 11.15 | 2023-07-04 |", + "+----+-------+------------+", + ]; + let actual = get_data(&table).await; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn test_merge_row_groups_parquet_pushdown() { + //See https://github.com/delta-io/delta-rs/issues/2362 + let schema = vec![ + StructField::new( + "id".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + StructField::new( + "cost".to_string(), + DataType::Primitive(PrimitiveType::Float), + true, + ), + StructField::new( + "month".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + ]; + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", ArrowDataType::Utf8, true), + Field::new("cost", ArrowDataType::Float32, true), + Field::new("month", ArrowDataType::Utf8, true), + ])); + + let table = DeltaOps::new_in_memory() + .create() + .with_columns(schema) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let batch1 = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B"])), + Arc::new(arrow::array::Float32Array::from(vec![Some(10.15), None])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + + let batch2 = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["C", "D"])), + Arc::new(arrow::array::Float32Array::from(vec![ + Some(11.0), + Some(12.0), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch1, batch2]) + .with_write_batch_size(2) + .with_save_mode(SaveMode::Append) + .await + .unwrap(); + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 1); + + let batch = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["C", "E"])), + Arc::new(arrow::array::Float32Array::from(vec![ + Some(12.15), + Some(11.15), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) 
+ .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let (table, _metrics) = DeltaOps(table) + .merge(source, "target.id = source.id and target.id >= 'C'") + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|insert| { + insert + .update("id", "target.id") + .update("cost", "source.cost") + .update("month", "target.month") + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", "source.id") + .set("cost", "source.cost") + .set("month", "source.month") + }) + .unwrap() + .await + .unwrap(); + + let expected = vec![ + "+----+-------+------------+", + "| id | cost | month |", + "+----+-------+------------+", + "| A | 10.15 | 2023-07-04 |", + "| B | | 2023-07-04 |", + "| C | 12.15 | 2023-07-04 |", + "| D | 12.0 | 2023-07-04 |", + "| E | 11.15 | 2023-07-04 |", + "+----+-------+------------+", + ]; + let actual = get_data(&table).await; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn test_merge_pushdowns_partitioned() { + //See #2158 + let schema = vec![ + StructField::new( + "id".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + StructField::new( + "cost".to_string(), + DataType::Primitive(PrimitiveType::Float), + true, + ), + StructField::new( + "month".to_string(), + DataType::Primitive(PrimitiveType::String), + true, + ), + ]; + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", ArrowDataType::Utf8, true), + Field::new("cost", ArrowDataType::Float32, true), + Field::new("month", ArrowDataType::Utf8, true), + ])); + + let part_cols = vec!["month"]; + let table = DeltaOps::new_in_memory() + .create() + .with_columns(schema) + .with_partition_columns(part_cols) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B"])), + Arc::new(arrow::array::Float32Array::from(vec![Some(10.15), None])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::Append) + .await + .unwrap(); + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 1); + + let batch = RecordBatch::try_new( + Arc::clone(&arrow_schema.clone()), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B"])), + Arc::new(arrow::array::Float32Array::from(vec![ + Some(12.15), + Some(11.15), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let (table, _metrics) = DeltaOps(table) + .merge(source, "target.id = source.id and target.cost is null") + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|insert| { + insert + .update("id", "target.id") + .update("cost", "source.cost") + .update("month", "target.month") + }) + .unwrap() + .await + .unwrap(); + + let expected = vec![ + "+----+-------+------------+", + "| id | cost | month |", + "+----+-------+------------+", + "| A | 10.15 | 2023-07-04 |", + "| B | 11.15 | 2023-07-04 |", + "+----+-------+------------+", + ]; + let actual = get_data(&table).await; + assert_batches_sorted_eq!(&expected, &actual); + } } diff --git a/crates/core/src/operations/mod.rs b/crates/core/src/operations/mod.rs index 2271f36641..608bdb1549 100644 --- a/crates/core/src/operations/mod.rs +++ 
b/crates/core/src/operations/mod.rs @@ -7,6 +7,7 @@ //! with a [data stream][datafusion::physical_plan::SendableRecordBatchStream], //! if the operation returns data as well. +use self::add_column::AddColumnBuilder; use self::create::CreateBuilder; use self::filesystem_check::FileSystemCheckBuilder; use self::vacuum::VacuumBuilder; @@ -15,9 +16,11 @@ use crate::table::builder::DeltaTableBuilder; use crate::DeltaTable; use std::collections::HashMap; +pub mod add_column; pub mod cast; pub mod convert_to_delta; pub mod create; +pub mod drop_constraints; pub mod filesystem_check; pub mod optimize; pub mod restore; @@ -27,7 +30,8 @@ pub mod vacuum; #[cfg(feature = "datafusion")] use self::{ constraints::ConstraintBuilder, datafusion_utils::Expression, delete::DeleteBuilder, - load::LoadBuilder, merge::MergeBuilder, update::UpdateBuilder, write::WriteBuilder, + drop_constraints::DropConstraintBuilder, load::LoadBuilder, load_cdf::CdfLoadBuilder, + merge::MergeBuilder, update::UpdateBuilder, write::WriteBuilder, }; #[cfg(feature = "datafusion")] pub use ::datafusion::physical_plan::common::collect as collect_sendable_stream; @@ -35,7 +39,10 @@ pub use ::datafusion::physical_plan::common::collect as collect_sendable_stream; use arrow::record_batch::RecordBatch; use optimize::OptimizeBuilder; use restore::RestoreBuilder; +use set_tbl_properties::SetTablePropertiesBuilder; +#[cfg(all(feature = "cdf", feature = "datafusion"))] +mod cdc; #[cfg(feature = "datafusion")] pub mod constraints; #[cfg(feature = "datafusion")] @@ -43,14 +50,19 @@ pub mod delete; #[cfg(feature = "datafusion")] mod load; #[cfg(feature = "datafusion")] +pub mod load_cdf; +#[cfg(feature = "datafusion")] pub mod merge; +pub mod set_tbl_properties; #[cfg(feature = "datafusion")] pub mod update; #[cfg(feature = "datafusion")] pub mod write; pub mod writer; -// TODO make ops consume a snapshot ... +/// The [Operation] trait defines common behaviors that all operations builders +/// should have consistent +pub(crate) trait Operation: std::future::IntoFuture {} /// High level interface for executing commands against a DeltaTable pub struct DeltaOps(pub DeltaTable); @@ -132,6 +144,13 @@ impl DeltaOps { LoadBuilder::new(self.0.log_store, self.0.state.unwrap()) } + /// Load a table with CDF Enabled + #[cfg(feature = "datafusion")] + #[must_use] + pub fn load_cdf(self) -> CdfLoadBuilder { + CdfLoadBuilder::new(self.0.log_store, self.0.state.unwrap()) + } + /// Write data to Delta table #[cfg(feature = "datafusion")] #[must_use] @@ -199,6 +218,23 @@ impl DeltaOps { pub fn add_constraint(self) -> ConstraintBuilder { ConstraintBuilder::new(self.0.log_store, self.0.state.unwrap()) } + + /// Drops constraints from a table + #[cfg(feature = "datafusion")] + #[must_use] + pub fn drop_constraints(self) -> DropConstraintBuilder { + DropConstraintBuilder::new(self.0.log_store, self.0.state.unwrap()) + } + + /// Set table properties + pub fn set_tbl_properties(self) -> SetTablePropertiesBuilder { + SetTablePropertiesBuilder::new(self.0.log_store, self.0.state.unwrap()) + } + + /// Add new columns + pub fn add_columns(self) -> AddColumnBuilder { + AddColumnBuilder::new(self.0.log_store, self.0.state.unwrap()) + } } impl From for DeltaOps { @@ -219,6 +255,33 @@ impl AsRef for DeltaOps { } } +/// Get the num_idx_columns and stats_columns from the table configuration in the state +/// If table_config does not exist (only can occur in the first write action) it takes +/// the configuration that was passed to the writerBuilder. 
+pub fn get_num_idx_cols_and_stats_columns( + config: Option>, + configuration: HashMap>, +) -> (i32, Option>) { + let (num_index_cols, stats_columns) = match &config { + Some(conf) => (conf.num_indexed_cols(), conf.stats_columns()), + _ => ( + configuration + .get("delta.dataSkippingNumIndexedCols") + .and_then(|v| v.clone().map(|v| v.parse::().unwrap())) + .unwrap_or(crate::table::config::DEFAULT_NUM_INDEX_COLS), + configuration + .get("delta.dataSkippingStatsColumns") + .and_then(|v| v.as_ref().map(|v| v.split(',').collect::>())), + ), + }; + ( + num_index_cols, + stats_columns + .clone() + .map(|v| v.iter().map(|v| v.to_string()).collect::>()), + ) +} + #[cfg(feature = "datafusion")] mod datafusion_utils { use datafusion::execution::context::SessionState; @@ -228,6 +291,7 @@ mod datafusion_utils { use crate::{delta_datafusion::expr::parse_predicate_expression, DeltaResult}; /// Used to represent user input of either a Datafusion expression or string expression + #[derive(Debug)] pub enum Expression { /// Datafusion Expression DataFusion(Expr), diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index c67b31a71b..9e1641fc7f 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -20,29 +20,33 @@ //! let (table, metrics) = OptimizeBuilder::new(table.object_store(), table.state).await?; //! ```` -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; +use std::fmt; use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use arrow::datatypes::SchemaRef as ArrowSchemaRef; use arrow_array::RecordBatch; +use delta_kernel::expressions::Scalar; use futures::future::BoxFuture; use futures::stream::BoxStream; use futures::{Future, StreamExt, TryStreamExt}; +use indexmap::IndexMap; use itertools::Itertools; use num_cpus; use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use parquet::basic::{Compression, ZstdLevel}; use parquet::errors::ParquetError; use parquet::file::properties::WriterProperties; -use serde::{Deserialize, Serialize}; +use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer}; use tracing::debug; -use super::transaction::{commit, PROTOCOL}; +use super::transaction::PROTOCOL; use super::writer::{PartitionWriter, PartitionWriterConfig}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, PartitionsExt, Remove, Scalar}; +use crate::kernel::{scalars::ScalarExt, Action, PartitionsExt, Remove}; use crate::logstore::LogStoreRef; +use crate::operations::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES}; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; @@ -58,8 +62,16 @@ pub struct Metrics { /// Number of unoptimized files removed pub num_files_removed: u64, /// Detailed metrics for the add operation + #[serde( + serialize_with = "serialize_metric_details", + deserialize_with = "deserialize_metric_details" + )] pub files_added: MetricDetails, /// Detailed metrics for the remove operation + #[serde( + serialize_with = "serialize_metric_details", + deserialize_with = "deserialize_metric_details" + )] pub files_removed: MetricDetails, /// Number of partitions that had at least one file optimized pub partitions_optimized: u64, @@ -73,17 +85,34 @@ pub struct Metrics { pub preserve_insertion_order: bool, } +// Custom serialization function that serializes metric details as a string +fn 
serialize_metric_details(value: &MetricDetails, serializer: S) -> Result +where + S: Serializer, +{ + serializer.serialize_str(&value.to_string()) +} + +// Custom deserialization that parses a JSON string into MetricDetails +fn deserialize_metric_details<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let s: String = Deserialize::deserialize(deserializer)?; + serde_json::from_str(&s).map_err(DeError::custom) +} + /// Statistics on files for a particular operation /// Operation can be remove or add #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct MetricDetails { - /// Minimum file size of a operation - pub min: i64, - /// Maximum file size of a operation - pub max: i64, /// Average file size of a operation pub avg: f64, + /// Maximum file size of a operation + pub max: i64, + /// Minimum file size of a operation + pub min: i64, /// Number of files encountered during operation pub total_files: usize, /// Sum of file sizes of a operation @@ -101,6 +130,13 @@ impl MetricDetails { } } +impl fmt::Display for MetricDetails { + /// Display the metric details using serde serialization + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + serde_json::to_string(self).map_err(|_| fmt::Error)?.fmt(f) + } +} + /// Metrics for a single partition pub struct PartialMetrics { /// Number of optimized files added @@ -163,8 +199,8 @@ pub struct OptimizeBuilder<'a> { target_size: Option, /// Properties passed to underlying parquet writer writer_properties: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Commit properties and configuration + commit_properties: CommitProperties, /// Whether to preserve insertion order within files (default false) preserve_insertion_order: bool, /// Max number of concurrent tasks (default is number of cpus) @@ -176,6 +212,8 @@ pub struct OptimizeBuilder<'a> { min_commit_interval: Option, } +impl super::Operation<()> for OptimizeBuilder<'_> {} + impl<'a> OptimizeBuilder<'a> { /// Create a new [`OptimizeBuilder`] pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -185,7 +223,7 @@ impl<'a> OptimizeBuilder<'a> { filters: &[], target_size: None, writer_properties: None, - app_metadata: None, + commit_properties: CommitProperties::default(), preserve_insertion_order: false, max_concurrent_tasks: num_cpus::get(), max_spill_size: 20 * 1024 * 1024 * 2014, // 20 GB. 
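The optimize metrics above now serialize each `MetricDetails` as an embedded JSON string (and expose the same representation through `Display`). The following is a minimal standalone sketch of that serde round trip; the `Details`/`Report` types and the field values are illustrative only, not the crate's API.

```rust
use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Details {
    avg: f64,
    max: i64,
    min: i64,
}

// Serialize the nested struct as a JSON string instead of a nested object.
fn as_json_string<S>(value: &Details, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let json = serde_json::to_string(value)
        .map_err(<S::Error as serde::ser::Error>::custom)?;
    serializer.serialize_str(&json)
}

// Parse the JSON string back into the nested struct.
fn from_json_string<'de, D>(deserializer: D) -> Result<Details, D::Error>
where
    D: Deserializer<'de>,
{
    let s: String = Deserialize::deserialize(deserializer)?;
    serde_json::from_str(&s).map_err(DeError::custom)
}

#[derive(Debug, Serialize, Deserialize)]
struct Report {
    #[serde(
        serialize_with = "as_json_string",
        deserialize_with = "from_json_string"
    )]
    files_added: Details,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let report = Report {
        files_added: Details { avg: 1024.0, max: 2048, min: 512 },
    };
    // The nested metrics end up as an escaped JSON string, e.g.
    // {"files_added":"{\"avg\":1024.0,\"max\":2048,\"min\":512}"}
    let json = serde_json::to_string(&report)?;
    let round_trip: Report = serde_json::from_str(&json)?;
    assert_eq!(round_trip.files_added, report.files_added);
    Ok(())
}
```

Round-tripping through `serde_json::from_str` is what lets `deserialize_metric_details` recover the struct from that string form.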
@@ -218,12 +256,9 @@ impl<'a> OptimizeBuilder<'a> { self } - /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + /// Additonal information to write to the commit + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -260,7 +295,7 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> { let this = self; Box::pin(async move { - PROTOCOL.can_write_to(&this.snapshot)?; + PROTOCOL.can_write_to(&this.snapshot.snapshot)?; let writer_properties = this.writer_properties.unwrap_or_else(|| { WriterProperties::builder() @@ -282,7 +317,7 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> { this.max_concurrent_tasks, this.max_spill_size, this.min_commit_interval, - this.app_metadata, + this.commit_properties, ) .await?; let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); @@ -295,20 +330,21 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> { #[derive(Debug, Clone)] struct OptimizeInput { target_size: i64, + predicate: Option, } impl From for DeltaOperation { fn from(opt_input: OptimizeInput) -> Self { DeltaOperation::Optimize { target_size: opt_input.target_size, - predicate: None, + predicate: opt_input.predicate, } } } fn create_remove( path: &str, - partitions: &BTreeMap, + partitions: &IndexMap, size: i64, ) -> Result { // NOTE unwrap is safe since UNIX_EPOCH will always be earlier then now. @@ -353,11 +389,11 @@ enum OptimizeOperations { /// /// Bins are determined by the bin-packing algorithm to reach an optimal size. /// Files that are large enough already are skipped. Bins of size 1 are dropped. - Compact(HashMap, Vec)>), + Compact(HashMap, Vec)>), /// Plan to Z-order each partition ZOrder( Vec, - HashMap, MergeBin)>, + HashMap, MergeBin)>, ), // TODO: Sort } @@ -389,6 +425,10 @@ pub struct MergeTaskParameters { file_schema: ArrowSchemaRef, /// Properties passed to parquet writer writer_properties: WriterProperties, + /// Num index cols to collect stats for + num_indexed_cols: i32, + /// Stats columns, specific columns to collect stats from, takes precedence over num_indexed_cols + stats_columns: Option>, } /// A stream of record batches, with a ParquetError on failure. @@ -401,7 +441,7 @@ impl MergePlan { /// collected during the operation. 
async fn rewrite_files( task_parameters: Arc, - partition_values: BTreeMap, + partition_values: IndexMap, files: MergeBin, object_store: ObjectStoreRef, read_stream: F, @@ -448,15 +488,24 @@ impl MergePlan { Some(task_parameters.input_parameters.target_size as usize), None, )?; - let mut writer = PartitionWriter::try_with_config(object_store, writer_config)?; + let mut writer = PartitionWriter::try_with_config( + object_store, + writer_config, + task_parameters.num_indexed_cols, + task_parameters.stats_columns.clone(), + )?; let mut read_stream = read_stream.await?; while let Some(maybe_batch) = read_stream.next().await { let mut batch = maybe_batch?; - batch = - super::cast::cast_record_batch(&batch, task_parameters.file_schema.clone(), false)?; + batch = super::cast::cast_record_batch( + &batch, + task_parameters.file_schema.clone(), + false, + true, + )?; partial_metrics.num_batches += 1; writer.write(&batch).await.map_err(DeltaTableError::from)?; } @@ -606,7 +655,7 @@ impl MergePlan { #[allow(unused_variables)] // used behind a feature flag max_spill_size: usize, min_commit_interval: Option, - app_metadata: Option>, + commit_properties: CommitProperties, ) -> Result { let operations = std::mem::take(&mut self.operations); @@ -698,6 +747,7 @@ impl MergePlan { let mut total_metrics = orig_metrics.clone(); let mut last_commit = Instant::now(); + let mut commits_made = 0; loop { let next = stream.next().await.transpose()?; @@ -720,31 +770,34 @@ impl MergePlan { last_commit = now; buffered_metrics.preserve_insertion_order = true; - let mut app_metadata = match app_metadata.clone() { - Some(meta) => meta, - None => HashMap::new(), - }; - app_metadata.insert("readVersion".to_owned(), self.read_table_version.into()); + let mut properties = CommitProperties::default(); + properties.app_metadata = commit_properties.app_metadata.clone(); + properties + .app_metadata + .insert("readVersion".to_owned(), self.read_table_version.into()); let maybe_map_metrics = serde_json::to_value(std::mem::replace( &mut buffered_metrics, orig_metrics.clone(), )); if let Ok(map) = maybe_map_metrics { - app_metadata.insert("operationMetrics".to_owned(), map); + properties + .app_metadata + .insert("operationMetrics".to_owned(), map); } - table.update().await?; debug!("committing {} actions", actions.len()); - //// TODO: Check for remove actions on optimized partitions. If a - //// optimized partition was updated then abort the commit. Requires (#593). 
- commit( - table.log_store.as_ref(), - &actions, - self.task_parameters.input_parameters.clone().into(), - Some(table.snapshot()?), - Some(app_metadata.clone()), - ) - .await?; + + CommitBuilder::from(properties) + .with_actions(actions) + .with_max_retries(DEFAULT_RETRIES + commits_made) + .build( + Some(snapshot), + log_store.clone(), + self.task_parameters.input_parameters.clone().into(), + ) + .await?; + + commits_made += 1; } if end { @@ -760,6 +813,8 @@ impl MergePlan { total_metrics.files_removed.min = 0; } + table.update().await?; + Ok(total_metrics) } } @@ -782,7 +837,10 @@ pub fn create_merge_plan( } }; - let input_parameters = OptimizeInput { target_size }; + let input_parameters = OptimizeInput { + target_size, + predicate: serde_json::to_string(filters).ok(), + }; let file_schema = arrow_schema_without_partitions(&Arc::new(snapshot.schema().try_into()?), partitions_keys); @@ -793,6 +851,11 @@ pub fn create_merge_plan( input_parameters, file_schema, writer_properties, + num_indexed_cols: snapshot.table_config().num_indexed_cols(), + stats_columns: snapshot + .table_config() + .stats_columns() + .map(|v| v.iter().map(|v| v.to_string()).collect::>()), }), read_table_version: snapshot.version(), }) @@ -849,7 +912,7 @@ fn build_compaction_plan( ) -> Result<(OptimizeOperations, Metrics), DeltaTableError> { let mut metrics = Metrics::default(); - let mut partition_files: HashMap, Vec)> = + let mut partition_files: HashMap, Vec)> = HashMap::new(); for add in snapshot.get_active_add_actions_by_partitions(filters)? { let add = add?; @@ -863,7 +926,7 @@ fn build_compaction_plan( .partition_values()? .into_iter() .map(|(k, v)| (k.to_string(), v)) - .collect::>(); + .collect::>(); partition_files .entry(add.partition_values()?.hive_partition_path()) @@ -877,7 +940,7 @@ fn build_compaction_plan( file.sort_by(|a, b| b.size.cmp(&a.size)); } - let mut operations: HashMap, Vec)> = HashMap::new(); + let mut operations: HashMap, Vec)> = HashMap::new(); for (part, (partition, files)) in partition_files { let mut merge_bins = vec![MergeBin::new()]; @@ -939,7 +1002,6 @@ fn build_zorder_plan( let field_names = snapshot .schema() .fields() - .iter() .map(|field| field.name().to_string()) .collect_vec(); let unknown_columns = zorder_columns @@ -955,14 +1017,14 @@ fn build_zorder_plan( // For now, just be naive and optimize all files in each selected partition. let mut metrics = Metrics::default(); - let mut partition_files: HashMap, MergeBin)> = HashMap::new(); + let mut partition_files: HashMap, MergeBin)> = HashMap::new(); for add in snapshot.get_active_add_actions_by_partitions(filters)? { let add = add?; let partition_values = add .partition_values()? .into_iter() .map(|(k, v)| (k.to_string(), v)) - .collect::>(); + .collect::>(); metrics.total_considered_files += 1; let object_meta = ObjectMeta::try_from(&add)?; @@ -1283,7 +1345,7 @@ pub(super) mod zorder { "+-----+-----+-----------+", ]; - let expected = vec![expected_1, expected_2, expected_3]; + let expected = [expected_1, expected_2, expected_3]; let indices = Int32Array::from(shuffled_indices().to_vec()); let shuffled_columns = batch diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index 2718ee34fb..e2ab9741bc 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -21,7 +21,7 @@ //! 
```` use std::cmp::max; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::ops::BitXor; use std::time::{SystemTime, UNIX_EPOCH}; @@ -33,11 +33,12 @@ use serde::Serialize; use crate::kernel::{Action, Add, Protocol, Remove}; use crate::logstore::LogStoreRef; -use crate::operations::transaction::{prepare_commit, TransactionError}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableConfig, DeltaTableError, ObjectStoreError}; +use super::transaction::{CommitBuilder, CommitProperties, TransactionError}; + /// Errors that can occur during restore #[derive(thiserror::Error, Debug)] enum RestoreError { @@ -84,10 +85,12 @@ pub struct RestoreBuilder { ignore_missing_files: bool, /// Protocol downgrade allowed protocol_downgrade_allowed: bool, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, } +impl super::Operation<()> for RestoreBuilder {} + impl RestoreBuilder { /// Create a new [`RestoreBuilder`] pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -98,7 +101,7 @@ impl RestoreBuilder { datetime_to_restore: None, ignore_missing_files: false, protocol_downgrade_allowed: false, - app_metadata: None, + commit_properties: CommitProperties::default(), } } @@ -128,11 +131,8 @@ impl RestoreBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } } @@ -144,7 +144,7 @@ async fn execute( datetime_to_restore: Option>, ignore_missing_files: bool, protocol_downgrade_allowed: bool, - app_metadata: Option>, + mut commit_properties: CommitProperties, ) -> DeltaResult { if !(version_to_restore .is_none() @@ -248,43 +248,41 @@ async fn execute( reader_features: snapshot.protocol().reader_features.clone(), } }; - let mut app_metadata = match app_metadata { - Some(meta) => meta, - None => HashMap::new(), - }; - - app_metadata.insert("readVersion".to_owned(), snapshot.version().into()); - - if let Ok(map) = serde_json::to_value(&metrics) { - app_metadata.insert("operationMetrics".to_owned(), map); - } + commit_properties + .app_metadata + .insert("readVersion".to_owned(), snapshot.version().into()); + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(&metrics)?, + ); actions.push(Action::Protocol(protocol)); actions.extend(files_to_add.into_iter().map(Action::Add)); actions.extend(files_to_remove.into_iter().map(Action::Remove)); - let commit = prepare_commit( - log_store.object_store().as_ref(), - &DeltaOperation::Restore { - version: version_to_restore, - datetime: datetime_to_restore.map(|time| -> i64 { time.timestamp_millis() }), - }, - &actions, - Some(app_metadata), - ) - .await?; + let operation = DeltaOperation::Restore { + version: version_to_restore, + datetime: datetime_to_restore.map(|time| -> i64 { time.timestamp_millis() }), + }; + + let prepared_commit = CommitBuilder::from(commit_properties) + .with_actions(actions) + .build(Some(&snapshot), log_store.clone(), operation) + .into_prepared_commit_future() + .await?; + let commit_version = snapshot.version() + 1; - match log_store.write_commit_entry(commit_version, &commit).await { + let 
commit = prepared_commit.path(); + match log_store.write_commit_entry(commit_version, commit).await { Ok(_) => {} Err(err @ TransactionError::VersionAlreadyExists(_)) => { return Err(err.into()); } Err(err) => { - log_store.object_store().delete(&commit).await?; + log_store.abort_commit_entry(commit_version, commit).await?; return Err(err.into()); } } - Ok(metrics) } @@ -322,7 +320,7 @@ impl std::future::IntoFuture for RestoreBuilder { this.datetime_to_restore, this.ignore_missing_files, this.protocol_downgrade_allowed, - this.app_metadata, + this.commit_properties, ) .await?; let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); diff --git a/crates/core/src/operations/set_tbl_properties.rs b/crates/core/src/operations/set_tbl_properties.rs new file mode 100644 index 0000000000..b3ca7607ac --- /dev/null +++ b/crates/core/src/operations/set_tbl_properties.rs @@ -0,0 +1,111 @@ +//! Set table properties on a table + +use std::collections::HashMap; + +use futures::future::BoxFuture; + +use super::transaction::{CommitBuilder, CommitProperties}; +use crate::kernel::Action; +use crate::logstore::LogStoreRef; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::DeltaResult; +use crate::DeltaTable; + +/// Remove constraints from the table +pub struct SetTablePropertiesBuilder { + /// A snapshot of the table's state + snapshot: DeltaTableState, + /// Name of the property + properties: HashMap, + /// Raise if property doesn't exist + raise_if_not_exists: bool, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Additional information to add to the commit + commit_properties: CommitProperties, +} + +impl SetTablePropertiesBuilder { + /// Create a new builder + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + properties: HashMap::new(), + raise_if_not_exists: true, + snapshot, + log_store, + commit_properties: CommitProperties::default(), + } + } + + /// Specify the properties to be removed + pub fn with_properties(mut self, table_properties: HashMap) -> Self { + self.properties = table_properties; + self + } + + /// Specify if you want to raise if the property does not exist + pub fn with_raise_if_not_exists(mut self, raise: bool) -> Self { + self.raise_if_not_exists = raise; + self + } + + /// Additional metadata to be added to commit info + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; + self + } +} + +impl std::future::IntoFuture for SetTablePropertiesBuilder { + type Output = DeltaResult; + + type IntoFuture = BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let mut metadata = this.snapshot.metadata().clone(); + + let current_protocol = this.snapshot.protocol(); + let properties = this.properties; + + let new_protocol = current_protocol + .clone() + .apply_properties_to_protocol(&properties, this.raise_if_not_exists)?; + + metadata.configuration.extend( + properties + .clone() + .into_iter() + .map(|(k, v)| (k, Some(v))) + .collect::>>(), + ); + + let final_protocol = + new_protocol.move_table_properties_into_features(&metadata.configuration); + + let operation = DeltaOperation::SetTableProperties { properties }; + + let mut actions = vec![Action::Metadata(metadata)]; + + if current_protocol.ne(&final_protocol) { + actions.push(Action::Protocol(final_protocol)); + } + + let commit = 
CommitBuilder::from(this.commit_properties) + .with_actions(actions.clone()) + .build( + Some(&this.snapshot), + this.log_store.clone(), + operation.clone(), + ) + .await?; + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) + }) + } +} diff --git a/crates/core/src/operations/transaction/application.rs b/crates/core/src/operations/transaction/application.rs new file mode 100644 index 0000000000..5a636bcecf --- /dev/null +++ b/crates/core/src/operations/transaction/application.rs @@ -0,0 +1,136 @@ +#[cfg(test)] +mod tests { + use crate::{ + checkpoints, kernel::Transaction, operations::transaction::CommitProperties, + protocol::SaveMode, writer::test_utils::get_record_batch, DeltaOps, DeltaTableBuilder, + }; + + #[tokio::test] + async fn test_app_txn_workload() { + // Test that the transaction ids can be read from different scenarios + // 1. Write new table to storage + // 2. Read new table + // 3. Write to table a new txn id and then update a different table state that uses the same underlying table + // 4. Write a checkpoint and read that checkpoint. + + let tmp_dir = tempfile::tempdir().unwrap(); + let tmp_path = std::fs::canonicalize(tmp_dir.path()).unwrap(); + + let batch = get_record_batch(None, false); + let table = DeltaOps::try_from_uri(tmp_path.to_str().unwrap()) + .await + .unwrap() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .with_partition_columns(["modified"]) + .with_commit_properties( + CommitProperties::default() + .with_application_transaction(Transaction::new("my-app", 1)), + ) + .await + .unwrap(); + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 2); + + let app_txns = table.get_app_transaction_version(); + assert_eq!(app_txns.len(), 1); + assert_eq!(app_txns.get("my-app").map(|t| t.version), Some(1)); + + // Test Txn Id can be read from existing table + + let mut table2 = DeltaTableBuilder::from_uri(tmp_path.to_str().unwrap()) + .load() + .await + .unwrap(); + let app_txns2 = table2.get_app_transaction_version(); + + assert_eq!(app_txns2.len(), 1); + assert_eq!(app_txns2.get("my-app").map(|t| t.version), Some(1)); + + // Write new data to the table and check that `update` functions work + + let table = DeltaOps::from(table) + .write(vec![get_record_batch(None, false)]) + .with_commit_properties( + CommitProperties::default() + .with_application_transaction(Transaction::new("my-app", 3)), + ) + .await + .unwrap(); + + assert_eq!(table.version(), 1); + let app_txns = table.get_app_transaction_version(); + assert_eq!(app_txns.len(), 1); + assert_eq!(app_txns.get("my-app").map(|t| t.version), Some(3)); + + table2.update_incremental(None).await.unwrap(); + assert_eq!(table2.version(), 1); + let app_txns2 = table2.get_app_transaction_version(); + assert_eq!(app_txns2.len(), 1); + assert_eq!(app_txns2.get("my-app").map(|t| t.version), Some(3)); + + // Create a checkpoint and then load + checkpoints::create_checkpoint(&table).await.unwrap(); + let table3 = DeltaTableBuilder::from_uri(tmp_path.to_str().unwrap()) + .load() + .await + .unwrap(); + let app_txns3 = table2.get_app_transaction_version(); + assert_eq!(app_txns3.len(), 1); + assert_eq!(app_txns3.get("my-app").map(|t| t.version), Some(3)); + assert_eq!(table3.version(), 1); + } + + #[tokio::test] + async fn test_app_txn_conflict() { + // A conflict must be raised whenever the same application id is used for two concurrent transactions + + let tmp_dir = tempfile::tempdir().unwrap(); + let tmp_path = 
std::fs::canonicalize(tmp_dir.path()).unwrap(); + + let batch = get_record_batch(None, false); + let table = DeltaOps::try_from_uri(tmp_path.to_str().unwrap()) + .await + .unwrap() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .with_partition_columns(["modified"]) + .with_commit_properties( + CommitProperties::default() + .with_application_transaction(Transaction::new("my-app", 1)), + ) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let table2 = DeltaTableBuilder::from_uri(tmp_path.to_str().unwrap()) + .load() + .await + .unwrap(); + assert_eq!(table2.version(), 0); + + let table = DeltaOps::from(table) + .write(vec![get_record_batch(None, false)]) + .with_commit_properties( + CommitProperties::default() + .with_application_transaction(Transaction::new("my-app", 2)), + ) + .await + .unwrap(); + assert_eq!(table.version(), 1); + + let res = DeltaOps::from(table2) + .write(vec![get_record_batch(None, false)]) + .with_commit_properties( + CommitProperties::default() + .with_application_transaction(Transaction::new("my-app", 3)), + ) + .await; + + let err = res.err().unwrap(); + assert_eq!( + err.to_string(), + "Transaction failed: Failed to commit transaction: Concurrent transaction failed." + ); + } +} diff --git a/crates/core/src/operations/transaction/conflict_checker.rs b/crates/core/src/operations/transaction/conflict_checker.rs index abd5351ef9..d44c704b53 100644 --- a/crates/core/src/operations/transaction/conflict_checker.rs +++ b/crates/core/src/operations/transaction/conflict_checker.rs @@ -2,12 +2,15 @@ use std::collections::HashSet; use super::CommitInfo; +#[cfg(feature = "datafusion")] +use crate::delta_datafusion::DataFusionMixins; use crate::errors::DeltaResult; +use crate::kernel::EagerSnapshot; +use crate::kernel::Transaction; use crate::kernel::{Action, Add, Metadata, Protocol, Remove}; use crate::logstore::{get_actions, LogStore}; use crate::protocol::DeltaOperation; use crate::table::config::IsolationLevel; -use crate::table::state::DeltaTableState; use crate::DeltaTableError; #[cfg(feature = "datafusion")] @@ -98,9 +101,9 @@ pub(crate) struct TransactionInfo<'a> { /// appIds that have been seen by the transaction pub(crate) read_app_ids: HashSet, /// delta log actions that the transaction wants to commit - actions: &'a Vec, + actions: &'a [Action], /// read [`DeltaTableState`] used for the transaction - pub(crate) read_snapshot: &'a DeltaTableState, + pub(crate) read_snapshot: &'a EagerSnapshot, /// Whether the transaction tainted the whole table read_whole_table: bool, } @@ -108,9 +111,9 @@ pub(crate) struct TransactionInfo<'a> { impl<'a> TransactionInfo<'a> { #[cfg(feature = "datafusion")] pub fn try_new( - read_snapshot: &'a DeltaTableState, + read_snapshot: &'a EagerSnapshot, read_predicates: Option, - actions: &'a Vec, + actions: &'a [Action], read_whole_table: bool, ) -> DeltaResult { use datafusion::prelude::SessionContext; @@ -119,10 +122,18 @@ impl<'a> TransactionInfo<'a> { let read_predicates = read_predicates .map(|pred| read_snapshot.parse_predicate_expression(pred, &session.state())) .transpose()?; + + let mut read_app_ids = HashSet::::new(); + for action in actions.iter() { + if let Action::Txn(Transaction { app_id, .. 
}) = action { + read_app_ids.insert(app_id.clone()); + } + } + Ok(Self { txn_id: "".into(), read_predicates, - read_app_ids: Default::default(), + read_app_ids, actions, read_snapshot, read_whole_table, @@ -132,15 +143,21 @@ impl<'a> TransactionInfo<'a> { #[cfg(feature = "datafusion")] #[allow(unused)] pub fn new( - read_snapshot: &'a DeltaTableState, + read_snapshot: &'a EagerSnapshot, read_predicates: Option, actions: &'a Vec, read_whole_table: bool, ) -> Self { + let mut read_app_ids = HashSet::::new(); + for action in actions.iter() { + if let Action::Txn(Transaction { app_id, .. }) = action { + read_app_ids.insert(app_id.clone()); + } + } Self { txn_id: "".into(), read_predicates, - read_app_ids: Default::default(), + read_app_ids, actions, read_snapshot, read_whole_table, @@ -149,15 +166,21 @@ impl<'a> TransactionInfo<'a> { #[cfg(not(feature = "datafusion"))] pub fn try_new( - read_snapshot: &'a DeltaTableState, + read_snapshot: &'a EagerSnapshot, read_predicates: Option, actions: &'a Vec, read_whole_table: bool, ) -> DeltaResult { + let mut read_app_ids = HashSet::::new(); + for action in actions.iter() { + if let Action::Txn(Transaction { app_id, .. }) = action { + read_app_ids.insert(app_id.clone()); + } + } Ok(Self { txn_id: "".into(), read_predicates, - read_app_ids: Default::default(), + read_app_ids, actions, read_snapshot, read_whole_table, @@ -173,14 +196,16 @@ impl<'a> TransactionInfo<'a> { #[cfg(feature = "datafusion")] /// Files read by the transaction - pub fn read_files(&self) -> Result, CommitConflictError> { + pub fn read_files(&self) -> Result + '_, CommitConflictError> { + use crate::delta_datafusion::files_matching_predicate; + if let Some(predicate) = &self.read_predicates { Ok(Either::Left( - self.read_snapshot - .files_matching_predicate(&[predicate.clone()]) - .map_err(|err| CommitConflictError::Predicate { + files_matching_predicate(self.read_snapshot, &[predicate.clone()]).map_err( + |err| CommitConflictError::Predicate { source: Box::new(err), - })?, + }, + )?, )) } else { Ok(Either::Right(std::iter::empty())) @@ -189,8 +214,8 @@ impl<'a> TransactionInfo<'a> { #[cfg(not(feature = "datafusion"))] /// Files read by the transaction - pub fn read_files(&self) -> Result, CommitConflictError> { - Ok(self.read_snapshot.file_actions().unwrap().into_iter()) + pub fn read_files(&self) -> Result + '_, CommitConflictError> { + Ok(self.read_snapshot.file_actions().unwrap()) } /// Whether the whole table was read during the transaction @@ -307,13 +332,6 @@ impl WinningCommitSummary { } } - // pub fn only_add_files(&self) -> bool { - // !self - // .actions - // .iter() - // .any(|action| matches!(action, Action::remove(_))) - // } - pub fn is_blind_append(&self) -> Option { self.commit_info .as_ref() @@ -677,9 +695,12 @@ mod tests { actions: Vec, read_whole_table: bool, ) -> Result<(), CommitConflictError> { + use crate::table::state::DeltaTableState; + let setup_actions = setup.unwrap_or_else(|| init_table_actions(None)); let state = DeltaTableState::from_actions(setup_actions).unwrap(); - let transaction_info = TransactionInfo::new(&state, reads, &actions, read_whole_table); + let snapshot = state.snapshot(); + let transaction_info = TransactionInfo::new(snapshot, reads, &actions, read_whole_table); let summary = WinningCommitSummary { actions: concurrent, commit_info: None, diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index 63d1789e0a..babff18439 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ 
b/crates/core/src/operations/transaction/mod.rs @@ -1,22 +1,104 @@ -//! Delta transactions -use std::collections::HashMap; +//! Add a commit entry to the Delta Table. +//! This module provides a unified interface for modifying commit behavior and attributes. +//! +//! [`CommitProperties`] provides a unified client interface for all Delta operations. +//! Internally this is used to initialize a [`CommitBuilder`]. +//! +//! For advanced use cases [`CommitBuilder`] can be used which allows +//! finer control over the commit process. The builder can be converted +//! into a future that yields either a [`PreparedCommit`] or a [`FinalizedCommit`]. +//! +//! A [`PreparedCommit`] represents a temporary commit marker written to storage. +//! To convert to a [`FinalizedCommit`] an atomic rename is attempted. If the rename fails +//! then conflict resolution is performed and the atomic rename is tried for the latest version. +//! +//!
+//!                                          Client Interface
+//!        ┌─────────────────────────────┐                    
+//!        │      Commit Properties      │                    
+//!        │                             │                    
+//!        │ Public commit interface for │                    
+//!        │     all Delta Operations    │                    
+//!        │                             │                    
+//!        └─────────────┬───────────────┘                    
+//!                      │                                    
+//! ─────────────────────┼────────────────────────────────────
+//!                      │                                    
+//!                      ▼                  Advanced Interface
+//!        ┌─────────────────────────────┐                    
+//!        │       Commit Builder        │                    
+//!        │                             │                    
+//!        │   Advanced entry point for  │                    
+//!        │     creating a commit       │                    
+//!        └─────────────┬───────────────┘                    
+//!                      │                                    
+//!                      ▼                                    
+//!     ┌───────────────────────────────────┐                 
+//!     │                                   │                 
+//!     │ ┌───────────────────────────────┐ │                 
+//!     │ │        Prepared Commit        │ │                 
+//!     │ │                               │ │                 
+//!     │ │     Represents a temporary    │ │                 
+//!     │ │   commit marker written to    │ │                 
+//!     │ │           storage             │ │                 
+//!     │ └──────────────┬────────────────┘ │                 
+//!     │                │                  │                 
+//!     │                ▼                  │                 
+//!     │ ┌───────────────────────────────┐ │                 
+//!     │ │       Finalize Commit         │ │                 
+//!     │ │                               │ │                 
+//!     │ │   Convert the commit marker   │ │                 
+//!     │ │   to a commit using atomic    │ │                 
+//!     │ │         operations            │ │                 
+//!     │ │                               │ │                 
+//!     │ └───────────────────────────────┘ │                 
+//!     │                                   │                 
+//!     └────────────────┬──────────────────┘                 
+//!                      │                                    
+//!                      ▼                                    
+//!       ┌───────────────────────────────┐                   
+//!       │          Post Commit          │                   
+//!       │                               │                   
+//!       │ Commit that was materialized  │                   
+//!       │ to storage with post commit   │                   
+//!       │      hooks to be executed     │                   
+//!       └──────────────┬────────────────┘                 
+//!                      │                                    
+//!                      ▼    
+//!       ┌───────────────────────────────┐                   
+//!       │        Finalized Commit       │                   
+//!       │                               │                   
+//!       │ Commit that was materialized  │                   
+//!       │         to storage            │                   
+//!       │                               │                   
+//!       └───────────────────────────────┘           
+//!
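To make the flow in the diagram concrete, here is a hedged sketch of how an operation drives this pipeline, modeled on the `SetTablePropertiesBuilder` and optimize changes elsewhere in this diff; the `use crate::...` paths and the free-standing helper are illustrative assumptions, not part of the patch.

```rust
use crate::kernel::Action;
use crate::logstore::LogStoreRef;
use crate::operations::transaction::{CommitBuilder, CommitProperties};
use crate::protocol::DeltaOperation;
use crate::table::state::DeltaTableState;
use crate::{DeltaResult, DeltaTable};

async fn commit_actions(
    snapshot: DeltaTableState,
    log_store: LogStoreRef,
    actions: Vec<Action>,
    operation: DeltaOperation,
    commit_properties: CommitProperties,
) -> DeltaResult<DeltaTable> {
    // The client-facing CommitProperties seeds the advanced CommitBuilder...
    let commit = CommitBuilder::from(commit_properties)
        .with_actions(actions)
        // ...and awaiting the built future walks PreparedCommit -> PostCommit,
        // retrying with conflict resolution if the atomic rename fails.
        .build(Some(&snapshot), log_store.clone(), operation)
        .await?;
    // The finalized commit carries the post-commit snapshot for the new version.
    Ok(DeltaTable::new_with_state(log_store, commit.snapshot()))
}
```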
use chrono::Utc; use conflict_checker::ConflictChecker; +use futures::future::BoxFuture; use object_store::path::Path; use object_store::{Error as ObjectStoreError, ObjectStore}; use serde_json::Value; +use std::collections::HashMap; use self::conflict_checker::{CommitConflictError, TransactionInfo, WinningCommitSummary}; -use crate::crate_version; -use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, CommitInfo, ReaderFeatures, WriterFeatures}; -use crate::logstore::LogStore; +use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for}; +use crate::errors::DeltaTableError; +use crate::kernel::{ + Action, CommitInfo, EagerSnapshot, Metadata, Protocol, ReaderFeatures, Transaction, + WriterFeatures, +}; +use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; +use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; +use crate::{crate_version, DeltaResult}; pub use self::protocol::INSTANCE as PROTOCOL; +#[cfg(test)] +pub(crate) mod application; mod conflict_checker; mod protocol; #[cfg(feature = "datafusion")] @@ -25,6 +107,7 @@ mod state; pub(crate) mod test_utils; const DELTA_LOG_FOLDER: &str = "_delta_log"; +pub(crate) const DEFAULT_RETRIES: usize = 15; /// Error raised while commititng transaction #[derive(thiserror::Error, Debug)] @@ -71,12 +154,12 @@ pub enum TransactionError { UnsupportedWriterFeatures(Vec), /// Error returned when writer features are required but not specified - #[error("Writer features must be specified for writerversion >= 7")] - WriterFeaturesRequired, + #[error("Writer features must be specified for writerversion >= 7, please specify: {0:?}")] + WriterFeaturesRequired(WriterFeatures), /// Error returned when reader features are required but not specified - #[error("Reader features must be specified for reader version >= 3")] - ReaderFeaturesRequired, + #[error("Reader features must be specified for reader version >= 3, please specify: {0:?}")] + ReaderFeaturesRequired(ReaderFeatures), /// The transaction failed to commit due to an error in an implementation-specific layer. /// Currently used by DynamoDb-backed S3 log store when database operations fail. @@ -104,167 +187,554 @@ impl From for DeltaTableError { } } -// Convert actions to their json representation -fn log_entry_from_actions<'a>( - actions: impl IntoIterator, -) -> Result { - let mut jsons = Vec::::new(); - for action in actions { - let json = serde_json::to_string(action) - .map_err(|e| TransactionError::SerializeLogJson { json_err: e })?; - jsons.push(json); - } - Ok(jsons.join("\n")) +/// Error raised while commititng transaction +#[derive(thiserror::Error, Debug)] +pub enum CommitBuilderError {} + +impl From for DeltaTableError { + fn from(err: CommitBuilderError) -> Self { + DeltaTableError::CommitValidation { source: err } + } +} + +/// Reference to some structure that contains mandatory attributes for performing a commit. 
+pub trait TableReference: Send + Sync { + /// Well known table configuration + fn config(&self) -> TableConfig; + + /// Get the table protocol of the snapshot + fn protocol(&self) -> &Protocol; + + /// Get the table metadata of the snapshot + fn metadata(&self) -> &Metadata; + + /// Try to cast this table reference to a `EagerSnapshot` + fn eager_snapshot(&self) -> &EagerSnapshot; +} + +impl TableReference for EagerSnapshot { + fn protocol(&self) -> &Protocol { + EagerSnapshot::protocol(self) + } + + fn metadata(&self) -> &Metadata { + EagerSnapshot::metadata(self) + } + + fn config(&self) -> TableConfig { + self.table_config() + } + + fn eager_snapshot(&self) -> &EagerSnapshot { + self + } +} + +impl TableReference for DeltaTableState { + fn config(&self) -> TableConfig { + self.snapshot.config() + } + + fn protocol(&self) -> &Protocol { + self.snapshot.protocol() + } + + fn metadata(&self) -> &Metadata { + self.snapshot.metadata() + } + + fn eager_snapshot(&self) -> &EagerSnapshot { + &self.snapshot + } +} + +/// Data that was actually written to the log store. +#[derive(Debug)] +pub struct CommitData { + /// The actions + pub actions: Vec, + /// The Operation + pub operation: DeltaOperation, + /// The Metadata + pub app_metadata: HashMap, + /// Application specific transaction + pub app_transactions: Vec, +} + +impl CommitData { + /// Create new data to be comitted + pub fn new( + mut actions: Vec, + operation: DeltaOperation, + mut app_metadata: HashMap, + app_transactions: Vec, + ) -> Self { + if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { + let mut commit_info = operation.get_commit_info(); + commit_info.timestamp = Some(Utc::now().timestamp_millis()); + app_metadata.insert( + "clientVersion".to_string(), + Value::String(format!("delta-rs.{}", crate_version())), + ); + app_metadata.extend(commit_info.info); + commit_info.info = app_metadata.clone(); + actions.push(Action::CommitInfo(commit_info)) + } + + for txn in &app_transactions { + actions.push(Action::Txn(txn.clone())) + } + + CommitData { + actions, + operation, + app_metadata, + app_transactions, + } + } + + /// Obtain the byte representation of the commit. + pub fn get_bytes(&self) -> Result { + let mut jsons = Vec::::new(); + for action in &self.actions { + let json = serde_json::to_string(action) + .map_err(|e| TransactionError::SerializeLogJson { json_err: e })?; + jsons.push(json); + } + Ok(bytes::Bytes::from(jsons.join("\n"))) + } +} + +#[derive(Clone, Debug, Copy)] +/// Properties for post commit hook. +pub struct PostCommitHookProperties { + create_checkpoint: bool, + /// Override the EnableExpiredLogCleanUp setting, if None config setting is used + cleanup_expired_logs: Option, +} + +#[derive(Clone, Debug)] +/// End user facing interface to be used by operations on the table. +/// Enable controling commit behaviour and modifying metadata that is written during a commit. 
+pub struct CommitProperties { + pub(crate) app_metadata: HashMap, + pub(crate) app_transaction: Vec, + max_retries: usize, + create_checkpoint: bool, + cleanup_expired_logs: Option, +} + +impl Default for CommitProperties { + fn default() -> Self { + Self { + app_metadata: Default::default(), + app_transaction: Vec::new(), + max_retries: DEFAULT_RETRIES, + create_checkpoint: true, + cleanup_expired_logs: None, + } + } +} + +impl CommitProperties { + /// Specify metadata the be comitted + pub fn with_metadata( + mut self, + metadata: impl IntoIterator, + ) -> Self { + self.app_metadata = HashMap::from_iter(metadata); + self + } + + /// Specify if it should create a checkpoint when the commit interval condition is met + pub fn with_create_checkpoint(mut self, create_checkpoint: bool) -> Self { + self.create_checkpoint = create_checkpoint; + self + } + + /// Add an additonal application transaction to the commit + pub fn with_application_transaction(mut self, txn: Transaction) -> Self { + self.app_transaction.push(txn); + self + } + + /// Override application transactions for the commit + pub fn with_application_transactions(mut self, txn: Vec) -> Self { + self.app_transaction = txn; + self + } + + /// Specify if it should clean up the logs when the logRetentionDuration interval is met + pub fn with_cleanup_expired_logs(mut self, cleanup_expired_logs: Option) -> Self { + self.cleanup_expired_logs = cleanup_expired_logs; + self + } +} + +impl From for CommitBuilder { + fn from(value: CommitProperties) -> Self { + CommitBuilder { + max_retries: value.max_retries, + app_metadata: value.app_metadata, + post_commit_hook: Some(PostCommitHookProperties { + create_checkpoint: value.create_checkpoint, + cleanup_expired_logs: value.cleanup_expired_logs, + }), + app_transaction: value.app_transaction, + ..Default::default() + } + } +} + +/// Prepare data to be committed to the Delta log and control how the commit is performed +pub struct CommitBuilder { + actions: Vec, + app_metadata: HashMap, + app_transaction: Vec, + max_retries: usize, + post_commit_hook: Option, +} + +impl Default for CommitBuilder { + fn default() -> Self { + CommitBuilder { + actions: Vec::new(), + app_metadata: HashMap::new(), + app_transaction: Vec::new(), + max_retries: DEFAULT_RETRIES, + post_commit_hook: None, + } + } } -pub(crate) fn get_commit_bytes( - operation: &DeltaOperation, - actions: &Vec, - app_metadata: Option>, -) -> Result { - if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { - let mut extra_info = HashMap::::new(); - let mut commit_info = operation.get_commit_info(); - commit_info.timestamp = Some(Utc::now().timestamp_millis()); - extra_info.insert( - "clientVersion".to_string(), - Value::String(format!("delta-rs.{}", crate_version())), +impl<'a> CommitBuilder { + /// Actions to be included in the commit + pub fn with_actions(mut self, actions: Vec) -> Self { + self.actions = actions; + self + } + + /// Metadata for the operation performed like metrics, user, and notebook + pub fn with_app_metadata(mut self, app_metadata: HashMap) -> Self { + self.app_metadata = app_metadata; + self + } + + /// Maximum number of times to retry the transaction before failing to commit + pub fn with_max_retries(mut self, max_retries: usize) -> Self { + self.max_retries = max_retries; + self + } + + /// Specify all the post commit hook properties + pub fn with_post_commit_hook(mut self, post_commit_hook: PostCommitHookProperties) -> Self { + self.post_commit_hook = Some(post_commit_hook); + self + } + + /// Prepare 
a Commit operation using the configured builder + pub fn build( + self, + table_data: Option<&'a dyn TableReference>, + log_store: LogStoreRef, + operation: DeltaOperation, + ) -> PreCommit<'a> { + let data = CommitData::new( + self.actions, + operation, + self.app_metadata, + self.app_transaction, ); - if let Some(meta) = app_metadata { - extra_info.extend(meta) + PreCommit { + log_store, + table_data, + max_retries: self.max_retries, + data, + post_commit_hook: self.post_commit_hook, } - commit_info.info = extra_info; - Ok(bytes::Bytes::from(log_entry_from_actions( - actions - .iter() - .chain(std::iter::once(&Action::CommitInfo(commit_info))), - )?)) - } else { - Ok(bytes::Bytes::from(log_entry_from_actions(actions)?)) } } -/// Low-level transaction API. Creates a temporary commit file. Once created, -/// the transaction object could be dropped and the actual commit could be executed -/// with `DeltaTable.try_commit_transaction`. -/// TODO: comment is outdated now -pub async fn prepare_commit<'a>( - storage: &dyn ObjectStore, - operation: &DeltaOperation, - actions: &Vec, - app_metadata: Option>, -) -> Result { - // Serialize all actions that are part of this log entry. - let log_entry = get_commit_bytes(operation, actions, app_metadata)?; - - // Write delta log entry as temporary file to storage. For the actual commit, - // the temporary file is moved (atomic rename) to the delta log folder within `commit` function. - let token = uuid::Uuid::new_v4().to_string(); - let file_name = format!("_commit_{token}.json.tmp"); - let path = Path::from_iter([DELTA_LOG_FOLDER, &file_name]); - storage.put(&path, log_entry).await?; - - Ok(path) +/// Represents a commit that has not yet started but all details are finalized +pub struct PreCommit<'a> { + log_store: LogStoreRef, + table_data: Option<&'a dyn TableReference>, + data: CommitData, + max_retries: usize, + post_commit_hook: Option, } -/// Commit a transaction, with up to 15 retries. This is higher-level transaction API. -/// -/// Will error early if the a concurrent transaction has already been committed -/// and conflicts with this transaction. -pub async fn commit( - log_store: &dyn LogStore, - actions: &Vec, - operation: DeltaOperation, - read_snapshot: Option<&DeltaTableState>, - app_metadata: Option>, -) -> DeltaResult { - commit_with_retries( - log_store, - actions, - operation, - read_snapshot, - app_metadata, - 15, - ) - .await +impl<'a> std::future::IntoFuture for PreCommit<'a> { + type Output = DeltaResult; + type IntoFuture = BoxFuture<'a, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + Box::pin(async move { self.into_prepared_commit_future().await?.await?.await }) + } } -/// Commit a transaction, with up configurable number of retries. This is higher-level transaction API. -/// -/// The function will error early if the a concurrent transaction has already been committed -/// and conflicts with this transaction. -pub async fn commit_with_retries( - log_store: &dyn LogStore, - actions: &Vec, - operation: DeltaOperation, - read_snapshot: Option<&DeltaTableState>, - app_metadata: Option>, +impl<'a> PreCommit<'a> { + /// Prepare the commit but do not finalize it + pub fn into_prepared_commit_future(self) -> BoxFuture<'a, DeltaResult>> { + let this = self; + + Box::pin(async move { + if let Some(table_reference) = this.table_data { + PROTOCOL.can_commit(table_reference, &this.data.actions, &this.data.operation)?; + } + + // Write delta log entry as temporary file to storage. 
For the actual commit, + // the temporary file is moved (atomic rename) to the delta log folder within `commit` function. + let log_entry = this.data.get_bytes()?; + let token = uuid::Uuid::new_v4().to_string(); + let path = Path::from_iter([DELTA_LOG_FOLDER, &format!("_commit_{token}.json.tmp")]); + this.log_store + .object_store() + .put(&path, log_entry.into()) + .await?; + + Ok(PreparedCommit { + path, + log_store: this.log_store, + table_data: this.table_data, + max_retries: this.max_retries, + data: this.data, + post_commit: this.post_commit_hook, + }) + }) + } +} + +/// Represents a inflight commit with a temporary commit marker on the log store +pub struct PreparedCommit<'a> { + path: Path, + log_store: LogStoreRef, + data: CommitData, + table_data: Option<&'a dyn TableReference>, max_retries: usize, -) -> DeltaResult { - if let Some(read_snapshot) = read_snapshot { - PROTOCOL.can_commit(read_snapshot, actions)?; - } - - let tmp_commit = prepare_commit( - log_store.object_store().as_ref(), - &operation, - actions, - app_metadata, - ) - .await?; - - if read_snapshot.is_none() { - log_store.write_commit_entry(0, &tmp_commit).await?; - return Ok(0); - } - - let read_snapshot = read_snapshot.unwrap(); - - let mut attempt_number = 1; - while attempt_number <= max_retries { - let version = read_snapshot.version() + attempt_number as i64; - match log_store.write_commit_entry(version, &tmp_commit).await { - Ok(()) => return Ok(version), - Err(TransactionError::VersionAlreadyExists(version)) => { - let summary = - WinningCommitSummary::try_new(log_store, version - 1, version).await?; - let transaction_info = TransactionInfo::try_new( - read_snapshot, - operation.read_predicate(), - actions, - // TODO allow tainting whole table - false, - )?; - let conflict_checker = - ConflictChecker::new(transaction_info, summary, Some(&operation)); - match conflict_checker.check_conflicts() { - Ok(_) => { - attempt_number += 1; + post_commit: Option, +} + +impl<'a> PreparedCommit<'a> { + /// The temporary commit file created + pub fn path(&self) -> &Path { + &self.path + } +} + +impl<'a> std::future::IntoFuture for PreparedCommit<'a> { + type Output = DeltaResult>; + type IntoFuture = BoxFuture<'a, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let tmp_commit = &this.path; + + if this.table_data.is_none() { + this.log_store.write_commit_entry(0, tmp_commit).await?; + return Ok(PostCommit { + version: 0, + data: this.data, + create_checkpoint: false, + cleanup_expired_logs: None, + log_store: this.log_store, + table_data: this.table_data, + }); + } + + // unwrap() is safe here due to the above check + // TODO: refactor to only depend on TableReference Trait + let read_snapshot = this.table_data.unwrap().eager_snapshot(); + + let mut attempt_number = 1; + while attempt_number <= this.max_retries { + let version = read_snapshot.version() + attempt_number as i64; + match this.log_store.write_commit_entry(version, tmp_commit).await { + Ok(()) => { + return Ok(PostCommit { + version, + data: this.data, + create_checkpoint: this + .post_commit + .map(|v| v.create_checkpoint) + .unwrap_or_default(), + cleanup_expired_logs: this + .post_commit + .map(|v| v.cleanup_expired_logs) + .unwrap_or_default(), + log_store: this.log_store, + table_data: this.table_data, + }); + } + Err(TransactionError::VersionAlreadyExists(version)) => { + let summary = WinningCommitSummary::try_new( + this.log_store.as_ref(), + version - 1, + version, + ) + .await?; + let 
transaction_info = TransactionInfo::try_new( + read_snapshot, + this.data.operation.read_predicate(), + &this.data.actions, + this.data.operation.read_whole_table(), + )?; + let conflict_checker = ConflictChecker::new( + transaction_info, + summary, + Some(&this.data.operation), + ); + match conflict_checker.check_conflicts() { + Ok(_) => { + attempt_number += 1; + } + Err(err) => { + this.log_store + .abort_commit_entry(version, tmp_commit) + .await?; + return Err(TransactionError::CommitConflict(err).into()); + } + }; } Err(err) => { - log_store.object_store().delete(&tmp_commit).await?; - return Err(TransactionError::CommitConflict(err).into()); + this.log_store + .abort_commit_entry(version, tmp_commit) + .await?; + return Err(err.into()); } - }; + } + } + + Err(TransactionError::MaxCommitAttempts(this.max_retries as i32).into()) + }) + } +} + +/// Represents items for the post commit hook +pub struct PostCommit<'a> { + /// The winning version number of the commit + pub version: i64, + /// The data that was comitted to the log store + pub data: CommitData, + create_checkpoint: bool, + cleanup_expired_logs: Option, + log_store: LogStoreRef, + table_data: Option<&'a dyn TableReference>, +} + +impl<'a> PostCommit<'a> { + /// Runs the post commit activities + async fn run_post_commit_hook(&self) -> DeltaResult { + if let Some(table) = self.table_data { + let mut snapshot = table.eager_snapshot().clone(); + if self.version - snapshot.version() > 1 { + // This may only occur during concurrent write actions. We need to update the state first to - 1 + // then we can advance. + snapshot + .update(self.log_store.clone(), Some(self.version - 1)) + .await?; + snapshot.advance(vec![&self.data])?; + } else { + snapshot.advance(vec![&self.data])?; + } + let state = DeltaTableState { snapshot }; + // Execute each hook + if self.create_checkpoint { + self.create_checkpoint(&state, &self.log_store, self.version) + .await?; } - Err(err) => { - log_store.object_store().delete(&tmp_commit).await?; - return Err(err.into()); + let cleanup_logs = if let Some(cleanup_logs) = self.cleanup_expired_logs { + cleanup_logs + } else { + state.table_config().enable_expired_log_cleanup() + }; + + if cleanup_logs { + cleanup_expired_logs_for( + self.version, + self.log_store.as_ref(), + Utc::now().timestamp_millis() + - state.table_config().log_retention_duration().as_millis() as i64, + ) + .await?; } + Ok(state) + } else { + let state = DeltaTableState::try_new( + &Path::default(), + self.log_store.object_store(), + Default::default(), + Some(self.version), + ) + .await?; + Ok(state) + } + } + async fn create_checkpoint( + &self, + table_state: &DeltaTableState, + log_store: &LogStoreRef, + version: i64, + ) -> DeltaResult<()> { + let checkpoint_interval = table_state.config().checkpoint_interval() as i64; + if ((version + 1) % checkpoint_interval) == 0 { + create_checkpoint_for(version, table_state, log_store.as_ref()).await? 
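For orientation, a minimal sketch of how an operation can drive the builder-based commit flow above, which replaces the removed `commit()` and `prepare_commit()` free functions; the `log_store`, `snapshot`, and `actions` bindings are assumed to come from the calling operation, and the `DeltaOperation::Update` variant is only an illustrative choice:

    // Configure optional post-commit behaviour up front.
    let properties = CommitProperties::default()
        .with_create_checkpoint(true)           // run the checkpoint hook when the interval is met
        .with_cleanup_expired_logs(Some(true)); // force expired-log cleanup regardless of table config

    // Awaiting the PreCommit walks the whole chain:
    // PreCommit -> PreparedCommit -> PostCommit -> FinalizedCommit.
    let finalized = CommitBuilder::from(properties)
        .with_actions(actions) // Vec<Action> produced by the operation
        .build(Some(&snapshot), log_store.clone(), DeltaOperation::Update { predicate: None })
        .await?;

    // The finalized commit exposes the winning version and the refreshed table state.
    let _version = finalized.version();
    let _state = finalized.snapshot();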
} + Ok(()) } +} + +/// A commit that successfully completed +pub struct FinalizedCommit { + /// The new table state after a commmit + pub snapshot: DeltaTableState, + + /// Version of the finalized commit + pub version: i64, +} - Err(TransactionError::MaxCommitAttempts(max_retries as i32).into()) +impl FinalizedCommit { + /// The new table state after a commmit + pub fn snapshot(&self) -> DeltaTableState { + self.snapshot.clone() + } + /// Version of the finalized commit + pub fn version(&self) -> i64 { + self.version + } +} + +impl<'a> std::future::IntoFuture for PostCommit<'a> { + type Output = DeltaResult; + type IntoFuture = BoxFuture<'a, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + match this.run_post_commit_hook().await { + Ok(snapshot) => Ok(FinalizedCommit { + snapshot, + version: this.version, + }), + Err(err) => Err(err), + } + }) + } } #[cfg(test)] mod tests { use std::{collections::HashMap, sync::Arc}; - use self::test_utils::init_table_actions; use super::*; - use crate::{logstore::default_logstore::DefaultLogStore, storage::commit_uri_from_version}; - use object_store::memory::InMemory; + use crate::{ + logstore::{default_logstore::DefaultLogStore, LogStore}, + storage::commit_uri_from_version, + }; + use object_store::{memory::InMemory, PutPayload}; use url::Url; #[test] @@ -275,15 +745,6 @@ mod tests { assert_eq!(version, Path::from("_delta_log/00000000000000000123.json")) } - #[test] - fn test_log_entry_from_actions() { - let actions = init_table_actions(None); - let entry = log_entry_from_actions(&actions).unwrap(); - let lines: Vec<_> = entry.lines().collect(); - // writes every action to a line - assert_eq!(actions.len(), lines.len()) - } - #[tokio::test] async fn test_try_commit_transaction() { let store = Arc::new(InMemory::new()); @@ -297,8 +758,8 @@ mod tests { ); let tmp_path = Path::from("_delta_log/tmp"); let version_path = Path::from("_delta_log/00000000000000000000.json"); - store.put(&tmp_path, bytes::Bytes::new()).await.unwrap(); - store.put(&version_path, bytes::Bytes::new()).await.unwrap(); + store.put(&tmp_path, PutPayload::new()).await.unwrap(); + store.put(&version_path, PutPayload::new()).await.unwrap(); let res = log_store.write_commit_entry(0, &tmp_path).await; // fails if file version already exists diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index 07a7b75405..f3bb87098a 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -2,9 +2,13 @@ use std::collections::HashSet; use lazy_static::lazy_static; use once_cell::sync::Lazy; +use tracing::log::*; -use super::TransactionError; -use crate::kernel::{Action, ReaderFeatures, WriterFeatures}; +use super::{TableReference, TransactionError}; +use crate::kernel::{ + Action, DataType, EagerSnapshot, ReaderFeatures, Schema, StructField, WriterFeatures, +}; +use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; lazy_static! 
{ @@ -69,15 +73,60 @@ impl ProtocolChecker { } /// Check append-only at the high level (operation level) - pub fn check_append_only(&self, snapshot: &DeltaTableState) -> Result<(), TransactionError> { + pub fn check_append_only(&self, snapshot: &EagerSnapshot) -> Result<(), TransactionError> { if snapshot.table_config().append_only() { return Err(TransactionError::DeltaTableAppendOnly); } Ok(()) } + /// checks if table contains timestamp_ntz in any field including nested fields. + pub fn contains_timestampntz<'a>( + &self, + mut fields: impl Iterator, + ) -> bool { + fn _check_type(dtype: &DataType) -> bool { + match dtype { + &DataType::TIMESTAMP_NTZ => true, + DataType::Array(inner) => _check_type(inner.element_type()), + DataType::Struct(inner) => inner.fields().any(|f| _check_type(f.data_type())), + _ => false, + } + } + fields.any(|f| _check_type(f.data_type())) + } + + /// Check can write_timestamp_ntz + pub fn check_can_write_timestamp_ntz( + &self, + snapshot: &DeltaTableState, + schema: &Schema, + ) -> Result<(), TransactionError> { + let contains_timestampntz = self.contains_timestampntz(schema.fields()); + let required_features: Option<&HashSet> = + match snapshot.protocol().min_writer_version { + 0..=6 => None, + _ => snapshot.protocol().writer_features.as_ref(), + }; + + if let Some(table_features) = required_features { + if !table_features.contains(&WriterFeatures::TimestampWithoutTimezone) + && contains_timestampntz + { + return Err(TransactionError::WriterFeaturesRequired( + WriterFeatures::TimestampWithoutTimezone, + )); + } + } else if contains_timestampntz { + return Err(TransactionError::WriterFeaturesRequired( + WriterFeatures::TimestampWithoutTimezone, + )); + } + Ok(()) + } + /// Check if delta-rs can read form the given delta table. - pub fn can_read_from(&self, snapshot: &DeltaTableState) -> Result<(), TransactionError> { + pub fn can_read_from(&self, snapshot: &dyn TableReference) -> Result<(), TransactionError> { let required_features: Option<&HashSet> = match snapshot.protocol().min_reader_version { 0 | 1 => None, @@ -96,20 +145,36 @@ impl ProtocolChecker { } /// Check if delta-rs can write to the given delta table. 
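A quick illustration of the nested `timestamp_ntz` detection above; the schema constructors are assumed to follow the kernel's `StructType::new` / `StructField::new` shapes, and `checker` stands for any `ProtocolChecker`:

    // Hypothetical schema: `event` is a struct column containing a timestamp without timezone.
    let schema = StructType::new(vec![
        StructField::new("id", DataType::LONG, false),
        StructField::new(
            "event",
            DataType::Struct(Box::new(StructType::new(vec![StructField::new(
                "ts_local",
                DataType::TIMESTAMP_NTZ,
                true,
            )]))),
            true,
        ),
    ]);
    // contains_timestampntz recurses through Struct and Array types, so the nested field is found.
    assert!(checker.contains_timestampntz(schema.fields()));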
- pub fn can_write_to(&self, snapshot: &DeltaTableState) -> Result<(), TransactionError> { + pub fn can_write_to(&self, snapshot: &dyn TableReference) -> Result<(), TransactionError> { // NOTE: writers must always support all required reader features self.can_read_from(snapshot)?; + let min_writer_version = snapshot.protocol().min_writer_version; + + let required_features: Option<&HashSet> = match min_writer_version { + 0 | 1 => None, + 2 => Some(&WRITER_V2), + 3 => Some(&WRITER_V3), + 4 => Some(&WRITER_V4), + 5 => Some(&WRITER_V5), + 6 => Some(&WRITER_V6), + _ => snapshot.protocol().writer_features.as_ref(), + }; - let required_features: Option<&HashSet> = - match snapshot.protocol().min_writer_version { - 0 | 1 => None, - 2 => Some(&WRITER_V2), - 3 => Some(&WRITER_V3), - 4 => Some(&WRITER_V4), - 5 => Some(&WRITER_V5), - 6 => Some(&WRITER_V6), - _ => snapshot.protocol().writer_features.as_ref(), - }; + if (4..7).contains(&min_writer_version) { + debug!("min_writer_version is less 4-6, checking for unsupported table features"); + if let Ok(schema) = snapshot.metadata().schema() { + for field in schema.fields() { + if field.metadata.contains_key( + crate::kernel::ColumnMetadataKey::GenerationExpression.as_ref(), + ) { + error!("The table contains `delta.generationExpression` settings on columns which mean this table cannot be currently written to by delta-rs"); + return Err(TransactionError::UnsupportedWriterFeatures(vec![ + WriterFeatures::GeneratedColumns, + ])); + } + } + } + } if let Some(features) = required_features { let mut diff = features.difference(&self.writer_features).peekable(); @@ -124,8 +189,9 @@ impl ProtocolChecker { pub fn can_commit( &self, - snapshot: &DeltaTableState, + snapshot: &dyn TableReference, actions: &[Action], + operation: &DeltaOperation, ) -> Result<(), TransactionError> { self.can_write_to(snapshot)?; @@ -133,23 +199,30 @@ impl ProtocolChecker { let append_only_enabled = if snapshot.protocol().min_writer_version < 2 { false } else if snapshot.protocol().min_writer_version < 7 { - snapshot.table_config().append_only() + snapshot.config().append_only() } else { snapshot .protocol() .writer_features .as_ref() - .ok_or(TransactionError::WriterFeaturesRequired)? + .ok_or(TransactionError::WriterFeaturesRequired( + WriterFeatures::AppendOnly, + ))? .contains(&WriterFeatures::AppendOnly) - && snapshot.table_config().append_only() + && snapshot.config().append_only() }; if append_only_enabled { - actions.iter().try_for_each(|action| match action { - Action::Remove(remove) if remove.data_change => { - Err(TransactionError::DeltaTableAppendOnly) + match operation { + DeltaOperation::Restore { .. } | DeltaOperation::FileSystemCheck { .. } => {} + _ => { + actions.iter().try_for_each(|action| match action { + Action::Remove(remove) if remove.data_change => { + Err(TransactionError::DeltaTableAppendOnly) + } + _ => Ok(()), + })?; } - _ => Ok(()), - })?; + } } Ok(()) @@ -164,11 +237,18 @@ impl ProtocolChecker { /// As we implement new features, we need to update this instance accordingly. /// resulting version support is determined by the supported table feature set. 
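To make the new exemption in `can_commit` concrete, a hedged sketch of its effect on an append-only table; `checker` and `snapshot` are assumed fixtures like those used in the tests below:

    // A data-changing remove, as produced by operations that rewrite files.
    let remove = Action::Remove(Remove {
        path: "part-00001.parquet".to_string(),
        data_change: true,
        ..Default::default()
    });
    // Still rejected: an ordinary update may not remove data from an append-only table.
    assert!(checker
        .can_commit(snapshot, &[remove.clone()], &DeltaOperation::Update { predicate: None })
        .is_err());
    // Now accepted: FileSystemCheck (and Restore) are exempt even though they carry removes.
    assert!(checker
        .can_commit(snapshot, &[remove], &DeltaOperation::FileSystemCheck {})
        .is_ok());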
pub static INSTANCE: Lazy = Lazy::new(|| { - let reader_features = HashSet::new(); + let mut reader_features = HashSet::new(); + reader_features.insert(ReaderFeatures::TimestampWithoutTimezone); // reader_features.insert(ReaderFeatures::ColumnMapping); let mut writer_features = HashSet::new(); writer_features.insert(WriterFeatures::AppendOnly); + writer_features.insert(WriterFeatures::TimestampWithoutTimezone); + #[cfg(feature = "cdf")] + { + writer_features.insert(WriterFeatures::ChangeDataFeed); + writer_features.insert(WriterFeatures::GeneratedColumns); + } #[cfg(feature = "datafusion")] { writer_features.insert(WriterFeatures::Invariants); @@ -186,7 +266,10 @@ pub static INSTANCE: Lazy = Lazy::new(|| { mod tests { use super::super::test_utils::create_metadata_action; use super::*; - use crate::kernel::{Action, Add, Protocol, Remove}; + use crate::kernel::DataType as DeltaDataType; + use crate::kernel::{Action, Add, PrimitiveType, Protocol, Remove}; + use crate::protocol::SaveMode; + use crate::table::state::DeltaTableState; use crate::DeltaConfigKey; use std::collections::HashMap; @@ -197,6 +280,12 @@ mod tests { data_change: true, ..Default::default() })]; + let append_op = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: None, + predicate: None, + }; + let change_actions = vec![ Action::Add(Add { path: "test".to_string(), @@ -209,6 +298,8 @@ mod tests { ..Default::default() }), ]; + let change_op = DeltaOperation::Update { predicate: None }; + let neutral_actions = vec![ Action::Add(Add { path: "test".to_string(), @@ -221,6 +312,7 @@ mod tests { ..Default::default() }), ]; + let neutral_op = DeltaOperation::Update { predicate: None }; let create_actions = |writer: i32, append: &str, feat: Vec| { vec![ @@ -244,39 +336,81 @@ mod tests { let actions = create_actions(1, "true", vec![]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_ok()); + assert!(checker + .can_commit(eager, &neutral_actions, &neutral_op) + .is_ok()); let actions = create_actions(2, "true", vec![]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_err()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_err()); + assert!(checker + .can_commit(eager, &neutral_actions, &neutral_op) + .is_ok()); let actions = create_actions(2, "false", vec![]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_ok()); + assert!(checker + .can_commit(eager, &neutral_actions, 
&neutral_op) + .is_ok()); let actions = create_actions(7, "true", vec![WriterFeatures::AppendOnly]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_err()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_err()); + assert!(checker + .can_commit(eager, &neutral_actions, &neutral_op) + .is_ok()); let actions = create_actions(7, "false", vec![WriterFeatures::AppendOnly]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_ok()); + assert!(checker + .can_commit(eager, &neutral_actions, &neutral_op) + .is_ok()); let actions = create_actions(7, "true", vec![]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker.can_commit(&snapshot, &append_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &change_actions).is_ok()); - assert!(checker.can_commit(&snapshot, &neutral_actions).is_ok()); + let eager = snapshot.snapshot(); + assert!(checker + .can_commit(eager, &append_actions, &append_op) + .is_ok()); + assert!(checker + .can_commit(eager, &change_actions, &change_op) + .is_ok()); + assert!(checker + .can_commit(eager, &neutral_actions, &neutral_op) + .is_ok()); } #[test] @@ -291,8 +425,9 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_1 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_1).is_ok()); - assert!(checker_1.can_write_to(&snapshot_1).is_ok()); + let eager_1 = snapshot_1.snapshot(); + assert!(checker_1.can_read_from(eager_1).is_ok()); + assert!(checker_1.can_write_to(eager_1).is_ok()); let checker_2 = ProtocolChecker::new(READER_V2.clone(), HashSet::new()); let actions = vec![ @@ -304,11 +439,12 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_2 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_2).is_err()); - assert!(checker_1.can_write_to(&snapshot_2).is_err()); - assert!(checker_2.can_read_from(&snapshot_1).is_ok()); - assert!(checker_2.can_read_from(&snapshot_2).is_ok()); - assert!(checker_2.can_write_to(&snapshot_2).is_ok()); + let eager_2 = snapshot_2.snapshot(); + assert!(checker_1.can_read_from(eager_2).is_err()); + assert!(checker_1.can_write_to(eager_2).is_err()); + assert!(checker_2.can_read_from(eager_1).is_ok()); + assert!(checker_2.can_read_from(eager_2).is_ok()); + assert!(checker_2.can_write_to(eager_2).is_ok()); let checker_3 = ProtocolChecker::new(READER_V2.clone(), WRITER_V2.clone()); let actions = vec![ @@ -320,14 +456,15 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_3 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_3).is_err()); - assert!(checker_1.can_write_to(&snapshot_3).is_err()); - assert!(checker_2.can_read_from(&snapshot_3).is_ok()); - 
assert!(checker_2.can_write_to(&snapshot_3).is_err()); - assert!(checker_3.can_read_from(&snapshot_1).is_ok()); - assert!(checker_3.can_read_from(&snapshot_2).is_ok()); - assert!(checker_3.can_read_from(&snapshot_3).is_ok()); - assert!(checker_3.can_write_to(&snapshot_3).is_ok()); + let eager_3 = snapshot_3.snapshot(); + assert!(checker_1.can_read_from(eager_3).is_err()); + assert!(checker_1.can_write_to(eager_3).is_err()); + assert!(checker_2.can_read_from(eager_3).is_ok()); + assert!(checker_2.can_write_to(eager_3).is_err()); + assert!(checker_3.can_read_from(eager_1).is_ok()); + assert!(checker_3.can_read_from(eager_2).is_ok()); + assert!(checker_3.can_read_from(eager_3).is_ok()); + assert!(checker_3.can_write_to(eager_3).is_ok()); let checker_4 = ProtocolChecker::new(READER_V2.clone(), WRITER_V3.clone()); let actions = vec![ @@ -339,17 +476,18 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_4 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_4).is_err()); - assert!(checker_1.can_write_to(&snapshot_4).is_err()); - assert!(checker_2.can_read_from(&snapshot_4).is_ok()); - assert!(checker_2.can_write_to(&snapshot_4).is_err()); - assert!(checker_3.can_read_from(&snapshot_4).is_ok()); - assert!(checker_3.can_write_to(&snapshot_4).is_err()); - assert!(checker_4.can_read_from(&snapshot_1).is_ok()); - assert!(checker_4.can_read_from(&snapshot_2).is_ok()); - assert!(checker_4.can_read_from(&snapshot_3).is_ok()); - assert!(checker_4.can_read_from(&snapshot_4).is_ok()); - assert!(checker_4.can_write_to(&snapshot_4).is_ok()); + let eager_4 = snapshot_4.snapshot(); + assert!(checker_1.can_read_from(eager_4).is_err()); + assert!(checker_1.can_write_to(eager_4).is_err()); + assert!(checker_2.can_read_from(eager_4).is_ok()); + assert!(checker_2.can_write_to(eager_4).is_err()); + assert!(checker_3.can_read_from(eager_4).is_ok()); + assert!(checker_3.can_write_to(eager_4).is_err()); + assert!(checker_4.can_read_from(eager_1).is_ok()); + assert!(checker_4.can_read_from(eager_2).is_ok()); + assert!(checker_4.can_read_from(eager_3).is_ok()); + assert!(checker_4.can_read_from(eager_4).is_ok()); + assert!(checker_4.can_write_to(eager_4).is_ok()); let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); let actions = vec![ @@ -361,20 +499,21 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_5).is_err()); - assert!(checker_1.can_write_to(&snapshot_5).is_err()); - assert!(checker_2.can_read_from(&snapshot_5).is_ok()); - assert!(checker_2.can_write_to(&snapshot_5).is_err()); - assert!(checker_3.can_read_from(&snapshot_5).is_ok()); - assert!(checker_3.can_write_to(&snapshot_5).is_err()); - assert!(checker_4.can_read_from(&snapshot_5).is_ok()); - assert!(checker_4.can_write_to(&snapshot_5).is_err()); - assert!(checker_5.can_read_from(&snapshot_1).is_ok()); - assert!(checker_5.can_read_from(&snapshot_2).is_ok()); - assert!(checker_5.can_read_from(&snapshot_3).is_ok()); - assert!(checker_5.can_read_from(&snapshot_4).is_ok()); - assert!(checker_5.can_read_from(&snapshot_5).is_ok()); - assert!(checker_5.can_write_to(&snapshot_5).is_ok()); + let eager_5 = snapshot_5.snapshot(); + assert!(checker_1.can_read_from(eager_5).is_err()); + assert!(checker_1.can_write_to(eager_5).is_err()); + assert!(checker_2.can_read_from(eager_5).is_ok()); + assert!(checker_2.can_write_to(eager_5).is_err()); + 
assert!(checker_3.can_read_from(eager_5).is_ok()); + assert!(checker_3.can_write_to(eager_5).is_err()); + assert!(checker_4.can_read_from(eager_5).is_ok()); + assert!(checker_4.can_write_to(eager_5).is_err()); + assert!(checker_5.can_read_from(eager_1).is_ok()); + assert!(checker_5.can_read_from(eager_2).is_ok()); + assert!(checker_5.can_read_from(eager_3).is_ok()); + assert!(checker_5.can_read_from(eager_4).is_ok()); + assert!(checker_5.can_read_from(eager_5).is_ok()); + assert!(checker_5.can_write_to(eager_5).is_ok()); let checker_6 = ProtocolChecker::new(READER_V2.clone(), WRITER_V5.clone()); let actions = vec![ @@ -386,23 +525,24 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_6 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_6).is_err()); - assert!(checker_1.can_write_to(&snapshot_6).is_err()); - assert!(checker_2.can_read_from(&snapshot_6).is_ok()); - assert!(checker_2.can_write_to(&snapshot_6).is_err()); - assert!(checker_3.can_read_from(&snapshot_6).is_ok()); - assert!(checker_3.can_write_to(&snapshot_6).is_err()); - assert!(checker_4.can_read_from(&snapshot_6).is_ok()); - assert!(checker_4.can_write_to(&snapshot_6).is_err()); - assert!(checker_5.can_read_from(&snapshot_6).is_ok()); - assert!(checker_5.can_write_to(&snapshot_6).is_err()); - assert!(checker_6.can_read_from(&snapshot_1).is_ok()); - assert!(checker_6.can_read_from(&snapshot_2).is_ok()); - assert!(checker_6.can_read_from(&snapshot_3).is_ok()); - assert!(checker_6.can_read_from(&snapshot_4).is_ok()); - assert!(checker_6.can_read_from(&snapshot_5).is_ok()); - assert!(checker_6.can_read_from(&snapshot_6).is_ok()); - assert!(checker_6.can_write_to(&snapshot_6).is_ok()); + let eager_6 = snapshot_6.snapshot(); + assert!(checker_1.can_read_from(eager_6).is_err()); + assert!(checker_1.can_write_to(eager_6).is_err()); + assert!(checker_2.can_read_from(eager_6).is_ok()); + assert!(checker_2.can_write_to(eager_6).is_err()); + assert!(checker_3.can_read_from(eager_6).is_ok()); + assert!(checker_3.can_write_to(eager_6).is_err()); + assert!(checker_4.can_read_from(eager_6).is_ok()); + assert!(checker_4.can_write_to(eager_6).is_err()); + assert!(checker_5.can_read_from(eager_6).is_ok()); + assert!(checker_5.can_write_to(eager_6).is_err()); + assert!(checker_6.can_read_from(eager_1).is_ok()); + assert!(checker_6.can_read_from(eager_2).is_ok()); + assert!(checker_6.can_read_from(eager_3).is_ok()); + assert!(checker_6.can_read_from(eager_4).is_ok()); + assert!(checker_6.can_read_from(eager_5).is_ok()); + assert!(checker_6.can_read_from(eager_6).is_ok()); + assert!(checker_6.can_write_to(eager_6).is_ok()); let checker_7 = ProtocolChecker::new(READER_V2.clone(), WRITER_V6.clone()); let actions = vec![ @@ -414,25 +554,85 @@ mod tests { create_metadata_action(None, Some(HashMap::new())), ]; let snapshot_7 = DeltaTableState::from_actions(actions).unwrap(); - assert!(checker_1.can_read_from(&snapshot_7).is_err()); - assert!(checker_1.can_write_to(&snapshot_7).is_err()); - assert!(checker_2.can_read_from(&snapshot_7).is_ok()); - assert!(checker_2.can_write_to(&snapshot_7).is_err()); - assert!(checker_3.can_read_from(&snapshot_7).is_ok()); - assert!(checker_3.can_write_to(&snapshot_7).is_err()); - assert!(checker_4.can_read_from(&snapshot_7).is_ok()); - assert!(checker_4.can_write_to(&snapshot_7).is_err()); - assert!(checker_5.can_read_from(&snapshot_7).is_ok()); - assert!(checker_5.can_write_to(&snapshot_7).is_err()); - 
assert!(checker_6.can_read_from(&snapshot_7).is_ok()); - assert!(checker_6.can_write_to(&snapshot_7).is_err()); - assert!(checker_7.can_read_from(&snapshot_1).is_ok()); - assert!(checker_7.can_read_from(&snapshot_2).is_ok()); - assert!(checker_7.can_read_from(&snapshot_3).is_ok()); - assert!(checker_7.can_read_from(&snapshot_4).is_ok()); - assert!(checker_7.can_read_from(&snapshot_5).is_ok()); - assert!(checker_7.can_read_from(&snapshot_6).is_ok()); - assert!(checker_7.can_read_from(&snapshot_7).is_ok()); - assert!(checker_7.can_write_to(&snapshot_7).is_ok()); + let eager_7 = snapshot_7.snapshot(); + assert!(checker_1.can_read_from(eager_7).is_err()); + assert!(checker_1.can_write_to(eager_7).is_err()); + assert!(checker_2.can_read_from(eager_7).is_ok()); + assert!(checker_2.can_write_to(eager_7).is_err()); + assert!(checker_3.can_read_from(eager_7).is_ok()); + assert!(checker_3.can_write_to(eager_7).is_err()); + assert!(checker_4.can_read_from(eager_7).is_ok()); + assert!(checker_4.can_write_to(eager_7).is_err()); + assert!(checker_5.can_read_from(eager_7).is_ok()); + assert!(checker_5.can_write_to(eager_7).is_err()); + assert!(checker_6.can_read_from(eager_7).is_ok()); + assert!(checker_6.can_write_to(eager_7).is_err()); + assert!(checker_7.can_read_from(eager_1).is_ok()); + assert!(checker_7.can_read_from(eager_2).is_ok()); + assert!(checker_7.can_read_from(eager_3).is_ok()); + assert!(checker_7.can_read_from(eager_4).is_ok()); + assert!(checker_7.can_read_from(eager_5).is_ok()); + assert!(checker_7.can_read_from(eager_6).is_ok()); + assert!(checker_7.can_read_from(eager_7).is_ok()); + assert!(checker_7.can_write_to(eager_7).is_ok()); + } + + #[tokio::test] + async fn test_minwriter_v4_with_cdf() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![ + Action::Protocol( + Protocol::new(2, 4) + .with_writer_features(vec![crate::kernel::WriterFeatures::ChangeDataFeed]), + ), + create_metadata_action(None, Some(HashMap::new())), + ]; + let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); + let eager_5 = snapshot_5.snapshot(); + assert!(checker_5.can_write_to(eager_5).is_ok()); + } + + /// Technically we do not yet support generated columns, but it is okay to "accept" writing to + /// a column with minWriterVersion=4 and the generated columns feature as long as the + /// `delta.generationExpression` isn't actually defined the write is still allowed + #[tokio::test] + async fn test_minwriter_v4_with_generated_columns() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![ + Action::Protocol( + Protocol::new(2, 4) + .with_writer_features(vec![crate::kernel::WriterFeatures::GeneratedColumns]), + ), + create_metadata_action(None, Some(HashMap::new())), + ]; + let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); + let eager_5 = snapshot_5.snapshot(); + assert!(checker_5.can_write_to(eager_5).is_ok()); + } + + #[tokio::test] + async fn test_minwriter_v4_with_generated_columns_and_expressions() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![Action::Protocol(Protocol::new(2, 4))]; + + let table: crate::DeltaTable = crate::DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + Some(HashMap::from([( + "delta.generationExpression".into(), + "x IS TRUE".into(), + )])), + ) + .with_actions(actions) + 
.with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + let eager_5 = table + .snapshot() + .expect("Failed to get snapshot from test table"); + assert!(checker_5.can_write_to(eager_5).is_err()); } } diff --git a/crates/core/src/operations/transaction/state.rs b/crates/core/src/operations/transaction/state.rs index ab778f2cb6..d705a616b1 100644 --- a/crates/core/src/operations/transaction/state.rs +++ b/crates/core/src/operations/transaction/state.rs @@ -5,111 +5,35 @@ use arrow::array::{ArrayRef, BooleanArray}; use arrow::datatypes::{ DataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; -use datafusion::datasource::physical_plan::wrap_partition_type_in_dict; -use datafusion::execution::context::SessionState; +use datafusion::execution::context::SessionContext; use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use datafusion_common::scalar::ScalarValue; -use datafusion_common::{Column, DFSchema}; -use datafusion_expr::{utils::conjunction, Expr}; -use itertools::Either; +use datafusion_common::{Column, ToDFSchema}; +use datafusion_expr::Expr; +use itertools::Itertools; use object_store::ObjectStore; +use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; -use crate::delta_datafusion::expr::parse_predicate_expression; -use crate::delta_datafusion::{ - get_null_of_arrow_type, logical_expr_to_physical_expr, to_correct_scalar_value, -}; +use crate::delta_datafusion::{get_null_of_arrow_type, to_correct_scalar_value, DataFusionMixins}; use crate::errors::DeltaResult; -use crate::kernel::Add; +use crate::kernel::{Add, EagerSnapshot}; use crate::table::state::DeltaTableState; impl DeltaTableState { - /// Get the table schema as an [`ArrowSchemaRef`] - pub fn arrow_schema(&self) -> DeltaResult { - self._arrow_schema(true) - } - - fn _arrow_schema(&self, wrap_partitions: bool) -> DeltaResult { - let meta = self.metadata(); - let fields = meta - .schema()? - .fields() - .iter() - .filter(|f| !meta.partition_columns.contains(&f.name().to_string())) - .map(|f| f.try_into()) - .chain( - meta.schema()? - .fields() - .iter() - .filter(|f| meta.partition_columns.contains(&f.name().to_string())) - .map(|f| { - let field = ArrowField::try_from(f)?; - let corrected = if wrap_partitions { - match field.data_type() { - // Only dictionary-encode types that may be large - // // https://github.com/apache/arrow-datafusion/pull/5545 - DataType::Utf8 - | DataType::LargeUtf8 - | DataType::Binary - | DataType::LargeBinary => { - wrap_partition_type_in_dict(field.data_type().clone()) - } - _ => field.data_type().clone(), - } - } else { - field.data_type().clone() - }; - Ok(field.with_data_type(corrected)) - }), - ) - .collect::, _>>()?; - - Ok(Arc::new(ArrowSchema::new(fields))) - } - - pub(crate) fn input_schema(&self) -> DeltaResult { - self._arrow_schema(false) - } - - /// Iterate over all files in the log matching a predicate - pub fn files_matching_predicate( - &self, - filters: &[Expr], - ) -> DeltaResult> { - if let Some(Some(predicate)) = - (!filters.is_empty()).then_some(conjunction(filters.iter().cloned())) - { - let expr = logical_expr_to_physical_expr(&predicate, self.arrow_schema()?.as_ref()); - let pruning_predicate = PruningPredicate::try_new(expr, self.arrow_schema()?)?; - Ok(Either::Left( - self.file_actions()? 
- .into_iter() - .zip(pruning_predicate.prune(self)?) - .filter_map( - |(action, keep_file)| { - if keep_file { - Some(action) - } else { - None - } - }, - ), - )) - } else { - Ok(Either::Right(self.file_actions()?.into_iter())) - } - } - - /// Parse an expression string into a datafusion [`Expr`] - pub fn parse_predicate_expression( + /// Get the physical table schema. + /// + /// This will construct a schema derived from the parquet schema of the latest data file, + /// and fields for partition columns from the schema defined in table meta data. + pub async fn physical_arrow_schema( &self, - expr: impl AsRef, - df_state: &SessionState, - ) -> DeltaResult { - let schema = DFSchema::try_from(self.arrow_schema()?.as_ref().to_owned())?; - parse_predicate_expression(&schema, expr, df_state) + object_store: Arc, + ) -> DeltaResult { + self.snapshot.physical_arrow_schema(object_store).await } +} +impl EagerSnapshot { /// Get the physical table schema. /// /// This will construct a schema derived from the parquet schema of the latest data file, @@ -118,18 +42,17 @@ impl DeltaTableState { &self, object_store: Arc, ) -> DeltaResult { - if let Some(add) = self - .file_actions()? - .iter() - .max_by_key(|obj| obj.modification_time) - { + if let Some(add) = self.file_actions()?.max_by_key(|obj| obj.modification_time) { let file_meta = add.try_into()?; let file_reader = ParquetObjectReader::new(object_store, file_meta); - let file_schema = ParquetRecordBatchStreamBuilder::new(file_reader) - .await? - .build()? - .schema() - .clone(); + let file_schema = ParquetRecordBatchStreamBuilder::new_with_options( + file_reader, + ArrowReaderOptions::new().with_skip_arrow_metadata(true), + ) + .await? + .build()? + .schema() + .clone(); let table_schema = Arc::new(ArrowSchema::new( self.arrow_schema()? @@ -228,7 +151,9 @@ impl<'a> AddContainer<'a> { /// so evaluating expressions is inexact. However, excluded files are guaranteed (for a correct log) /// to not contain matches by the predicate expression. pub fn predicate_matches(&self, predicate: Expr) -> DeltaResult> { - let expr = logical_expr_to_physical_expr(&predicate, &self.schema); + //let expr = logical_expr_to_physical_expr(predicate, &self.schema); + let expr = SessionContext::new() + .create_physical_expr(predicate, &self.schema.clone().to_dfschema()?)?; let pruning_predicate = PruningPredicate::try_new(expr, self.schema.clone())?; Ok(self .inner @@ -298,6 +223,21 @@ impl<'a> PruningStatistics for AddContainer<'a> { ScalarValue::iter_to_array(values).ok() } + /// return the number of rows for the named column in each container + /// as an `Option`. + /// + /// Note: the returned array must contain `num_containers()` rows + fn row_counts(&self, _column: &Column) -> Option { + let values = self.inner.iter().map(|add| { + if let Ok(Some(statistics)) = add.get_stats() { + ScalarValue::UInt64(Some(statistics.num_records as u64)) + } else { + ScalarValue::UInt64(None) + } + }); + ScalarValue::iter_to_array(values).ok() + } + // This function is required since DataFusion 35.0, but is implemented as a no-op // https://github.com/apache/arrow-datafusion/blob/ec6abece2dcfa68007b87c69eefa6b0d7333f628/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs#L550 fn contained(&self, _column: &Column, _value: &HashSet) -> Option { @@ -305,11 +245,11 @@ impl<'a> PruningStatistics for AddContainer<'a> { } } -impl PruningStatistics for DeltaTableState { +impl PruningStatistics for EagerSnapshot { /// return the minimum values for the named column, if known. 
/// Note: the returned array must contain `num_containers()` rows fn min_values(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?; + let files = self.file_actions().ok()?.collect_vec(); let partition_columns = &self.metadata().partition_columns; let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); container.min_values(column) @@ -318,7 +258,7 @@ impl PruningStatistics for DeltaTableState { /// return the maximum values for the named column, if known. /// Note: the returned array must contain `num_containers()` rows. fn max_values(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?; + let files = self.file_actions().ok()?.collect_vec(); let partition_columns = &self.metadata().partition_columns; let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); container.max_values(column) @@ -335,12 +275,23 @@ impl PruningStatistics for DeltaTableState { /// /// Note: the returned array must contain `num_containers()` rows. fn null_counts(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?; + let files = self.file_actions().ok()?.collect_vec(); let partition_columns = &self.metadata().partition_columns; let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); container.null_counts(column) } + /// return the number of rows for the named column in each container + /// as an `Option`. + /// + /// Note: the returned array must contain `num_containers()` rows + fn row_counts(&self, column: &Column) -> Option { + let files = self.file_actions().ok()?.collect_vec(); + let partition_columns = &self.metadata().partition_columns; + let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); + container.row_counts(column) + } + // This function is required since DataFusion 35.0, but is implemented as a no-op // https://github.com/apache/arrow-datafusion/blob/ec6abece2dcfa68007b87c69eefa6b0d7333f628/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs#L550 fn contained(&self, _column: &Column, _value: &HashSet) -> Option { @@ -348,9 +299,36 @@ impl PruningStatistics for DeltaTableState { } } +impl PruningStatistics for DeltaTableState { + fn min_values(&self, column: &Column) -> Option { + self.snapshot.min_values(column) + } + + fn max_values(&self, column: &Column) -> Option { + self.snapshot.max_values(column) + } + + fn num_containers(&self) -> usize { + self.snapshot.num_containers() + } + + fn null_counts(&self, column: &Column) -> Option { + self.snapshot.null_counts(column) + } + + fn row_counts(&self, column: &Column) -> Option { + self.snapshot.row_counts(column) + } + + fn contained(&self, column: &Column, values: &HashSet) -> Option { + self.snapshot.contained(column, values) + } +} + #[cfg(test)] mod tests { use super::*; + use crate::delta_datafusion::DataFusionFileMixins; use crate::operations::transaction::test_utils::{create_add_action, init_table_actions}; use datafusion::prelude::SessionContext; use datafusion_expr::{col, lit}; @@ -391,6 +369,7 @@ mod tests { let state = DeltaTableState::from_actions(actions).unwrap(); let files = state + .snapshot .files_matching_predicate(&[]) .unwrap() .collect::>(); @@ -401,6 +380,7 @@ mod tests { .or(col("value").lt_eq(lit::(0))); let files = state + .snapshot .files_matching_predicate(&[predictate]) .unwrap() .collect::>(); diff --git a/crates/core/src/operations/transaction/test_utils.rs 
b/crates/core/src/operations/transaction/test_utils.rs index 484f69909a..ada5ded056 100644 --- a/crates/core/src/operations/transaction/test_utils.rs +++ b/crates/core/src/operations/transaction/test_utils.rs @@ -1,7 +1,7 @@ #![allow(unused)] use std::collections::HashMap; -use super::prepare_commit; +use super::CommitBuilder; use crate::kernel::{ Action, Add, CommitInfo, DataType, Metadata, PrimitiveType, Protocol, Remove, StructField, StructType, @@ -162,17 +162,9 @@ pub async fn create_initialized_table( }, }; let actions = init_table_actions(None); - let prepared_commit = prepare_commit( - log_store.object_store().as_ref(), - &operation, - &actions, - None, - ) - .await - .unwrap(); - - log_store - .write_commit_entry(0, &prepared_commit) + CommitBuilder::default() + .with_actions(actions) + .build(None, log_store.clone(), operation) .await .unwrap(); DeltaTable::new_with_state(log_store, state) diff --git a/crates/core/src/operations/update.rs b/crates/core/src/operations/update.rs index d07f3f9fc0..2a947f486f 100644 --- a/crates/core/src/operations/update.rs +++ b/crates/core/src/operations/update.rs @@ -19,42 +19,59 @@ //! ```` use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, sync::Arc, time::{Instant, SystemTime, UNIX_EPOCH}, }; -use arrow::datatypes::Schema as ArrowSchema; -use arrow_schema::Field; +use super::write::{write_execution_plan, write_execution_plan_cdc}; +use super::{ + datafusion_utils::Expression, + transaction::{CommitBuilder, CommitProperties}, +}; +use super::{transaction::PROTOCOL, write::WriterStatsConfig}; +use crate::delta_datafusion::{find_files, planner::DeltaPlanner, register_store}; +use crate::kernel::{Action, Remove}; +use crate::logstore::LogStoreRef; +use crate::operations::cdc::*; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::{ + delta_datafusion::{ + expr::fmt_expr_to_sql, + logical::MetricObserver, + physical::{find_metric_node, get_metric, MetricObserverExec}, + DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, + DeltaTableProvider, + }, + DeltaTableError, +}; +use crate::{DeltaResult, DeltaTable}; +use async_trait::async_trait; +use datafusion::error::Result as DataFusionResult; use datafusion::{ + dataframe::DataFrame, + datasource::provider_as_source, execution::context::SessionState, - physical_plan::{metrics::MetricBuilder, projection::ProjectionExec, ExecutionPlan}, + physical_plan::{metrics::MetricBuilder, ExecutionPlan}, + physical_planner::{ExtensionPlanner, PhysicalPlanner}, prelude::SessionContext, }; -use datafusion_common::{Column, DFSchema, ScalarValue}; -use datafusion_expr::{case, col, lit, when, Expr}; -use datafusion_physical_expr::{ - create_physical_expr, - expressions::{self}, - PhysicalExpr, +use datafusion_common::{Column, ScalarValue}; +use datafusion_expr::{ + case, col, lit, when, Expr, Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode, }; use futures::future::BoxFuture; use parquet::file::properties::WriterProperties; use serde::Serialize; -use serde_json::Value; +use tracing::log::*; -use super::datafusion_utils::Expression; -use super::transaction::{commit, PROTOCOL}; -use super::write::write_execution_plan; -use crate::delta_datafusion::{ - expr::fmt_expr_to_sql, physical::MetricObserverExec, DeltaColumn, DeltaSessionContext, -}; -use crate::delta_datafusion::{find_files, register_store, DeltaScanBuilder}; -use crate::kernel::{Action, Remove}; -use crate::logstore::LogStoreRef; -use 
crate::protocol::DeltaOperation; -use crate::table::state::DeltaTableState; -use crate::{DeltaResult, DeltaTable}; +/// Custom column name used for marking internal [RecordBatch] rows as updated +pub(crate) const UPDATE_PREDICATE_COLNAME: &str = "__delta_rs_update_predicate"; + +const UPDATE_COUNT_ID: &str = "update_source_count"; +const UPDATE_ROW_COUNT: &str = "num_updated_rows"; +const COPIED_ROW_COUNT: &str = "num_copied_rows"; /// Updates records in the Delta Table. /// See this module's documentation for more information @@ -71,8 +88,8 @@ pub struct UpdateBuilder { state: Option, /// Properties passed to underlying parquet writer for when files are rewritten writer_properties: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, /// safe_cast determines how data types that do not match the underlying table are handled /// By default an error is returned safe_cast: bool, @@ -95,6 +112,8 @@ pub struct UpdateMetrics { pub scan_time_ms: u64, } +impl super::Operation<()> for UpdateBuilder {} + impl UpdateBuilder { /// Create a new ['UpdateBuilder'] pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { @@ -105,7 +124,7 @@ impl UpdateBuilder { log_store, state: None, writer_properties: None, - app_metadata: None, + commit_properties: CommitProperties::default(), safe_cast: false, } } @@ -133,11 +152,8 @@ impl UpdateBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -162,17 +178,55 @@ impl UpdateBuilder { } } +#[derive(Clone)] +struct UpdateMetricExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for UpdateMetricExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DataFusionResult>> { + if let Some(metric_observer) = node.as_any().downcast_ref::() { + if metric_observer.id.eq(UPDATE_COUNT_ID) { + return Ok(Some(MetricObserverExec::try_new( + UPDATE_COUNT_ID.into(), + physical_inputs, + |batch, metrics| { + let array = batch.column_by_name(UPDATE_PREDICATE_COLNAME).unwrap(); + let copied_rows = array.null_count(); + let num_updated = array.len() - copied_rows; + + MetricBuilder::new(metrics) + .global_counter(UPDATE_ROW_COUNT) + .add(num_updated); + + MetricBuilder::new(metrics) + .global_counter(COPIED_ROW_COUNT) + .add(copied_rows); + }, + )?)); + } + } + Ok(None) + } +} + #[allow(clippy::too_many_arguments)] async fn execute( predicate: Option, updates: HashMap, log_store: LogStoreRef, - snapshot: &DeltaTableState, + snapshot: DeltaTableState, state: SessionState, writer_properties: Option, - app_metadata: Option>, + mut commit_properties: CommitProperties, safe_cast: bool, -) -> DeltaResult<((Vec, i64, Option), UpdateMetrics)> { +) -> DeltaResult<(DeltaTableState, UpdateMetrics)> { // Validate the predicate and update expressions. // // If the predicate is not set, then all files need to be updated. @@ -183,12 +237,17 @@ async fn execute( // perform update operations, and then commit add and remove actions to // the log. 
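From the caller's side, the switch from `with_metadata` to `with_commit_properties` looks roughly like the following sketch, assuming a `table: DeltaTable`, the DataFusion `col`/`lit` helpers, and illustrative column names and metadata:

    let (table, metrics) = DeltaOps(table)
        .update()
        .with_predicate(col("id").eq(lit(10)))
        .with_update("value", lit(100))
        .with_commit_properties(
            CommitProperties::default()
                .with_metadata([("operation_origin".to_string(), serde_json::json!("backfill"))]),
        )
        .await?;
    println!("updated {} rows", metrics.num_updated_rows);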
+ let update_planner = DeltaPlanner:: { + extension_planner: UpdateMetricExtensionPlanner {}, + }; + + let state = state.clone().with_query_planner(Arc::new(update_planner)); + let exec_start = Instant::now(); let mut metrics = UpdateMetrics::default(); - let mut version = snapshot.version(); if updates.is_empty() { - return Ok(((Vec::new(), version, None), metrics)); + return Ok((snapshot, metrics)); } let predicate = match predicate { @@ -199,185 +258,126 @@ async fn execute( None => None, }; - let updates: HashMap = updates + let updates = updates .into_iter() .map(|(key, expr)| match expr { - Expression::DataFusion(e) => Ok((key, e)), + Expression::DataFusion(e) => Ok((key.name, e)), Expression::String(s) => snapshot .parse_predicate_expression(s, &state) - .map(|e| (key, e)), + .map(|e| (key.name, e)), }) - .collect::, _>>()?; + .collect::, _>>()?; let current_metadata = snapshot.metadata(); let table_partition_cols = current_metadata.partition_columns.clone(); let scan_start = Instant::now(); - let candidates = find_files(snapshot, log_store.clone(), &state, predicate.clone()).await?; + let candidates = find_files(&snapshot, log_store.clone(), &state, predicate.clone()).await?; metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_millis() as u64; if candidates.candidates.is_empty() { - return Ok(((Vec::new(), version, None), metrics)); + return Ok((snapshot, metrics)); } let predicate = predicate.unwrap_or(Expr::Literal(ScalarValue::Boolean(Some(true)))); - let execution_props = state.execution_props(); + let scan_config = DeltaScanConfigBuilder::default() + .with_file_column(false) + .build(&snapshot)?; + // For each rewrite evaluate the predicate and then modify each expression // to either compute the new value or obtain the old one then write these batches - let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), &state) - .with_files(&candidates.candidates) - .build() - .await?; - let scan = Arc::new(scan); + let target_provider = Arc::new( + DeltaTableProvider::try_new(snapshot.clone(), log_store.clone(), scan_config.clone())? + .with_files(candidates.candidates.clone()), + ); - // Create a projection for a new column with the predicate evaluated - let input_schema = snapshot.input_schema()?; + let target_provider = provider_as_source(target_provider); + let plan = LogicalPlanBuilder::scan("target", target_provider.clone(), None)?.build()?; - let mut fields = Vec::new(); - for field in input_schema.fields.iter() { - fields.push(field.to_owned()); - } - fields.push(Arc::new(Field::new( - "__delta_rs_update_predicate", - arrow_schema::DataType::Boolean, - true, - ))); - // Recreate the schemas with the new column included - let input_schema = Arc::new(ArrowSchema::new(fields)); - let input_dfschema: DFSchema = input_schema.as_ref().clone().try_into()?; - - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = scan.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } + let df = DataFrame::new(state.clone(), plan); // Take advantage of how null counts are tracked in arrow arrays use the // null count to track how many records do NOT statisfy the predicate. 
The // count is then exposed through the metrics through the `UpdateCountExec` // execution plan - let predicate_null = when(predicate.clone(), lit(true)).otherwise(lit(ScalarValue::Boolean(None)))?; - let predicate_expr = create_physical_expr(&predicate_null, &input_dfschema, execution_props)?; - expressions.push((predicate_expr, "__delta_rs_update_predicate".to_string())); - - let projection_predicate: Arc = - Arc::new(ProjectionExec::try_new(expressions, scan)?); - - let count_plan = Arc::new(MetricObserverExec::new( - "update_count".into(), - projection_predicate.clone(), - |batch, metrics| { - let array = batch.column_by_name("__delta_rs_update_predicate").unwrap(); - let copied_rows = array.null_count(); - let num_updated = array.len() - copied_rows; - - MetricBuilder::new(metrics) - .global_counter("num_updated_rows") - .add(num_updated); - - MetricBuilder::new(metrics) - .global_counter("num_copied_rows") - .add(copied_rows); - }, - )); - // Perform another projection but instead calculate updated values based on - // the predicate value. If the predicate is true then evalute the user - // provided expression otherwise return the original column value - // - // For each update column a new column with a name of __delta_rs_ + `original name` is created - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = count_plan.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } + let df_with_update_col = df + .clone() + .with_column(UPDATE_PREDICATE_COLNAME, predicate_null)?; - // Maintain a map from the original column name to its temporary column index - let mut map = HashMap::::new(); - let mut control_columns = HashSet::::new(); - control_columns.insert("__delta_rs_update_predicate".to_owned()); - - for (column, expr) in updates { - let expr = case(col("__delta_rs_update_predicate")) - .when(lit(true), expr.to_owned()) - .otherwise(col(column.to_owned()))?; - let predicate_expr = create_physical_expr(&expr, &input_dfschema, execution_props)?; - map.insert(column.name.clone(), expressions.len()); - let c = "__delta_rs_".to_string() + &column.name; - expressions.push((predicate_expr, c.clone())); - control_columns.insert(c); - } + let plan_with_metrics = LogicalPlan::Extension(Extension { + node: Arc::new(MetricObserver { + id: UPDATE_COUNT_ID.into(), + input: df_with_update_col.into_unoptimized_plan(), + enable_pushdown: false, + }), + }); - let projection_update: Arc = - Arc::new(ProjectionExec::try_new(expressions, count_plan.clone())?); - - // Project again to remove __delta_rs columns and rename update columns to their original name - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = projection_update.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - if !control_columns.contains(field.name()) { - match map.get(field.name()) { - Some(value) => { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), *value)), - field.name().to_owned(), - )); - } - None => { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } - } - } - } + let df_with_predicate_and_metrics = DataFrame::new(state.clone(), plan_with_metrics); - let projection: Arc = Arc::new(ProjectionExec::try_new( - expressions, - projection_update.clone(), - )?); + let expressions: Vec = df_with_predicate_and_metrics + .schema() + .fields() + 
.into_iter() + .map(|field| { + let field_name = field.name(); + let expr = match updates.get(field_name) { + Some(expr) => case(col(UPDATE_PREDICATE_COLNAME)) + .when(lit(true), expr.to_owned()) + .otherwise(col(Column::from_name(field_name)))? + .alias(field_name), + None => col(Column::from_name(field_name)), + }; + Ok(expr) + }) + .collect::>>()?; + + let updated_df = df_with_predicate_and_metrics.select(expressions.clone())?; + let physical_plan = updated_df.clone().create_physical_plan().await?; + let writer_stats_config = WriterStatsConfig::new( + snapshot.table_config().num_indexed_cols(), + snapshot + .table_config() + .stats_columns() + .map(|v| v.iter().map(|v| v.to_string()).collect::>()), + ); + + let tracker = CDCTracker::new( + df, + updated_df.drop_columns(&vec![UPDATE_PREDICATE_COLNAME])?, + ); let add_actions = write_execution_plan( - Some(snapshot), + Some(&snapshot), state.clone(), - projection.clone(), + physical_plan.clone(), table_partition_cols.clone(), log_store.object_store().clone(), Some(snapshot.table_config().target_file_size() as usize), None, - writer_properties, + writer_properties.clone(), safe_cast, - false, + None, + writer_stats_config.clone(), + None, ) .await?; - let count_metrics = count_plan.metrics().unwrap(); - - metrics.num_updated_rows = count_metrics - .sum_by_name("num_updated_rows") - .map(|m| m.as_usize()) - .unwrap_or(0); + let err = || DeltaTableError::Generic("Unable to locate expected metric node".into()); + let update_count = find_metric_node(UPDATE_COUNT_ID, &physical_plan).ok_or_else(err)?; + let update_count_metrics = update_count.metrics().unwrap(); - metrics.num_copied_rows = count_metrics - .sum_by_name("num_copied_rows") - .map(|m| m.as_usize()) - .unwrap_or(0); + metrics.num_updated_rows = get_metric(&update_count_metrics, UPDATE_ROW_COUNT); + metrics.num_copied_rows = get_metric(&update_count_metrics, COPIED_ROW_COUNT); let deletion_timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap() .as_millis() as i64; - let mut actions: Vec = add_actions.into_iter().map(Action::Add).collect(); + let mut actions: Vec = add_actions.clone(); metrics.num_added_files = actions.len(); metrics.num_removed_files = candidates.candidates.len(); @@ -403,27 +403,46 @@ async fn execute( predicate: Some(fmt_expr_to_sql(&predicate)?), }; - let mut app_metadata = match app_metadata { - Some(meta) => meta, - None => HashMap::new(), - }; - - app_metadata.insert("readVersion".to_owned(), snapshot.version().into()); - - if let Ok(map) = serde_json::to_value(&metrics) { - app_metadata.insert("operationMetrics".to_owned(), map); + commit_properties + .app_metadata + .insert("readVersion".to_owned(), snapshot.version().into()); + + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(&metrics)?, + ); + + if let Ok(true) = should_write_cdc(&snapshot) { + match tracker.collect() { + Ok(df) => { + let cdc_actions = write_execution_plan_cdc( + Some(&snapshot), + state, + df.create_physical_plan().await?, + table_partition_cols, + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + safe_cast, + writer_stats_config, + None, + ) + .await?; + actions.extend(cdc_actions); + } + Err(err) => { + error!("Failed to collect CDC batches: {err:#?}"); + } + }; } - version = commit( - log_store.as_ref(), - &actions, - operation.clone(), - Some(snapshot), - Some(app_metadata), - ) - .await?; + let commit = CommitBuilder::from(commit_properties) + 
.with_actions(actions) + .build(Some(&snapshot), log_store, operation) + .await?; - Ok(((actions, version, Some(operation)), metrics)) + Ok((commit.snapshot(), metrics)) } impl std::future::IntoFuture for UpdateBuilder { @@ -431,12 +450,11 @@ impl std::future::IntoFuture for UpdateBuilder { type IntoFuture = BoxFuture<'static, Self::Output>; fn into_future(self) -> Self::IntoFuture { - let mut this = self; + let this = self; Box::pin(async move { - PROTOCOL.check_append_only(&this.snapshot)?; - - PROTOCOL.can_write_to(&this.snapshot)?; + PROTOCOL.check_append_only(&this.snapshot.snapshot)?; + PROTOCOL.can_write_to(&this.snapshot.snapshot)?; let state = this.state.unwrap_or_else(|| { let session: SessionContext = DeltaSessionContext::default().into(); @@ -447,34 +465,34 @@ impl std::future::IntoFuture for UpdateBuilder { session.state() }); - let ((actions, version, operation), metrics) = execute( + let (snapshot, metrics) = execute( this.predicate, this.updates, this.log_store.clone(), - &this.snapshot, + this.snapshot, state, this.writer_properties, - this.app_metadata, + this.commit_properties, this.safe_cast, ) .await?; - if let Some(op) = &operation { - this.snapshot.merge(actions, op, version)?; - } - - let table = DeltaTable::new_with_state(this.log_store, this.snapshot); - Ok((table, metrics)) + Ok(( + DeltaTable::new_with_state(this.log_store, snapshot), + metrics, + )) }) } } #[cfg(test)] mod tests { + use super::*; + + use crate::delta_datafusion::cdf::DeltaCdfScan; use crate::kernel::DataType as DeltaDataType; - use crate::kernel::PrimitiveType; - use crate::kernel::StructField; - use crate::kernel::StructType; + use crate::kernel::{Action, PrimitiveType, Protocol, StructField, StructType}; + use crate::operations::collect_sendable_stream; use crate::operations::DeltaOps; use crate::writer::test_utils::datafusion::get_data; use crate::writer::test_utils::datafusion::write_batch; @@ -483,12 +501,13 @@ mod tests { }; use crate::DeltaConfigKey; use crate::DeltaTable; + use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::Schema as ArrowSchema; use arrow::datatypes::{Field, Schema}; use arrow::record_batch::RecordBatch; - use arrow_array::Int32Array; use arrow_schema::DataType; use datafusion::assert_batches_sorted_eq; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; use serde_json::json; use std::sync::Arc; @@ -498,7 +517,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -788,7 +807,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); let table = write_batch(table, batch).await; @@ -968,4 +987,248 @@ mod tests { .await; assert!(res.is_err()); } + + #[tokio::test] + async fn test_no_cdc_on_older_tables() { + let table = prepare_values_table().await; + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 1); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) 
+ .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("value", lit(12)) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + // NOTE: This currently doesn't really assert anything because cdc_files() is not reading + // actions correct + if let Some(state) = table.state.clone() { + let cdc_files = state.cdc_files(); + assert!(cdc_files.is_ok()); + if let Ok(cdc_files) = cdc_files { + let cdc_files: Vec<_> = cdc_files.collect(); + assert_eq!(cdc_files.len(), 0); + } + } else { + panic!("I shouldn't exist!"); + } + + // Too close for missiles, switching to guns. Just checking that the data wasn't actually + // written instead! + if let Ok(files) = crate::storage::utils::flatten_list_stream( + &table.object_store(), + Some(&object_store::path::Path::from("_change_data")), + ) + .await + { + assert_eq!( + 0, + files.len(), + "This test should not find any written CDC files! {files:#?}" + ); + } + } + + #[tokio::test] + async fn test_update_cdc_enabled() { + // Currently you cannot pass EnableChangeDataFeed through `with_configuration_property` + // so the only way to create a truly CDC enabled table is by shoving the Protocol + // directly into the actions list + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("value", lit(12)) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! 
{[ + "+-------+------------------+-----------------+", + "| value | _change_type | _commit_version |", + "+-------+------------------+-----------------+", + "| 1 | insert | 1 |", + "| 2 | insert | 1 |", + "| 2 | update_preimage | 2 |", + "| 12 | update_postimage | 2 |", + "| 3 | insert | 1 |", + "+-------+------------------+-----------------+", + ], &batches } + } + + #[tokio::test] + async fn test_update_cdc_enabled_partitions() { + // Currently you cannot pass EnableChangeDataFeed through `with_configuration_property` + // so the only way to create a truly CDC enabled table is by shoving the Protocol + // directly into the actions list + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "year", + DeltaDataType::Primitive(PrimitiveType::String), + true, + None, + ) + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_partition_columns(vec!["year"]) + .with_actions(actions) + .with_configuration_property(DeltaConfigKey::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![ + Field::new("year", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020"), + Some("2024"), + ])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + ], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("year", "2024") + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + let _ = arrow::util::pretty::print_batches(&batches); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! {[ + "+-------+------------------+-----------------+------+", + "| value | _change_type | _commit_version | year |", + "+-------+------------------+-----------------+------+", + "| 1 | insert | 1 | 2020 |", + "| 2 | insert | 1 | 2020 |", + "| 2 | update_preimage | 2 | 2020 |", + "| 2 | update_postimage | 2 | 2024 |", + "| 3 | insert | 1 | 2024 |", + "+-------+------------------+-----------------+------+", + ], &batches } + } + + async fn collect_batches( + num_partitions: usize, + stream: DeltaCdfScan, + ctx: SessionContext, + ) -> Result, Box> { + let mut batches = vec![]; + for p in 0..num_partitions { + let data: Vec = + collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?; + batches.extend_from_slice(&data); + } + Ok(batches) + } } diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index f539b0e22d..0e4bd2b467 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -21,7 +21,7 @@ //! 
let (table, metrics) = VacuumBuilder::new(table.object_store(). table.state).await?; //! ```` -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; @@ -31,13 +31,10 @@ use futures::{StreamExt, TryStreamExt}; use object_store::Error; use object_store::{path::Path, ObjectStore}; use serde::Serialize; -use serde_json::Value; -use super::transaction::commit; -use crate::crate_version; +use super::transaction::{CommitBuilder, CommitProperties}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::Action; -use crate::logstore::{LogStore, LogStoreRef}; +use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::DeltaTable; @@ -94,10 +91,12 @@ pub struct VacuumBuilder { dry_run: bool, /// Override the source of time clock: Option>, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, } +impl super::Operation<()> for VacuumBuilder {} + /// Details for the Vacuum operation including which files were #[derive(Debug)] pub struct VacuumMetrics { @@ -138,7 +137,7 @@ impl VacuumBuilder { enforce_retention_duration: true, dry_run: false, clock: None, - app_metadata: None, + commit_properties: CommitProperties::default(), } } @@ -168,11 +167,8 @@ impl VacuumBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -258,7 +254,11 @@ impl std::future::IntoFuture for VacuumBuilder { } let metrics = plan - .execute(this.log_store.as_ref(), &this.snapshot, this.app_metadata) + .execute( + this.log_store.clone(), + &this.snapshot, + this.commit_properties, + ) .await?; Ok(( DeltaTable::new_with_state(this.log_store, this.snapshot), @@ -286,9 +286,9 @@ impl VacuumPlan { /// Execute the vacuum plan and delete files from underlying storage pub async fn execute( self, - store: &dyn LogStore, + store: LogStoreRef, snapshot: &DeltaTableState, - app_metadata: Option>, + mut commit_properties: CommitProperties, ) -> Result { if self.files_to_delete.is_empty() { return Ok(VacuumMetrics { @@ -307,30 +307,22 @@ impl VacuumPlan { status: String::from("COMPLETED"), // Maybe this should be FAILED when vacuum has error during the files, not sure how to check for this }; - let start_metrics = serde_json::to_value(VacuumStartOperationMetrics { + let start_metrics = VacuumStartOperationMetrics { num_files_to_delete: self.files_to_delete.len() as i64, size_of_data_to_delete: self.file_sizes.iter().sum(), - }); + }; // Begin VACUUM START COMMIT - let mut commit_info = start_operation.get_commit_info(); - let mut extra_info = match app_metadata.clone() { - Some(meta) => meta, - None => HashMap::new(), - }; - commit_info.timestamp = Some(Utc::now().timestamp_millis()); - extra_info.insert( - "clientVersion".to_string(), - Value::String(format!("delta-rs.{}", crate_version())), + let mut start_props = CommitProperties::default(); + start_props.app_metadata = commit_properties.app_metadata.clone(); + start_props.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(start_metrics)?, ); - if let Ok(map) = start_metrics { - extra_info.insert("operationMetrics".to_owned(), map); - } - 
commit_info.info = extra_info; - let start_actions = vec![Action::CommitInfo(commit_info)]; - - commit(store, &start_actions, start_operation, Some(snapshot), None).await?; + CommitBuilder::from(start_props) + .build(Some(snapshot), store.clone(), start_operation) + .await?; // Finish VACUUM START COMMIT let locations = futures::stream::iter(self.files_to_delete) @@ -349,32 +341,19 @@ impl VacuumPlan { .await?; // Create end metadata - let end_metrics = serde_json::to_value(VacuumEndOperationMetrics { + let end_metrics = VacuumEndOperationMetrics { num_deleted_files: files_deleted.len() as i64, num_vacuumed_directories: 0, // Set to zero since we only remove files not dirs - }); - - // Begin VACUUM END COMMIT - let mut commit_info = end_operation.get_commit_info(); - - let mut extra_info = match app_metadata.clone() { - Some(meta) => meta, - None => HashMap::new(), }; - commit_info.timestamp = Some(Utc::now().timestamp_millis()); - extra_info.insert( - "clientVersion".to_string(), - Value::String(format!("delta-rs.{}", crate_version())), + // Begin VACUUM END COMMIT + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(end_metrics)?, ); - if let Ok(map) = end_metrics { - extra_info.insert("operationMetrics".to_owned(), map); - } - commit_info.info = extra_info; - - let end_actions = vec![Action::CommitInfo(commit_info)]; - - commit(store, &end_actions, end_operation, Some(snapshot), None).await?; + CommitBuilder::from(commit_properties) + .build(Some(snapshot), store.clone(), end_operation) + .await?; // Finish VACUUM END COMMIT Ok(VacuumMetrics { diff --git a/crates/core/src/operations/write.rs b/crates/core/src/operations/write.rs index bb976b5fb9..923eadeeaf 100644 --- a/crates/core/src/operations/write.rs +++ b/crates/core/src/operations/write.rs @@ -1,4 +1,3 @@ -//! Used to write [RecordBatch]es into a delta table. //! //! New Table Semantics //! - The schema of the [RecordBatch] is used to initialize the table. @@ -26,33 +25,43 @@ //! 
```` use std::collections::HashMap; +use std::str::FromStr; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; +use std::{iter, vec}; use arrow_array::RecordBatch; use arrow_cast::can_cast_types; -use arrow_schema::{DataType, Fields, SchemaRef as ArrowSchemaRef}; +use arrow_schema::{ArrowError, DataType, Fields, SchemaRef as ArrowSchemaRef}; use datafusion::execution::context::{SessionContext, SessionState, TaskContext}; -use datafusion::physical_expr::create_physical_expr; use datafusion::physical_plan::filter::FilterExec; +use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::{memory::MemoryExec, ExecutionPlan}; -use datafusion_common::DFSchema; -use datafusion_expr::Expr; +use datafusion_common::{DFSchema, ScalarValue}; +use datafusion_expr::{lit, Expr}; +use datafusion_physical_expr::expressions::{self}; +use datafusion_physical_expr::PhysicalExpr; use futures::future::BoxFuture; use futures::StreamExt; +use object_store::prefix::PrefixStore; use parquet::file::properties::WriterProperties; +use tracing::log::*; +use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; -use super::transaction::PROTOCOL; +use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::writer::{DeltaWriter, WriterConfig}; -use super::{transaction::commit, CreateBuilder}; +use super::CreateBuilder; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::expr::parse_predicate_expression; -use crate::delta_datafusion::DeltaDataChecker; -use crate::delta_datafusion::{find_files, register_store, DeltaScanBuilder}; +use crate::delta_datafusion::{ + find_files, register_store, DeltaScanBuilder, DeltaScanConfigBuilder, +}; +use crate::delta_datafusion::{DataFusionMixins, DeltaDataChecker}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, Add, PartitionsExt, Remove, StructType}; +use crate::kernel::{Action, Add, AddCDCFile, Metadata, PartitionsExt, Remove, StructType}; use crate::logstore::LogStoreRef; +use crate::operations::cast::{cast_record_batch, merge_schema}; use crate::protocol::{DeltaOperation, SaveMode}; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; @@ -60,6 +69,8 @@ use crate::table::Constraint as DeltaConstraint; use crate::writer::record_batch::divide_by_partition_values; use crate::DeltaTable; +use tokio::sync::mpsc::Sender; + #[derive(thiserror::Error, Debug)] enum WriteError { #[error("No data source supplied to write command.")] @@ -88,6 +99,30 @@ impl From for DeltaTableError { } } +///Specifies how to handle schema drifts +#[derive(PartialEq, Clone, Copy)] +pub enum SchemaMode { + /// Overwrite the schema with the new schema + Overwrite, + /// Append the new schema to the existing schema + Merge, +} + +impl FromStr for SchemaMode { + type Err = DeltaTableError; + + fn from_str(s: &str) -> DeltaResult { + match s.to_ascii_lowercase().as_str() { + "overwrite" => Ok(SchemaMode::Overwrite), + "merge" => Ok(SchemaMode::Merge), + _ => Err(DeltaTableError::Generic(format!( + "Invalid schema write mode provided: {}, only these are supported: ['overwrite', 'merge']", + s + ))), + } + } +} + /// Write data into a DeltaTable pub struct WriteBuilder { /// A snapshot of the to-be-loaded table's state @@ -110,14 +145,14 @@ pub struct WriteBuilder { write_batch_size: Option, /// RecordBatches to be written into the table batches: Option>, - /// whether to overwrite the schema - overwrite_schema: bool, + /// whether to overwrite 
the schema or to merge it. None means to fail on schmema drift + schema_mode: Option, /// how to handle cast failures, either return NULL (safe=true) or return ERR (safe=false) safe_cast: bool, /// Parquet writer properties writer_properties: Option, - /// Additional metadata to be added to commit - app_metadata: Option>, + /// Additional information to add to the commit + commit_properties: CommitProperties, /// Name of the table, only used when table doesn't exist yet name: Option, /// Description of the table, only used when table doesn't exist yet @@ -126,6 +161,8 @@ pub struct WriteBuilder { configuration: HashMap>, } +impl super::Operation<()> for WriteBuilder {} + impl WriteBuilder { /// Create a new [`WriteBuilder`] pub fn new(log_store: LogStoreRef, snapshot: Option) -> Self { @@ -141,9 +178,9 @@ impl WriteBuilder { write_batch_size: None, batches: None, safe_cast: false, - overwrite_schema: false, + schema_mode: None, writer_properties: None, - app_metadata: None, + commit_properties: CommitProperties::default(), name: None, description: None, configuration: Default::default(), @@ -156,9 +193,9 @@ impl WriteBuilder { self } - /// Add overwrite_schema - pub fn with_overwrite_schema(mut self, overwrite_schema: bool) -> Self { - self.overwrite_schema = overwrite_schema; + /// Add Schema Write Mode + pub fn with_schema_mode(mut self, schema_mode: SchemaMode) -> Self { + self.schema_mode = Some(schema_mode); self } @@ -222,11 +259,8 @@ impl WriteBuilder { } /// Additional metadata to be added to commit info - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, - ) -> Self { - self.app_metadata = Some(HashMap::from_iter(metadata)); + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; self } @@ -259,6 +293,21 @@ impl WriteBuilder { match &self.snapshot { Some(snapshot) => { PROTOCOL.can_write_to(snapshot)?; + + let schema: StructType = if let Some(plan) = &self.input { + (plan.schema()).try_into()? + } else if let Some(batches) = &self.batches { + if batches.is_empty() { + return Err(WriteError::MissingData.into()); + } + (batches[0].schema()).try_into()? 
+ } else { + return Err(WriteError::MissingData.into()); + }; + + if self.schema_mode.is_none() { + PROTOCOL.check_can_write_timestamp_ntz(snapshot, &schema)?; + } match self.mode { SaveMode::ErrorIfExists => { Err(WriteError::AlreadyExists(self.log_store.root_uri()).into()) @@ -279,7 +328,7 @@ impl WriteBuilder { }?; let mut builder = CreateBuilder::new() .with_log_store(self.log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_configuration(self.configuration.clone()); if let Some(partition_columns) = self.partition_columns.as_ref() { builder = builder.with_partition_columns(partition_columns.clone()) @@ -299,6 +348,24 @@ impl WriteBuilder { } } } +/// Configuration for the writer on how to collect stats +#[derive(Clone)] +pub struct WriterStatsConfig { + /// Number of columns to collect stats for, idx based + num_indexed_cols: i32, + /// Optional list of columns which to collect stats for, takes precedende over num_index_cols + stats_columns: Option>, +} + +impl WriterStatsConfig { + /// Create new writer stats config + pub fn new(num_indexed_cols: i32, stats_columns: Option>) -> Self { + Self { + num_indexed_cols, + stats_columns, + } + } +} #[allow(clippy::too_many_arguments)] async fn write_execution_plan_with_predicate( @@ -312,17 +379,17 @@ async fn write_execution_plan_with_predicate( write_batch_size: Option, writer_properties: Option, safe_cast: bool, - overwrite_schema: bool, -) -> DeltaResult> { - // Use input schema to prevent wrapping partitions columns into a dictionary. - let schema: ArrowSchemaRef = if overwrite_schema { + schema_mode: Option, + writer_stats_config: WriterStatsConfig, + sender: Option>, +) -> DeltaResult> { + let schema: ArrowSchemaRef = if schema_mode.is_some() { plan.schema() } else { snapshot .and_then(|s| s.input_schema().ok()) .unwrap_or(plan.schema()) }; - let checker = if let Some(snapshot) = snapshot { DeltaDataChecker::new(snapshot) } else { @@ -339,7 +406,7 @@ async fn write_execution_plan_with_predicate( // Write data to disk let mut tasks = vec![]; - for i in 0..plan.output_partitioning().partition_count() { + for i in 0..plan.properties().output_partitioning().partition_count() { let inner_plan = plan.clone(); let inner_schema = schema.clone(); let task_ctx = Arc::new(TaskContext::from(&state)); @@ -349,27 +416,48 @@ async fn write_execution_plan_with_predicate( writer_properties.clone(), target_file_size, write_batch_size, + writer_stats_config.num_indexed_cols, + writer_stats_config.stats_columns.clone(), ); let mut writer = DeltaWriter::new(object_store.clone(), config); let checker_stream = checker.clone(); + let sender_stream = sender.clone(); let mut stream = inner_plan.execute(i, task_ctx)?; - let handle: tokio::task::JoinHandle>> = - tokio::task::spawn(async move { + + let handle: tokio::task::JoinHandle>> = tokio::task::spawn( + async move { + let sendable = sender_stream.clone(); while let Some(maybe_batch) = stream.next().await { let batch = maybe_batch?; + checker_stream.check_batch(&batch).await?; - let arr = - super::cast::cast_record_batch(&batch, inner_schema.clone(), safe_cast)?; + let arr = super::cast::cast_record_batch( + &batch, + inner_schema.clone(), + safe_cast, + schema_mode == Some(SchemaMode::Merge), + )?; + + if let Some(s) = sendable.as_ref() { + if let Err(e) = s.send(arr.clone()).await { + error!("Failed to send data to observer: {e:#?}"); + } + } else { + debug!("write_execution_plan_with_predicate did not send any batches, no sender."); + } 
writer.write(&arr).await?; } - writer.close().await - }); + let add_actions = writer.close().await; + match add_actions { + Ok(actions) => Ok(actions.into_iter().map(Action::Add).collect::>()), + Err(err) => Err(err), + } + }, + ); tasks.push(handle); } - - // Collect add actions to add to commit - Ok(futures::future::join_all(tasks) + let actions = futures::future::join_all(tasks) .await .into_iter() .collect::, _>>() @@ -378,7 +466,64 @@ async fn write_execution_plan_with_predicate( .collect::, _>>()? .concat() .into_iter() - .collect::>()) + .collect::>(); + // Collect add actions to add to commit + Ok(actions) +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn write_execution_plan_cdc( + snapshot: Option<&DeltaTableState>, + state: SessionState, + plan: Arc, + partition_columns: Vec, + object_store: ObjectStoreRef, + target_file_size: Option, + write_batch_size: Option, + writer_properties: Option, + safe_cast: bool, + writer_stats_config: WriterStatsConfig, + sender: Option>, +) -> DeltaResult> { + let cdc_store = Arc::new(PrefixStore::new(object_store, "_change_data")); + + // If not overwrite, the plan schema is not taken but table schema, + // however we need the plan schema since it has the _change_type_col + let schema_mode = Some(SchemaMode::Overwrite); + Ok(write_execution_plan( + snapshot, + state, + plan, + partition_columns, + cdc_store, + target_file_size, + write_batch_size, + writer_properties, + safe_cast, + schema_mode, + writer_stats_config, + sender, + ) + .await? + .into_iter() + .map(|add| { + // Modify add actions into CDC actions + match add { + Action::Add(add) => { + Action::Cdc(AddCDCFile { + // This is a gnarly hack, but the action needs the nested path, not the + // path isnide the prefixed store + path: format!("_change_data/{}", add.path), + size: add.size, + partition_values: add.partition_values, + data_change: false, + tags: add.tags, + }) + } + _ => panic!("Expected Add action"), + } + }) + .collect::>()) } #[allow(clippy::too_many_arguments)] @@ -392,8 +537,10 @@ pub(crate) async fn write_execution_plan( write_batch_size: Option, writer_properties: Option, safe_cast: bool, - overwrite_schema: bool, -) -> DeltaResult> { + schema_mode: Option, + writer_stats_config: WriterStatsConfig, + sender: Option>, +) -> DeltaResult> { write_execution_plan_with_predicate( None, snapshot, @@ -405,11 +552,14 @@ pub(crate) async fn write_execution_plan( write_batch_size, writer_properties, safe_cast, - overwrite_schema, + schema_mode, + writer_stats_config, + sender, ) .await } +#[allow(clippy::too_many_arguments)] async fn execute_non_empty_expr( snapshot: &DeltaTableState, log_store: LogStoreRef, @@ -418,49 +568,143 @@ async fn execute_non_empty_expr( expression: &Expr, rewrite: &[Add], writer_properties: Option, -) -> DeltaResult> { + writer_stats_config: WriterStatsConfig, + partition_scan: bool, +) -> DeltaResult> { // For each identified file perform a parquet scan + filter + limit (1) + count. // If returned count is not zero then append the file to be rewritten and removed from the log. Otherwise do nothing to the file. + let mut actions: Vec = Vec::new(); let input_schema = snapshot.input_schema()?; let input_dfschema: DFSchema = input_schema.clone().as_ref().clone().try_into()?; + let scan_config = DeltaScanConfigBuilder::new() + .with_schema(snapshot.input_schema()?) 
+ .build(snapshot)?; + let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), &state) .with_files(rewrite) + // Use input schema which doesn't wrap partition values, otherwise divide_by_partition_value won't work on UTF8 partitions + // Since it can't fetch a scalar from a dictionary type + .with_scan_config(scan_config) .build() .await?; let scan = Arc::new(scan); - // Apply the negation of the filter and rewrite files - let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + // We don't want to verify the predicate against existing data + if !partition_scan { + // Apply the negation of the filter and rewrite files + let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + + let predicate_expr = state.create_physical_expr(negated_expression, &input_dfschema)?; + let filter: Arc = + Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + + let add_actions: Vec = write_execution_plan( + Some(snapshot), + state.clone(), + filter, + partition_columns.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties.clone(), + false, + None, + writer_stats_config.clone(), + None, + ) + .await?; - let predicate_expr = create_physical_expr( - &negated_expression, - &input_dfschema, - state.execution_props(), - )?; - let filter: Arc = - Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + actions.extend(add_actions); + } - // We don't want to verify the predicate against existing data - let add_actions = write_execution_plan( - Some(snapshot), - state, - filter, + // CDC logic, simply filters data with predicate and adds the _change_type="delete" as literal column + if let Some(cdc_actions) = execute_non_empty_expr_cdc( + snapshot, + log_store, + state.clone(), + scan, + input_dfschema, + expression, partition_columns, - log_store.object_store(), - Some(snapshot.table_config().target_file_size() as usize), - None, writer_properties, - false, - false, + writer_stats_config, ) - .await?; + .await? 
+ { + actions.extend(cdc_actions) + } + Ok(actions) +} - Ok(add_actions) +/// If CDC is enabled it writes all the deletions based on predicate into _change_data directory +#[allow(clippy::too_many_arguments)] +pub(crate) async fn execute_non_empty_expr_cdc( + snapshot: &DeltaTableState, + log_store: LogStoreRef, + state: SessionState, + scan: Arc, + input_dfschema: DFSchema, + expression: &Expr, + table_partition_cols: Vec, + writer_properties: Option, + writer_stats_config: WriterStatsConfig, +) -> DeltaResult>> { + match should_write_cdc(snapshot) { + // Create CDC scan + Ok(true) => { + let cdc_predicate_expr = + state.create_physical_expr(expression.clone(), &input_dfschema)?; + let cdc_scan: Arc = + Arc::new(FilterExec::try_new(cdc_predicate_expr, scan.clone())?); + + // Add literal column "_change_type" + let change_type_lit = lit(ScalarValue::Utf8(Some("delete".to_string()))); + let change_type_expr = state.create_physical_expr(change_type_lit, &input_dfschema)?; + + // Project columns and lit + let project_expressions: Vec<(Arc, String)> = scan + .schema() + .fields() + .into_iter() + .enumerate() + .map(|(idx, field)| -> (Arc, String) { + ( + Arc::new(expressions::Column::new(field.name(), idx)), + field.name().to_owned(), + ) + }) + .chain(iter::once((change_type_expr, "_change_type".to_owned()))) + .collect(); + + let projected_scan: Arc = Arc::new(ProjectionExec::try_new( + project_expressions, + cdc_scan.clone(), + )?); + + let cdc_actions = write_execution_plan_cdc( + Some(snapshot), + state.clone(), + projected_scan.clone(), + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + false, + writer_stats_config, + None, + ) + .await?; + Ok(Some(cdc_actions)) + } + _ => Ok(None), + } } // This should only be called wth a valid predicate +#[allow(clippy::too_many_arguments)] async fn prepare_predicate_actions( predicate: Expr, log_store: LogStoreRef, @@ -469,27 +713,25 @@ async fn prepare_predicate_actions( partition_columns: Vec, writer_properties: Option, deletion_timestamp: i64, + writer_stats_config: WriterStatsConfig, ) -> DeltaResult> { let candidates = find_files(snapshot, log_store.clone(), &state, Some(predicate.clone())).await?; - let add = if candidates.partition_scan { - Vec::new() - } else { - execute_non_empty_expr( - snapshot, - log_store, - state, - partition_columns, - &predicate, - &candidates.candidates, - writer_properties, - ) - .await? - }; - let remove = candidates.candidates; + let mut actions = execute_non_empty_expr( + snapshot, + log_store, + state, + partition_columns, + &predicate, + &candidates.candidates, + writer_properties, + writer_stats_config, + candidates.partition_scan, + ) + .await?; - let mut actions: Vec = add.into_iter().map(Action::Add).collect(); + let remove = candidates.candidates; for action in remove { actions.push(Action::Remove(Remove { @@ -508,6 +750,47 @@ async fn prepare_predicate_actions( Ok(actions) } +/// If CDC is enabled it writes all add add actions data as deletions into _change_data directory +async fn execute_non_empty_expr_cdc_all_actions( + snapshot: &DeltaTableState, + log_store: LogStoreRef, + state: SessionState, + table_partition_cols: Vec, + writer_properties: Option, + writer_stats_config: WriterStatsConfig, +) -> DeltaResult>> { + let current_state_add_actions = &snapshot.file_actions()?; + + let scan_config = DeltaScanConfigBuilder::new() + .with_schema(snapshot.input_schema()?) 
+ .build(snapshot)?; + + // Since all files get removed, check to write CDC + let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), &state) + .with_files(current_state_add_actions) + // Use input schema which doesn't wrap partition values, otherwise divide_by_partition_value won't work on UTF8 partitions + // Since it can't fetch a scalar from a dictionary type + .with_scan_config(scan_config) + .build() + .await?; + + let input_schema = snapshot.input_schema()?; + let input_dfschema: DFSchema = input_schema.clone().as_ref().clone().try_into()?; + + execute_non_empty_expr_cdc( + snapshot, + log_store, + state, + scan.into(), + input_dfschema, + &Expr::Literal(ScalarValue::Boolean(Some(true))), // Keep all data + table_partition_cols, + writer_properties, + writer_stats_config, + ) + .await +} + impl std::future::IntoFuture for WriteBuilder { type Output = DeltaResult; type IntoFuture = BoxFuture<'static, Self::Output>; @@ -518,9 +801,14 @@ impl std::future::IntoFuture for WriteBuilder { Box::pin(async move { if this.mode == SaveMode::Overwrite { if let Some(snapshot) = &this.snapshot { - PROTOCOL.check_append_only(snapshot)?; + PROTOCOL.check_append_only(&snapshot.snapshot)?; } } + if this.schema_mode == Some(SchemaMode::Overwrite) && this.mode != SaveMode::Overwrite { + return Err(DeltaTableError::Generic( + "Schema overwrite not supported for Append".to_string(), + )); + } // Create table actions to initialize table in case it does not yet exist and should be created let mut actions = this.check_preconditions().await?; @@ -547,8 +835,13 @@ impl std::future::IntoFuture for WriteBuilder { } else { Ok(this.partition_columns.unwrap_or_default()) }?; - + let mut schema_drift = false; let plan = if let Some(plan) = this.input { + if this.schema_mode == Some(SchemaMode::Merge) { + return Err(DeltaTableError::Generic( + "Schema merge not supported yet for Datafusion".to_string(), + )); + } Ok(plan) } else if let Some(batches) = this.batches { if batches.is_empty() { @@ -556,6 +849,7 @@ impl std::future::IntoFuture for WriteBuilder { } else { let schema = batches[0].schema(); + let mut new_schema = None; if let Some(snapshot) = &this.snapshot { let table_schema = snapshot .physical_arrow_schema(this.log_store.object_store().clone()) @@ -563,23 +857,42 @@ impl std::future::IntoFuture for WriteBuilder { .or_else(|_| snapshot.arrow_schema()) .unwrap_or(schema.clone()); - if !can_cast_batch(schema.fields(), table_schema.fields()) - && !(this.overwrite_schema && matches!(this.mode, SaveMode::Overwrite)) + if let Err(schema_err) = + try_cast_batch(schema.fields(), table_schema.fields()) { - return Err(DeltaTableError::Generic( - "Schema of data does not match table schema".to_string(), - )); - }; + schema_drift = true; + if this.mode == SaveMode::Overwrite + && this.schema_mode == Some(SchemaMode::Merge) + { + new_schema = + Some(merge_schema(table_schema.clone(), schema.clone())?); + } else if this.mode == SaveMode::Overwrite && this.schema_mode.is_some() + { + new_schema = None // we overwrite anyway, so no need to cast + } else if this.schema_mode == Some(SchemaMode::Merge) { + new_schema = + Some(merge_schema(table_schema.clone(), schema.clone())?); + } else { + return Err(schema_err.into()); + } + } } let data = if !partition_columns.is_empty() { // TODO partitioning should probably happen in its own plan ... 
let mut partitions: HashMap> = HashMap::new(); for batch in batches { + let real_batch = match new_schema.clone() { + Some(new_schema) => { + cast_record_batch(&batch, new_schema, false, true)? + } + None => batch, + }; + let divided = divide_by_partition_values( - schema.clone(), + new_schema.clone().unwrap_or(schema.clone()), partition_columns.clone(), - &batch, + &real_batch, )?; for part in divided { let key = part.partition_values.hive_partition_path(); @@ -595,17 +908,70 @@ impl std::future::IntoFuture for WriteBuilder { } partitions.into_values().collect::>() } else { - vec![batches] + match new_schema { + Some(ref new_schema) => { + let mut new_batches = vec![]; + for batch in batches { + new_batches.push(cast_record_batch( + &batch, + new_schema.clone(), + false, + true, + )?); + } + vec![new_batches] + } + None => vec![batches], + } }; - Ok(Arc::new(MemoryExec::try_new(&data, schema.clone(), None)?) - as Arc) + Ok(Arc::new(MemoryExec::try_new( + &data, + new_schema.unwrap_or(schema).clone(), + None, + )?) as Arc) } } else { Err(WriteError::MissingData) }?; let schema = plan.schema(); - + if this.schema_mode == Some(SchemaMode::Merge) && schema_drift { + if let Some(snapshot) = &this.snapshot { + let schema_struct: StructType = schema.clone().try_into()?; + let current_protocol = snapshot.protocol(); + let configuration = snapshot.metadata().configuration.clone(); + let maybe_new_protocol = if PROTOCOL + .contains_timestampntz(schema_struct.fields()) + && !current_protocol + .reader_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::ReaderFeatures::TimestampWithoutTimezone) + // We can check only reader features, as reader and writer timestampNtz + // should be always enabled together + { + let new_protocol = current_protocol.clone().enable_timestamp_ntz(); + if !(current_protocol.min_reader_version == 3 + && current_protocol.min_writer_version == 7) + { + Some(new_protocol.move_table_properties_into_features(&configuration)) + } else { + Some(new_protocol) + } + } else { + None + }; + let schema_action = Action::Metadata(Metadata::try_new( + schema_struct, + partition_columns.clone(), + configuration, + )?); + actions.push(schema_action); + if let Some(new_protocol) = maybe_new_protocol { + actions.push(new_protocol.into()) + } + } + } let state = match this.state { Some(state) => state, None => { @@ -630,6 +996,18 @@ impl std::future::IntoFuture for WriteBuilder { _ => (None, None), }; + let config: Option> = this + .snapshot + .as_ref() + .map(|snapshot| snapshot.table_config()); + + let (num_indexed_cols, stats_columns) = + super::get_num_idx_cols_and_stats_columns(config, this.configuration); + + let writer_stats_config = WriterStatsConfig { + num_indexed_cols, + stats_columns, + }; // Here we need to validate if the new data conforms to a predicate if one is provided let add_actions = write_execution_plan_with_predicate( predicate.clone(), @@ -642,10 +1020,12 @@ impl std::future::IntoFuture for WriteBuilder { this.write_batch_size, this.writer_properties.clone(), this.safe_cast, - this.overwrite_schema, + this.schema_mode, + writer_stats_config.clone(), + None, ) .await?; - actions.extend(add_actions.into_iter().map(Action::Add)); + actions.extend(add_actions); // Collect remove actions if we are overwriting the table if let Some(snapshot) = &this.snapshot { @@ -657,6 +1037,34 @@ impl std::future::IntoFuture for WriteBuilder { .or_else(|_| snapshot.arrow_schema()) .unwrap_or(schema.clone()); + let configuration = snapshot.metadata().configuration.clone(); 
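+                    // If the incoming data introduces timestamp-without-timezone columns and the
+                    // current protocol does not yet advertise the TimestampWithoutTimezone reader
+                    // feature, upgrade the table protocol before building the overwrite commit.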
+ let current_protocol = snapshot.protocol(); + let maybe_new_protocol = if PROTOCOL.contains_timestampntz( + TryInto::::try_into(schema.clone())?.fields(), + ) && !current_protocol + .reader_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::ReaderFeatures::TimestampWithoutTimezone) + // We can check only reader features, as reader and writer timestampNtz + // should be always enabled together + { + let new_protocol = current_protocol.clone().enable_timestamp_ntz(); + if !(current_protocol.min_reader_version == 3 + && current_protocol.min_writer_version == 7) + { + Some(new_protocol.move_table_properties_into_features(&configuration)) + } else { + Some(new_protocol) + } + } else { + None + }; + + if let Some(protocol) = maybe_new_protocol { + actions.push(protocol.into()) + } + if schema != table_schema { let mut metadata = snapshot.metadata().clone(); let delta_schema: StructType = schema.as_ref().try_into()?; @@ -679,6 +1087,7 @@ impl std::future::IntoFuture for WriteBuilder { partition_columns.clone(), this.writer_properties, deletion_timestamp, + writer_stats_config, ) .await?; if !predicate_actions.is_empty() { @@ -691,6 +1100,21 @@ impl std::future::IntoFuture for WriteBuilder { .into_iter() .map(|p| p.remove_action(true).into()); actions.extend(remove_actions); + + let cdc_actions = execute_non_empty_expr_cdc_all_actions( + snapshot, + this.log_store.clone(), + state, + partition_columns.clone(), + this.writer_properties, + writer_stats_config, + ) + .await?; + + // ADD CDC ACTIONS HERE + if let Some(cdc_actions) = cdc_actions { + actions.extend(cdc_actions); + } } }; } @@ -706,48 +1130,83 @@ impl std::future::IntoFuture for WriteBuilder { predicate: predicate_str, }; - let version = commit( - this.log_store.as_ref(), - &actions, - operation.clone(), - this.snapshot.as_ref(), - this.app_metadata, - ) - .await?; + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build( + this.snapshot.as_ref().map(|f| f as &dyn TableReference), + this.log_store.clone(), + operation.clone(), + ) + .await?; - // TODO we do not have the table config available, but since we are merging only our newly - // created actions, it may be safe to assume, that we want to include all actions. - // then again, having only some tombstones may be misleading. 
- if let Some(mut snapshot) = this.snapshot { - snapshot.merge(actions, &operation, version)?; - Ok(DeltaTable::new_with_state(this.log_store, snapshot)) - } else { - let mut table = DeltaTable::new(this.log_store, Default::default()); - table.update().await?; - Ok(table) - } + Ok(DeltaTable::new_with_state(this.log_store, commit.snapshot)) }) } } -fn can_cast_batch(from_fields: &Fields, to_fields: &Fields) -> bool { +fn try_cast_batch(from_fields: &Fields, to_fields: &Fields) -> Result<(), ArrowError> { if from_fields.len() != to_fields.len() { - return false; + return Err(ArrowError::SchemaError(format!( + "Cannot cast schema, number of fields does not match: {} vs {}", + from_fields.len(), + to_fields.len() + ))); } - from_fields.iter().all(|f| { - if let Some((_, target_field)) = to_fields.find(f.name()) { - if let (DataType::Struct(fields0), DataType::Struct(fields1)) = - (f.data_type(), target_field.data_type()) - { - can_cast_batch(fields0, fields1) + from_fields + .iter() + .map(|f| { + if let Some((_, target_field)) = to_fields.find(f.name()) { + if let (DataType::Struct(fields0), DataType::Struct(fields1)) = + (f.data_type(), target_field.data_type()) + { + try_cast_batch(fields0, fields1) + } else { + match (f.data_type(), target_field.data_type()) { + ( + DataType::Decimal128(left_precision, left_scale) | DataType::Decimal256(left_precision, left_scale), + DataType::Decimal128(right_precision, right_scale) + ) => { + if left_precision <= right_precision && left_scale <= right_scale { + Ok(()) + } else { + Err(ArrowError::SchemaError(format!( + "Cannot cast field {} from {} to {}", + f.name(), + f.data_type(), + target_field.data_type() + ))) + } + }, + ( + _, + DataType::Decimal256(_, _), + ) => { + unreachable!("Target field can never be Decimal 256. 
According to the protocol: 'The precision and scale can be up to 38.'") + }, + (left, right) => { + if !can_cast_types(left, right) { + Err(ArrowError::SchemaError(format!( + "Cannot cast field {} from {} to {}", + f.name(), + f.data_type(), + target_field.data_type() + ))) + } else { + Ok(()) + } + } + } + } } else { - can_cast_types(f.data_type(), target_field.data_type()) + Err(ArrowError::SchemaError(format!( + "Field {} not found in schema", + f.name() + ))) } - } else { - false - } - }) + }) + .collect::, _>>()?; + Ok(()) } #[cfg(test)] @@ -755,17 +1214,14 @@ mod tests { use super::*; use crate::operations::{collect_sendable_stream, DeltaOps}; use crate::protocol::SaveMode; - use crate::writer::test_utils::datafusion::write_batch; - use crate::writer::test_utils::datafusion::{get_data, get_data_sorted}; + use crate::writer::test_utils::datafusion::{get_data, get_data_sorted, write_batch}; use crate::writer::test_utils::{ get_arrow_schema, get_delta_schema, get_delta_schema_with_nested_struct, get_record_batch, get_record_batch_with_nested_struct, setup_table_with_configuration, }; use crate::DeltaConfigKey; - use arrow::datatypes::Field; - use arrow::datatypes::Schema as ArrowSchema; use arrow_array::{Int32Array, StringArray, TimestampMicrosecondArray}; - use arrow_schema::{DataType, TimeUnit}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit}; use datafusion::prelude::*; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; use serde_json::{json, Value}; @@ -791,7 +1247,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -802,7 +1258,7 @@ mod tests { let mut table = DeltaOps(table) .write(vec![batch.clone()]) .with_save_mode(SaveMode::Append) - .with_metadata(metadata.clone()) + .with_commit_properties(CommitProperties::default().with_metadata(metadata.clone())) .await .unwrap(); assert_eq!(table.version(), 1); @@ -825,7 +1281,7 @@ mod tests { let mut table = DeltaOps(table) .write(vec![batch.clone()]) .with_save_mode(SaveMode::Append) - .with_metadata(metadata.clone()) + .with_commit_properties(CommitProperties::default().with_metadata(metadata.clone())) .await .unwrap(); assert_eq!(table.version(), 2); @@ -848,7 +1304,7 @@ mod tests { let mut table = DeltaOps(table) .write(vec![batch]) .with_save_mode(SaveMode::Overwrite) - .with_metadata(metadata.clone()) + .with_commit_properties(CommitProperties::default().with_metadata(metadata.clone())) .await .unwrap(); assert_eq!(table.version(), 3); @@ -942,23 +1398,25 @@ mod tests { let schema = Arc::new(ArrowSchema::new(vec![Field::new( "value", - DataType::Timestamp(TimeUnit::Microsecond, None), + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string().into())), true, )])); let batch = RecordBatch::try_new( Arc::clone(&schema), - vec![Arc::new(TimestampMicrosecondArray::from(vec![Some(10000)]))], + vec![Arc::new( + TimestampMicrosecondArray::from(vec![Some(10000)]).with_timezone("UTC"), + )], ) .unwrap(); let _res = DeltaOps::from(table).write(vec![batch]).await.unwrap(); let expected = [ - "+-------------------------+", - "| value |", - "+-------------------------+", - "| 1970-01-01T00:00:00.010 |", - "| 2023-06-03 15:35:00 |", - "+-------------------------+", + "+--------------------------+", + "| value |", + "+--------------------------+", + "| 1970-01-01T00:00:00.010Z |", + "| 2023-06-03 15:35:00 |", + "+--------------------------+", 
]; let actual = get_data(&_res).await; assert_batches_sorted_eq!(&expected, &actual); @@ -998,6 +1456,218 @@ mod tests { assert_eq!(table.get_files_count(), 4) } + #[tokio::test] + async fn test_merge_schema() { + let batch = get_record_batch(None, false); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); + for field in batch.schema().fields() { + if field.name() != "modified" { + new_schema_builder.push(field.clone()); + } + } + new_schema_builder.push(Field::new("inserted_by", DataType::Utf8, true)); + let new_schema = new_schema_builder.finish(); + let new_fields = new_schema.fields(); + let new_names = new_fields.iter().map(|f| f.name()).collect::>(); + assert_eq!(new_names, vec!["id", "value", "inserted_by"]); + let inserted_by = StringArray::from(vec![ + Some("A1"), + Some("B1"), + None, + Some("B2"), + Some("A3"), + Some("A4"), + None, + None, + Some("B4"), + Some("A5"), + Some("A7"), + ]); + let new_batch = RecordBatch::try_new( + Arc::new(new_schema), + vec![ + Arc::new(batch.column_by_name("id").unwrap().clone()), + Arc::new(batch.column_by_name("value").unwrap().clone()), + Arc::new(inserted_by), + ], + ) + .unwrap(); + + let mut table = DeltaOps(table) + .write(vec![new_batch]) + .with_save_mode(SaveMode::Append) + .with_schema_mode(SchemaMode::Merge) + .await + .unwrap(); + table.load().await.unwrap(); + assert_eq!(table.version(), 1); + let new_schema = table.metadata().unwrap().schema().unwrap(); + let fields = new_schema.fields(); + let names = fields.map(|f| f.name()).collect::>(); + assert_eq!(names, vec!["id", "value", "modified", "inserted_by"]); + } + + #[tokio::test] + async fn test_merge_schema_with_partitions() { + let batch = get_record_batch(None, false); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_partition_columns(vec!["id", "value"]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); + for field in batch.schema().fields() { + if field.name() != "modified" { + new_schema_builder.push(field.clone()); + } + } + new_schema_builder.push(Field::new("inserted_by", DataType::Utf8, true)); + let new_schema = new_schema_builder.finish(); + let new_fields = new_schema.fields(); + let new_names = new_fields.iter().map(|f| f.name()).collect::>(); + assert_eq!(new_names, vec!["id", "value", "inserted_by"]); + let inserted_by = StringArray::from(vec![ + Some("A1"), + Some("B1"), + None, + Some("B2"), + Some("A3"), + Some("A4"), + None, + None, + Some("B4"), + Some("A5"), + Some("A7"), + ]); + let new_batch = RecordBatch::try_new( + Arc::new(new_schema), + vec![ + Arc::new(batch.column_by_name("id").unwrap().clone()), + Arc::new(batch.column_by_name("value").unwrap().clone()), + Arc::new(inserted_by), + ], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![new_batch]) + .with_save_mode(SaveMode::Append) + .with_schema_mode(SchemaMode::Merge) + .await + .unwrap(); + + assert_eq!(table.version(), 1); + let new_schema = table.metadata().unwrap().schema().unwrap(); + let fields = new_schema.fields(); + let mut names = fields.map(|f| f.name()).collect::>(); + names.sort(); + assert_eq!(names, vec!["id", "inserted_by", "modified", "value"]); + let part_cols = table.metadata().unwrap().partition_columns.clone(); + assert_eq!(part_cols, 
vec!["id", "value"]); // we want to preserve partitions + } + + #[tokio::test] + async fn test_overwrite_schema() { + let batch = get_record_batch(None, false); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); + for field in batch.schema().fields() { + if field.name() != "modified" { + new_schema_builder.push(field.clone()); + } + } + new_schema_builder.push(Field::new("inserted_by", DataType::Utf8, true)); + let new_schema = new_schema_builder.finish(); + let new_fields = new_schema.fields(); + let new_names = new_fields.iter().map(|f| f.name()).collect::>(); + assert_eq!(new_names, vec!["id", "value", "inserted_by"]); + let inserted_by = StringArray::from(vec![ + Some("A1"), + Some("B1"), + None, + Some("B2"), + Some("A3"), + Some("A4"), + None, + None, + Some("B4"), + Some("A5"), + Some("A7"), + ]); + let new_batch = RecordBatch::try_new( + Arc::new(new_schema), + vec![ + Arc::new(batch.column_by_name("id").unwrap().clone()), + Arc::new(batch.column_by_name("value").unwrap().clone()), + Arc::new(inserted_by), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![new_batch]) + .with_save_mode(SaveMode::Append) + .with_schema_mode(SchemaMode::Overwrite) + .await; + assert!(table.is_err()); + } + + #[tokio::test] + async fn test_overwrite_check() { + // If you do not pass a schema mode, we want to check the schema + let batch = get_record_batch(None, false); + let table = DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::ErrorIfExists) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); + + new_schema_builder.push(Field::new("inserted_by", DataType::Utf8, true)); + let new_schema = new_schema_builder.finish(); + let new_fields = new_schema.fields(); + let new_names = new_fields.iter().map(|f| f.name()).collect::>(); + assert_eq!(new_names, vec!["inserted_by"]); + let inserted_by = StringArray::from(vec![ + Some("A1"), + Some("B1"), + None, + Some("B2"), + Some("A3"), + Some("A4"), + None, + None, + Some("B4"), + Some("A5"), + Some("A7"), + ]); + let new_batch = + RecordBatch::try_new(Arc::new(new_schema), vec![Arc::new(inserted_by)]).unwrap(); + + let table = DeltaOps(table) + .write(vec![new_batch]) + .with_save_mode(SaveMode::Append) + .await; + assert!(table.is_err()); + } + #[tokio::test] async fn test_check_invariants() { let batch = get_record_batch(None, false); @@ -1015,7 +1685,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() .with_save_mode(SaveMode::ErrorIfExists) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -1037,7 +1707,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() .with_save_mode(SaveMode::ErrorIfExists) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -1053,7 +1723,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); diff --git a/crates/core/src/operations/writer.rs b/crates/core/src/operations/writer.rs index 5d8808fa3c..5128611ffd 100644 --- a/crates/core/src/operations/writer.rs +++ 
b/crates/core/src/operations/writer.rs @@ -1,11 +1,13 @@ //! Abstractions and implementations for writing data to delta tables -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use arrow::datatypes::SchemaRef as ArrowSchemaRef; use arrow::error::ArrowError; use arrow::record_batch::RecordBatch; use bytes::Bytes; +use delta_kernel::expressions::Scalar; +use indexmap::IndexMap; use object_store::{path::Path, ObjectStore}; use parquet::arrow::ArrowWriter; use parquet::basic::Compression; @@ -14,7 +16,7 @@ use tracing::debug; use crate::crate_version; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, PartitionsExt, Scalar}; +use crate::kernel::{Add, PartitionsExt}; use crate::storage::ObjectStoreRef; use crate::writer::record_batch::{divide_by_partition_values, PartitionResult}; use crate::writer::stats::create_add; @@ -65,6 +67,7 @@ impl From for DeltaTableError { } /// Configuration to write data into Delta tables +#[derive(Debug)] pub struct WriterConfig { /// Schema of the delta table table_schema: ArrowSchemaRef, @@ -77,6 +80,10 @@ pub struct WriterConfig { /// Row chunks passed to parquet writer. This and the internal parquet writer settings /// determine how fine granular we can track / control the size of resulting files. write_batch_size: usize, + /// Num index cols to collect stats for + num_indexed_cols: i32, + /// Stats columns, specific columns to collect stats from, takes precedence over num_indexed_cols + stats_columns: Option>, } impl WriterConfig { @@ -87,6 +94,8 @@ impl WriterConfig { writer_properties: Option, target_file_size: Option, write_batch_size: Option, + num_indexed_cols: i32, + stats_columns: Option>, ) -> Self { let writer_properties = writer_properties.unwrap_or_else(|| { WriterProperties::builder() @@ -102,6 +111,8 @@ impl WriterConfig { writer_properties, target_file_size, write_batch_size, + num_indexed_cols, + stats_columns, } } @@ -111,6 +122,7 @@ impl WriterConfig { } } +#[derive(Debug)] /// A parquet writer implementation tailored to the needs of writing data to a delta table. pub struct DeltaWriter { /// An object store pointing at Delta table root @@ -155,7 +167,7 @@ impl DeltaWriter { pub async fn write_partition( &mut self, record_batch: RecordBatch, - partition_values: &BTreeMap, + partition_values: &IndexMap, ) -> DeltaResult<()> { let partition_key = Path::parse(partition_values.hive_partition_path())?; @@ -174,8 +186,12 @@ impl DeltaWriter { Some(self.config.target_file_size), Some(self.config.write_batch_size), )?; - let mut writer = - PartitionWriter::try_with_config(self.object_store.clone(), config)?; + let mut writer = PartitionWriter::try_with_config( + self.object_store.clone(), + config, + self.config.num_indexed_cols, + self.config.stats_columns.clone(), + )?; writer.write(&record_batch).await?; let _ = self.partition_writers.insert(partition_key, writer); } @@ -211,13 +227,15 @@ impl DeltaWriter { } } -pub(crate) struct PartitionWriterConfig { +/// Write configuration for partition writers +#[derive(Debug)] +pub struct PartitionWriterConfig { /// Schema of the data written to disk file_schema: ArrowSchemaRef, /// Prefix applied to all paths prefix: Path, /// Values for all partition columns - partition_values: BTreeMap, + partition_values: IndexMap, /// Properties passed to underlying parquet writer writer_properties: WriterProperties, /// Size above which we will write a buffered parquet file to disk. 
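Reviewer note: `WriterConfig` now carries the stats knobs end to end: `num_indexed_cols` caps how many leading columns have statistics collected, and `stats_columns`, when set, pins stats collection to an explicit column list and takes precedence. A sketch of the extended constructor call, mirroring the argument order used by the `get_delta_writer` test helper later in this file; the module paths and the `Option<Vec<String>>` spelling of `stats_columns` are assumptions inferred from context:

```rust
use arrow_schema::SchemaRef;
use deltalake_core::operations::writer::{DeltaWriter, WriterConfig};
use deltalake_core::storage::ObjectStoreRef;
use deltalake_core::table::config::DEFAULT_NUM_INDEX_COLS;

/// Build a writer that only collects statistics for the listed columns.
fn build_writer(
    schema: SchemaRef,
    partition_columns: Vec<String>,
    object_store: ObjectStoreRef,
) -> DeltaWriter {
    let config = WriterConfig::new(
        schema,
        partition_columns,
        None,                   // default parquet WriterProperties
        None,                   // default target file size
        None,                   // default write batch size
        DEFAULT_NUM_INDEX_COLS, // fall back to the first N columns...
        Some(vec!["id".to_string(), "value".to_string()]), // ...unless columns are pinned
    );
    DeltaWriter::new(object_store, config)
}
```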
@@ -228,9 +246,10 @@ pub(crate) struct PartitionWriterConfig { } impl PartitionWriterConfig { + /// Create a new instance of [PartitionWriterConfig] pub fn try_new( file_schema: ArrowSchemaRef, - partition_values: BTreeMap, + partition_values: IndexMap, writer_properties: Option, target_file_size: Option, write_batch_size: Option, @@ -256,7 +275,12 @@ impl PartitionWriterConfig { } } -pub(crate) struct PartitionWriter { +/// Partition writer implementation +/// This writer takes in table data as RecordBatches and writes it out to partitioned parquet files. +/// It buffers data in memory until it reaches a certain size, then writes it out to optimize file sizes. +/// When you complete writing you get back a list of Add actions that can be used to update the Delta table commit log. +#[derive(Debug)] +pub struct PartitionWriter { object_store: ObjectStoreRef, writer_id: uuid::Uuid, config: PartitionWriterConfig, @@ -264,6 +288,10 @@ pub(crate) struct PartitionWriter { arrow_writer: ArrowWriter, part_counter: usize, files_written: Vec, + /// Num index cols to collect stats for + num_indexed_cols: i32, + /// Stats columns, specific columns to collect stats from, takes precedence over num_indexed_cols + stats_columns: Option>, } impl PartitionWriter { @@ -271,6 +299,8 @@ impl PartitionWriter { pub fn try_with_config( object_store: ObjectStoreRef, config: PartitionWriterConfig, + num_indexed_cols: i32, + stats_columns: Option>, ) -> DeltaResult { let buffer = ShareableBuffer::default(); let arrow_writer = ArrowWriter::try_new( @@ -287,6 +317,8 @@ impl PartitionWriter { arrow_writer, part_counter: 0, files_written: Vec::new(), + num_indexed_cols, + stats_columns, }) } @@ -337,13 +369,15 @@ impl PartitionWriter { let file_size = buffer.len() as i64; // write file to object store - self.object_store.put(&path, buffer).await?; + self.object_store.put(&path, buffer.into()).await?; self.files_written.push( create_add( &self.config.partition_values, path.to_string(), file_size, &metadata, + self.num_indexed_cols, + &self.stats_columns, ) .map_err(|err| WriteError::CreateAdd { source: Box::new(err), @@ -385,6 +419,7 @@ impl PartitionWriter { Ok(()) } + /// Close the writer and get the new [Add] actions. 
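Reviewer note: with `PartitionWriterConfig` and `PartitionWriter` now public, the intended life cycle is config → writer → `write` → `close`, with `close` handing back the `Add` actions for the commit. A sketch of that flow for an unpartitioned batch, modeled on the `get_partition_writer` helper added to the tests below; the empty `IndexMap` of partition values follows that helper, everything else is illustrative:

```rust
use arrow_array::RecordBatch;
use deltalake_core::kernel::Add;
use deltalake_core::operations::writer::{PartitionWriter, PartitionWriterConfig};
use deltalake_core::storage::ObjectStoreRef;
use deltalake_core::table::config::DEFAULT_NUM_INDEX_COLS;
use deltalake_core::DeltaResult;
use indexmap::IndexMap;

/// Write one unpartitioned batch and return the Add actions to commit.
async fn write_one_batch(
    object_store: ObjectStoreRef,
    batch: &RecordBatch,
) -> DeltaResult<Vec<Add>> {
    let config = PartitionWriterConfig::try_new(
        batch.schema(),
        IndexMap::new(), // no partition values for an unpartitioned write
        None,            // default parquet writer properties
        None,            // default target file size
        None,            // default write batch size
    )?;
    let mut writer =
        PartitionWriter::try_with_config(object_store, config, DEFAULT_NUM_INDEX_COLS, None)?;
    writer.write(batch).await?;
    // Flush any buffered data and collect the Add actions for the commit log.
    writer.close().await
}
```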
pub async fn close(mut self) -> DeltaResult> { self.flush_arrow_writer().await?; Ok(self.files_written) @@ -395,12 +430,51 @@ impl PartitionWriter { mod tests { use super::*; use crate::storage::utils::flatten_list_stream as list; - use crate::writer::test_utils::get_record_batch; + use crate::table::config::DEFAULT_NUM_INDEX_COLS; + use crate::writer::test_utils::*; use crate::DeltaTableBuilder; use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use std::sync::Arc; + fn get_delta_writer( + object_store: ObjectStoreRef, + batch: &RecordBatch, + writer_properties: Option, + target_file_size: Option, + write_batch_size: Option, + ) -> DeltaWriter { + let config = WriterConfig::new( + batch.schema(), + vec![], + writer_properties, + target_file_size, + write_batch_size, + DEFAULT_NUM_INDEX_COLS, + None, + ); + DeltaWriter::new(object_store, config) + } + + fn get_partition_writer( + object_store: ObjectStoreRef, + batch: &RecordBatch, + writer_properties: Option, + target_file_size: Option, + write_batch_size: Option, + ) -> PartitionWriter { + let config = PartitionWriterConfig::try_new( + batch.schema(), + IndexMap::new(), + writer_properties, + target_file_size, + write_batch_size, + ) + .unwrap(); + PartitionWriter::try_with_config(object_store, config, DEFAULT_NUM_INDEX_COLS, None) + .unwrap() + } + #[tokio::test] async fn test_write_partition() { let log_store = DeltaTableBuilder::from_uri("memory://") @@ -410,7 +484,7 @@ mod tests { let batch = get_record_batch(None, false); // write single un-partitioned batch - let mut writer = get_writer(object_store.clone(), &batch, None, None, None); + let mut writer = get_partition_writer(object_store.clone(), &batch, None, None, None); writer.write(&batch).await.unwrap(); let files = list(object_store.as_ref(), None).await.unwrap(); assert_eq!(files.len(), 0); @@ -442,8 +516,9 @@ mod tests { let properties = WriterProperties::builder() .set_max_row_group_size(1024) .build(); - // configure small target file size and row group size so we can observe multiple files written - let mut writer = get_writer(object_store, &batch, Some(properties), Some(10_000), None); + // configure small target file size and and row group size so we can observe multiple files written + let mut writer = + get_partition_writer(object_store, &batch, Some(properties), Some(10_000), None); writer.write(&batch).await.unwrap(); // check that we have written more then once file, and no more then 1 is below target size @@ -470,7 +545,7 @@ mod tests { .unwrap() .object_store(); // configure small target file size so we can observe multiple files written - let mut writer = get_writer(object_store, &batch, None, Some(10_000), None); + let mut writer = get_partition_writer(object_store, &batch, None, Some(10_000), None); writer.write(&batch).await.unwrap(); // check that we have written more then once file, and no more then 1 is below target size @@ -484,7 +559,7 @@ mod tests { #[tokio::test] async fn test_do_not_write_empty_file_on_close() { - let base_int = Arc::new(Int32Array::from((0..10000 as i32).collect::>())); + let base_int = Arc::new(Int32Array::from((0..10000_i32).collect::>())); let base_str = Arc::new(StringArray::from(vec!["A"; 10000])); let schema = Arc::new(ArrowSchema::new(vec![ Field::new("id", DataType::Utf8, true), @@ -498,28 +573,59 @@ mod tests { .object_store(); // configure high batch size and low file size to observe one file written and flushed immediately // upon writing batch, then ensures the 
buffer is empty upon closing writer - let mut writer = get_writer(object_store, &batch, None, Some(9000), Some(10000)); + let mut writer = get_partition_writer(object_store, &batch, None, Some(9000), Some(10000)); writer.write(&batch).await.unwrap(); let adds = writer.close().await.unwrap(); assert!(adds.len() == 1); } - fn get_writer( - object_store: ObjectStoreRef, - batch: &RecordBatch, - writer_properties: Option, - target_file_size: Option, - write_batch_size: Option, - ) -> PartitionWriter { - let config = PartitionWriterConfig::try_new( - batch.schema(), - BTreeMap::new(), - writer_properties, - target_file_size, - write_batch_size, + #[tokio::test] + async fn test_write_mismatched_schema() { + let log_store = DeltaTableBuilder::from_uri("memory://") + .build_storage() + .unwrap(); + let object_store = log_store.object_store(); + let batch = get_record_batch(None, false); + + // write single un-partitioned batch + let mut writer = get_delta_writer(object_store.clone(), &batch, None, None, None); + writer.write(&batch).await.unwrap(); + // Ensure the write hasn't been flushed + let files = list(object_store.as_ref(), None).await.unwrap(); + assert_eq!(files.len(), 0); + + // Create a second batch with a different schema + let second_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ])); + let second_batch = RecordBatch::try_new( + second_schema, + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2)])), + Arc::new(StringArray::from(vec![Some("will"), Some("robert")])), + ], ) .unwrap(); - PartitionWriter::try_with_config(object_store, config).unwrap() + + let result = writer.write(&second_batch).await; + assert!(result.is_err()); + + match result { + Ok(_) => { + panic!("Should not have successfully written"); + } + Err(e) => { + match e { + DeltaTableError::SchemaMismatch { .. } => { + // this is expected + } + others => { + panic!("Got the wrong error: {others:?}"); + } + } + } + }; } } diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index b6787b9b31..f2625e49cf 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -6,12 +6,15 @@ use std::iter::Iterator; use arrow_json::ReaderBuilder; use arrow_schema::ArrowError; -use chrono::{Datelike, Utc}; +use chrono::{Datelike, NaiveDate, NaiveDateTime, Utc}; use futures::{StreamExt, TryStreamExt}; +use itertools::Itertools; use lazy_static::lazy_static; use object_store::{Error, ObjectStore}; use parquet::arrow::ArrowWriter; +use parquet::basic::Compression; use parquet::errors::ParquetError; +use parquet::file::properties::WriterProperties; use regex::Regex; use serde_json::Value; use tracing::{debug, error}; @@ -19,13 +22,12 @@ use tracing::{debug, error}; use super::{time_utils, ProtocolError}; use crate::kernel::arrow::delta_log_schema_for_table; use crate::kernel::{ - Action, Add as AddAction, DataType, PrimitiveType, Protocol, Remove, StructField, Txn, + Action, Add as AddAction, DataType, PrimitiveType, Protocol, Remove, StructField, }; use crate::logstore::LogStore; use crate::table::state::DeltaTableState; use crate::table::{get_partition_col_data_types, CheckPoint, CheckPointBuilder}; use crate::{open_table_with_version, DeltaTable}; - type SchemaPath = Vec; /// Error returned when there is an error during creating a checkpoint. 
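Reviewer note: the checkpoint changes below (Snappy-compressed parquet, protocol feature passthrough, txn actions sourced from the snapshot) are all exercised through `create_checkpoint_for` in the tests. A sketch of calling it against a loaded table, following the call shape used in those tests; the exact public path of `create_checkpoint_for` and the error plumbing via `Box<dyn Error>` are assumptions:

```rust
use deltalake_core::open_table;
use deltalake_core::protocol::checkpoints::create_checkpoint_for;

/// Write a checkpoint for whatever version of the table is currently loaded.
async fn checkpoint_current_version(uri: &str) -> Result<(), Box<dyn std::error::Error>> {
    let table = open_table(uri).await?;
    create_checkpoint_for(
        table.version(),
        table.snapshot()?,          // &DeltaTableState for the loaded version
        table.log_store().as_ref(), // &dyn LogStore backing the _delta_log
    )
    .await?;
    Ok(())
}
```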
@@ -55,6 +57,9 @@ enum CheckpointError { #[from] source: ArrowError, }, + + #[error("missing rewquired action type in snapshot: {0}")] + MissingActionType(String), } impl From for ProtocolError { @@ -64,6 +69,7 @@ impl From for ProtocolError { CheckpointError::Arrow { source } => Self::Arrow { source }, CheckpointError::StaleTableVersion(..) => Self::Generic(value.to_string()), CheckpointError::Parquet { source } => Self::ParquetParseError { source }, + CheckpointError::MissingActionType(_) => Self::Generic(value.to_string()), } } } @@ -164,14 +170,16 @@ pub async fn create_checkpoint_for( let object_store = log_store.object_store(); debug!("Writing checkpoint to {:?}.", checkpoint_path); - object_store.put(&checkpoint_path, parquet_bytes).await?; + object_store + .put(&checkpoint_path, parquet_bytes.into()) + .await?; let last_checkpoint_content: Value = serde_json::to_value(checkpoint)?; let last_checkpoint_content = bytes::Bytes::from(serde_json::to_vec(&last_checkpoint_content)?); debug!("Writing _last_checkpoint to {:?}.", last_checkpoint_path); object_store - .put(&last_checkpoint_path, last_checkpoint_content) + .put(&last_checkpoint_path, last_checkpoint_content.into()) .await?; Ok(()) @@ -186,7 +194,7 @@ pub async fn cleanup_expired_logs_for( ) -> Result { lazy_static! { static ref DELTA_LOG_REGEX: Regex = - Regex::new(r"_delta_log/(\d{20})\.(json|checkpoint).*$").unwrap(); + Regex::new(r"_delta_log/(\d{20})\.(json|checkpoint|json.tmp).*$").unwrap(); } let object_store = log_store.object_store(); @@ -254,7 +262,8 @@ fn parquet_bytes_from_state( // Collect a map of paths that require special stats conversion. let mut stats_conversions: Vec<(SchemaPath, DataType)> = Vec::new(); - collect_stats_conversions(&mut stats_conversions, schema.fields().as_slice()); + let fields = schema.fields().collect_vec(); + collect_stats_conversions(&mut stats_conversions, fields.as_slice()); // if any, tombstones do not include extended file metadata, we must omit the extended metadata fields from the remove schema // See https://github.com/delta-io/delta/blob/master/PROTOCOL.md#add-file-and-remove-file @@ -279,8 +288,16 @@ fn parquet_bytes_from_state( let jsons = std::iter::once(Action::Protocol(Protocol { min_reader_version: state.protocol().min_reader_version, min_writer_version: state.protocol().min_writer_version, - writer_features: None, - reader_features: None, + writer_features: if state.protocol().min_writer_version >= 7 { + Some(state.protocol().writer_features.clone().unwrap_or_default()) + } else { + None + }, + reader_features: if state.protocol().min_reader_version >= 3 { + Some(state.protocol().reader_features.clone().unwrap_or_default()) + } else { + None + }, })) // metaData .chain(std::iter::once(Action::Metadata(current_metadata.clone()))) @@ -288,14 +305,8 @@ fn parquet_bytes_from_state( .chain( state .app_transaction_version() - .iter() - .map(|(app_id, version)| { - Action::Txn(Txn { - app_id: app_id.clone(), - version: *version, - last_updated: None, - }) - }), + .map_err(|_| CheckpointError::MissingActionType("txn".to_string()))? + .map(Action::Txn), ) // removes .chain(tombstones.iter().map(|r| { @@ -325,7 +336,15 @@ fn parquet_bytes_from_state( debug!("Writing to checkpoint parquet buffer..."); // Write the Checkpoint parquet file. 
let mut bytes = vec![]; - let mut writer = ArrowWriter::try_new(&mut bytes, arrow_schema.clone(), None)?; + let mut writer = ArrowWriter::try_new( + &mut bytes, + arrow_schema.clone(), + Some( + WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(), + ), + )?; let mut decoder = ReaderBuilder::new(arrow_schema) .with_batch_size(CHECKPOINT_RECORD_BATCH_SIZE) .build_decoder()?; @@ -417,20 +436,22 @@ fn typed_partition_value_from_string( .map_err(|_| CheckpointError::PartitionValueNotParseable(string_value.to_owned()))? .into()), PrimitiveType::Date => { - let d = chrono::naive::NaiveDate::parse_from_str(string_value, "%Y-%m-%d") - .map_err(|_| { - CheckpointError::PartitionValueNotParseable(string_value.to_owned()) - })?; + let d = NaiveDate::parse_from_str(string_value, "%Y-%m-%d").map_err(|_| { + CheckpointError::PartitionValueNotParseable(string_value.to_owned()) + })?; // day 0 is 1970-01-01 (719163 days from ce) Ok((d.num_days_from_ce() - 719_163).into()) } - PrimitiveType::Timestamp => { - let ts = - chrono::naive::NaiveDateTime::parse_from_str(string_value, "%Y-%m-%d %H:%M:%S") - .map_err(|_| { - CheckpointError::PartitionValueNotParseable(string_value.to_owned()) - })?; - Ok((ts.timestamp_millis() * 1000).into()) + PrimitiveType::Timestamp | PrimitiveType::TimestampNtz => { + let ts = NaiveDateTime::parse_from_str(string_value, "%Y-%m-%d %H:%M:%S.%6f"); + let ts: NaiveDateTime = match ts { + Ok(_) => ts, + Err(_) => NaiveDateTime::parse_from_str(string_value, "%Y-%m-%d %H:%M:%S"), + } + .map_err(|_| { + CheckpointError::PartitionValueNotParseable(string_value.to_owned()) + })?; + Ok((ts.and_utc().timestamp_millis() * 1000).into()) } s => unimplemented!( "Primitive type {} is not supported for partition column values.", @@ -460,7 +481,7 @@ fn typed_partition_value_from_option_string( } } -fn collect_stats_conversions(paths: &mut Vec<(SchemaPath, DataType)>, fields: &[StructField]) { +fn collect_stats_conversions(paths: &mut Vec<(SchemaPath, DataType)>, fields: &[&StructField]) { let mut _path = SchemaPath::new(); fields .iter() @@ -481,9 +502,7 @@ fn collect_field_conversion( DataType::Struct(struct_field) => { let struct_fields = struct_field.fields(); current_path.push(field.name().to_owned()); - struct_fields - .iter() - .for_each(|f| collect_field_conversion(current_path, all_paths, f)); + struct_fields.for_each(|f| collect_field_conversion(current_path, all_paths, f)); current_path.pop(); } _ => { /* noop */ } @@ -532,7 +551,9 @@ mod tests { use super::*; use crate::kernel::StructType; + use crate::operations::transaction::{CommitBuilder, TableReference}; use crate::operations::DeltaOps; + use crate::protocol::Metadata; use crate::writer::test_utils::get_delta_schema; #[tokio::test] @@ -541,7 +562,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(crate::protocol::SaveMode::Ignore) .await .unwrap(); @@ -565,13 +586,91 @@ mod tests { assert_eq!(last_checkpoint.version, 0); } + /// This test validates that a checkpoint can be written and re-read with the minimum viable + /// Metadata. There was a bug which didn't handle the optionality of createdTime. 
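Reviewer note: the new `Timestamp | TimestampNtz` arm above first tries the microsecond-precision form and only then falls back to whole seconds, so both spellings of a partition value land on the same microsecond epoch value. A standalone chrono sketch of that fallback (the function name is hypothetical; the format strings and the `millis * 1000` conversion mirror the code above):

```rust
use chrono::NaiveDateTime;

/// Parse a stringified timestamp partition value into microseconds since the epoch,
/// accepting values with or without a fractional-second suffix.
fn partition_timestamp_micros(raw: &str) -> Option<i64> {
    let ts = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S.%6f")
        .or_else(|_| NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S"))
        .ok()?;
    // Millisecond precision scaled to microseconds, as in the checkpoint code.
    Some(ts.and_utc().timestamp_millis() * 1000)
}

fn main() {
    assert_eq!(
        partition_timestamp_micros("2021-08-08 01:00:01.000000"),
        partition_timestamp_micros("2021-08-08 01:00:01"),
    );
    assert_eq!(
        partition_timestamp_micros("2021-08-08 01:00:01"),
        Some(1_628_384_401_000_000)
    );
}
```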
+ #[tokio::test] + async fn test_create_checkpoint_with_metadata() { + let table_schema = get_delta_schema(); + + let mut table = DeltaOps::new_in_memory() + .create() + .with_columns(table_schema.fields().cloned()) + .with_save_mode(crate::protocol::SaveMode::Ignore) + .await + .unwrap(); + assert_eq!(table.version(), 0); + assert_eq!(table.get_schema().unwrap(), &table_schema); + + let part_cols: Vec = vec![]; + let metadata = Metadata::try_new(table_schema, part_cols, HashMap::new()).unwrap(); + let actions = vec![Action::Metadata(metadata)]; + + let epoch_id = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_millis() as i64; + + let operation = crate::protocol::DeltaOperation::StreamingUpdate { + output_mode: crate::protocol::OutputMode::Append, + query_id: "test".into(), + epoch_id, + }; + let v = CommitBuilder::default() + .with_actions(actions) + .build( + table.state.as_ref().map(|f| f as &dyn TableReference), + table.log_store(), + operation, + ) + .await + .unwrap() + .version(); + + assert_eq!(1, v, "Expected the commit to create table version 1"); + table.load().await.expect("Failed to reload table"); + assert_eq!( + table.version(), + 1, + "The loaded version of the table is not up to date" + ); + + let res = create_checkpoint_for( + table.version(), + table.state.as_ref().unwrap(), + table.log_store.as_ref(), + ) + .await; + assert!(res.is_ok()); + + // Look at the "files" and verify that the _last_checkpoint has the right version + let path = Path::from("_delta_log/_last_checkpoint"); + let last_checkpoint = table + .object_store() + .get(&path) + .await + .expect("Failed to get the _last_checkpoint") + .bytes() + .await + .expect("Failed to get bytes for _last_checkpoint"); + let last_checkpoint: CheckPoint = serde_json::from_slice(&last_checkpoint).expect("Fail"); + assert_eq!(last_checkpoint.version, 1); + + // If the regression exists, this will fail + table.load().await.expect("Failed to reload the table, this likely means that the optional createdTime was not actually optional"); + assert_eq!( + 1, + table.version(), + "The reloaded table doesn't have the right version" + ); + } + #[tokio::test] async fn test_create_checkpoint_for_invalid_version() { let table_schema = get_delta_schema(); let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(crate::protocol::SaveMode::Ignore) .await .unwrap(); @@ -649,8 +748,11 @@ mod tests { } for (s, v) in [ + ("2021-08-08 01:00:01.000000", 1628384401000000i64), ("2021-08-08 01:00:01", 1628384401000000i64), + ("1970-01-02 12:59:59.000000", 133199000000i64), ("1970-01-02 12:59:59", 133199000000i64), + ("1970-01-01 13:00:01.000000", 46801000000i64), ("1970-01-01 13:00:01", 46801000000i64), ("1969-12-31 00:00:00", -86400000000i64), ("1677-09-21 00:12:44", -9223372036000000i64), @@ -702,9 +804,8 @@ mod tests { #[test] fn collect_stats_conversions_test() { let delta_schema: StructType = serde_json::from_value(SCHEMA.clone()).unwrap(); - let fields = delta_schema.fields(); + let fields = delta_schema.fields().collect_vec(); let mut paths = Vec::new(); - collect_stats_conversions(&mut paths, fields.as_slice()); assert_eq!(2, paths.len()); diff --git a/crates/core/src/protocol/mod.rs b/crates/core/src/protocol/mod.rs index 3be8a734fa..ce6ef0e8b0 100644 --- a/crates/core/src/protocol/mod.rs +++ b/crates/core/src/protocol/mod.rs @@ -21,7 +21,7 @@ use std::str::FromStr; use 
tracing::{debug, error}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove}; +use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove, StructField}; use crate::logstore::LogStore; use crate::table::CheckPoint; @@ -326,6 +326,13 @@ pub struct MergePredicate { #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub enum DeltaOperation { + /// Represents a Delta `Add Column` operation. + /// Used to add new columns or field in a struct + AddColumn { + /// Fields added to existing schema + fields: Vec, + }, + /// Represents a Delta `Create` operation. /// Would usually only create the table, if also data is written, /// a `Write` operations is more appropriate @@ -371,12 +378,21 @@ pub enum DeltaOperation { expr: String, }, + /// Drops constraints from a table + DropConstraint { + /// Constraints name + name: String, + }, + /// Merge data with a source data with the following predicate #[serde(rename_all = "camelCase")] Merge { - /// The merge predicate + /// Cleaned merge predicate for conflict checks predicate: Option, + /// The original merge predicate + merge_predicate: Option, + /// Match operations performed matched_predicates: Vec, @@ -398,6 +414,13 @@ pub enum DeltaOperation { epoch_id: i64, }, + /// Set table properties operations + #[serde(rename_all = "camelCase")] + SetTableProperties { + /// Table properties that were added + properties: HashMap, + }, + #[serde(rename_all = "camelCase")] /// Represents a `Optimize` operation Optimize { @@ -442,6 +465,7 @@ impl DeltaOperation { pub fn name(&self) -> &str { // operation names taken from https://learn.microsoft.com/en-us/azure/databricks/delta/history#--operation-metrics-keys match &self { + DeltaOperation::AddColumn { .. } => "ADD COLUMN", DeltaOperation::Create { mode: SaveMode::Overwrite, .. @@ -452,12 +476,14 @@ impl DeltaOperation { DeltaOperation::Update { .. } => "UPDATE", DeltaOperation::Merge { .. } => "MERGE", DeltaOperation::StreamingUpdate { .. } => "STREAMING UPDATE", + DeltaOperation::SetTableProperties { .. } => "SET TBLPROPERTIES", DeltaOperation::Optimize { .. } => "OPTIMIZE", DeltaOperation::FileSystemCheck { .. } => "FSCK", DeltaOperation::Restore { .. } => "RESTORE", DeltaOperation::VacuumStart { .. } => "VACUUM START", DeltaOperation::VacuumEnd { .. } => "VACUUM END", DeltaOperation::AddConstraint { .. } => "ADD CONSTRAINT", + DeltaOperation::DropConstraint { .. } => "DROP CONSTRAINT", } } @@ -494,9 +520,12 @@ impl DeltaOperation { pub fn changes_data(&self) -> bool { match self { Self::Optimize { .. } + | Self::SetTableProperties { .. } + | Self::AddColumn { .. } | Self::VacuumStart { .. } | Self::VacuumEnd { .. } - | Self::AddConstraint { .. } => false, + | Self::AddConstraint { .. } + | Self::DropConstraint { .. } => false, Self::Create { .. } | Self::FileSystemCheck {} | Self::StreamingUpdate { .. } @@ -533,16 +562,15 @@ impl DeltaOperation { /// Denotes if the operation reads the entire table pub fn read_whole_table(&self) -> bool { match self { - // TODO just adding one operation example, as currently none of the - // implemented operations scan the entire table. - Self::Write { predicate, .. } if predicate.is_none() => false, + // Predicate is none -> Merge operation had to join full source and target + Self::Merge { predicate, .. 
} if predicate.is_none() => true, _ => false, } } } /// The SaveMode used when performing a DeltaOperation -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq)] pub enum SaveMode { /// Files will be appended to the target location. Append, @@ -572,7 +600,7 @@ impl FromStr for SaveMode { } /// The OutputMode used in streaming operations. -#[derive(Serialize, Deserialize, Debug, Clone)] +#[derive(Serialize, Deserialize, Debug, Copy, Clone)] pub enum OutputMode { /// Only new rows will be written when new data is available. Append, @@ -1180,6 +1208,32 @@ mod tests { assert_eq!(expected, actions); } + #[tokio::test] + async fn test_table_not_always_with_stats() { + let path = "../test/tests/data/delta-stats-optional"; + let mut table = crate::open_table(path).await.unwrap(); + table.load().await.unwrap(); + let actions = table.snapshot().unwrap().add_actions_table(true).unwrap(); + let actions = sort_batch_by(&actions, "path").unwrap(); + // get column-0 path, and column-4 num_records, and column_5 null_count.integer + let expected_path: ArrayRef = Arc::new(array::StringArray::from(vec![ + "part-00000-28925d3a-bdf2-411e-bca9-b067444cbcb0-c000.snappy.parquet", + "part-00000-7a509247-4f58-4453-9202-51d75dee59af-c000.snappy.parquet", + ])); + let expected_num_records: ArrayRef = + Arc::new(array::Int64Array::from(vec![None, Some(1)])); + let expected_null_count: ArrayRef = + Arc::new(array::Int64Array::from(vec![None, Some(0)])); + + let path_column = actions.column(0); + let num_records_column = actions.column(4); + let null_count_column = actions.column(5); + + assert_eq!(&expected_path, path_column); + assert_eq!(&expected_num_records, num_records_column); + assert_eq!(&expected_null_count, null_count_column); + } + #[tokio::test] async fn test_only_struct_stats() { // test table with no json stats @@ -1298,15 +1352,21 @@ mod tests { ), ( "min.timestamp", - Arc::new(array::TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2022-10-24T22:59:32.846Z"), - ])), + Arc::new( + array::TimestampMicrosecondArray::from(vec![ + TimestampMicrosecondType::parse("2022-10-24T22:59:32.846Z"), + ]) + .with_timezone("UTC"), + ), ), ( "max.timestamp", - Arc::new(array::TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2022-10-24T22:59:32.846Z"), - ])), + Arc::new( + array::TimestampMicrosecondArray::from(vec![ + TimestampMicrosecondType::parse("2022-10-24T22:59:32.846Z"), + ]) + .with_timezone("UTC"), + ), ), ( "null_count.struct.struct_element", diff --git a/crates/core/src/protocol/parquet_read/mod.rs b/crates/core/src/protocol/parquet_read/mod.rs index f838bbdaeb..655dcb05f3 100644 --- a/crates/core/src/protocol/parquet_read/mod.rs +++ b/crates/core/src/protocol/parquet_read/mod.rs @@ -9,7 +9,8 @@ use tracing::{debug, error, warn}; use crate::kernel::models::actions::serde_path::decode_path; use crate::kernel::{ - Action, Add, AddCDCFile, DeletionVectorDescriptor, Metadata, Protocol, Remove, StorageType, Txn, + Action, Add, AddCDCFile, DeletionVectorDescriptor, Metadata, Protocol, Remove, StorageType, + Transaction, }; use crate::protocol::{ColumnCountStat, ColumnValueStat, ProtocolError, Stats}; @@ -433,12 +434,10 @@ impl Metadata { .map_err(|_| gen_action_type_error("metaData", "schemaString", "string"))? 
.clone(); } - "createdTime" => { - re.created_time = - Some(record.get_long(i).map_err(|_| { - gen_action_type_error("metaData", "createdTime", "long") - })?); - } + "createdTime" => match record.get_long(i) { + Ok(s) => re.created_time = Some(s), + _ => re.created_time = None, + }, "configuration" => { let configuration_map = record .get_map(i) @@ -586,7 +585,7 @@ impl Remove { } } -impl Txn { +impl Transaction { fn from_parquet_record(record: &parquet::record::Row) -> Result { let mut re = Self { ..Default::default() @@ -707,7 +706,7 @@ impl Action { "add" => Action::Add(Add::from_parquet_record(col_data)?), "metaData" => Action::Metadata(Metadata::from_parquet_record(col_data)?), "remove" => Action::Remove(Remove::from_parquet_record(col_data)?), - "txn" => Action::Txn(Txn::from_parquet_record(col_data)?), + "txn" => Action::Txn(Transaction::from_parquet_record(col_data)?), "protocol" => Action::Protocol(Protocol::from_parquet_record(col_data)?), "cdc" => Action::Cdc(AddCDCFile::from_parquet_record(col_data)?), name => { diff --git a/crates/core/src/schema/partitions.rs b/crates/core/src/schema/partitions.rs index a52b82bd9d..d2b2e84979 100644 --- a/crates/core/src/schema/partitions.rs +++ b/crates/core/src/schema/partitions.rs @@ -1,11 +1,13 @@ //! Delta Table partition handling logic. -//! + +use delta_kernel::expressions::Scalar; +use serde::{Serialize, Serializer}; use std::cmp::Ordering; use std::collections::HashMap; use std::convert::TryFrom; use crate::errors::DeltaTableError; -use crate::kernel::{DataType, PrimitiveType, Scalar}; +use crate::kernel::{scalars::ScalarExt, DataType, PrimitiveType}; /// A special value used in Hive to represent the null partition in partitioned tables pub const NULL_PARTITION_VALUE_DATA_PATH: &str = "__HIVE_DEFAULT_PARTITION__"; @@ -31,6 +33,42 @@ pub enum PartitionValue { NotIn(Vec), } +#[derive(Clone, Debug, PartialEq)] +struct ScalarHelper<'a>(&'a Scalar); + +impl PartialOrd for ScalarHelper<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + use Scalar::*; + match (self.0, other.0) { + (Null(_), Null(_)) => Some(Ordering::Equal), + (Integer(a), Integer(b)) => a.partial_cmp(b), + (Long(a), Long(b)) => a.partial_cmp(b), + (Short(a), Short(b)) => a.partial_cmp(b), + (Byte(a), Byte(b)) => a.partial_cmp(b), + (Float(a), Float(b)) => a.partial_cmp(b), + (Double(a), Double(b)) => a.partial_cmp(b), + (String(a), String(b)) => a.partial_cmp(b), + (Boolean(a), Boolean(b)) => a.partial_cmp(b), + (Timestamp(a), Timestamp(b)) => a.partial_cmp(b), + (TimestampNtz(a), TimestampNtz(b)) => a.partial_cmp(b), + (Date(a), Date(b)) => a.partial_cmp(b), + (Binary(a), Binary(b)) => a.partial_cmp(b), + (Decimal(a, p1, s1), Decimal(b, p2, s2)) => { + // TODO implement proper decimal comparison + if p1 != p2 || s1 != s2 { + return None; + }; + a.partial_cmp(b) + } + // TODO should we make an assumption about the ordering of nulls? + // rigth now this is only used for internal purposes. + (Null(_), _) => Some(Ordering::Less), + (_, Null(_)) => Some(Ordering::Greater), + _ => None, + } + } +} + /// A Struct used for filtering a DeltaTable partition by key and value. 
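Reviewer note: besides the `Scalar`-based comparison helper above, `PartitionFilter` below gains a `Serialize` impl that renders filters as SQL-ish predicate strings for `operationParameters`. A sketch of what that looks like from the caller side, matching the expected strings in the new serialization tests; the `deltalake_core::partitions` path and the `serde_json` round trip are assumptions:

```rust
use deltalake_core::partitions::PartitionFilter;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Filters are still built from (key, op, value) tuples...
    let eq = PartitionFilter::try_from(("date", "=", "2022-05-22"))?;
    let set = PartitionFilter::try_from(("date", "in", ["2023-11-04", "2023-06-07"].as_slice()))?;

    // ...and now serialize to the predicate strings recorded in the commit info.
    assert_eq!(serde_json::to_string(&eq)?, r#""date = '2022-05-22'""#);
    assert_eq!(
        serde_json::to_string(&set)?,
        r#""date IN ('2023-11-04', '2023-06-07')""#
    );
    Ok(())
}
```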
#[derive(Clone, Debug, PartialEq, Eq)] pub struct PartitionFilter { @@ -48,7 +86,7 @@ fn compare_typed_value( match data_type { DataType::Primitive(primitive_type) => { let other = primitive_type.parse_scalar(filter_value).ok()?; - partition_value.partial_cmp(&other) + ScalarHelper(partition_value).partial_cmp(&ScalarHelper(&other)) } // NOTE: complex types are not supported as partition columns _ => None, @@ -124,6 +162,36 @@ impl PartitionFilter { } } +/// Create desired string representation for PartitionFilter. +/// Used in places like predicate in operationParameters, etc. +impl Serialize for PartitionFilter { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let s = match &self.value { + PartitionValue::Equal(value) => format!("{} = '{}'", self.key, value), + PartitionValue::NotEqual(value) => format!("{} != '{}'", self.key, value), + PartitionValue::GreaterThan(value) => format!("{} > '{}'", self.key, value), + PartitionValue::GreaterThanOrEqual(value) => format!("{} >= '{}'", self.key, value), + PartitionValue::LessThan(value) => format!("{} < '{}'", self.key, value), + PartitionValue::LessThanOrEqual(value) => format!("{} <= '{}'", self.key, value), + // used upper case for IN and NOT similar to SQL + PartitionValue::In(values) => { + let quoted_values: Vec = + values.iter().map(|v| format!("'{}'", v)).collect(); + format!("{} IN ({})", self.key, quoted_values.join(", ")) + } + PartitionValue::NotIn(values) => { + let quoted_values: Vec = + values.iter().map(|v| format!("'{}'", v)).collect(); + format!("{} NOT IN ({})", self.key, quoted_values.join(", ")) + } + }; + serializer.serialize_str(&s) + } +} + /// Create a PartitionFilter from a filter Tuple with the structure (key, operation, value). impl TryFrom<(&str, &str, &str)> for PartitionFilter { type Error = DeltaTableError; @@ -207,3 +275,55 @@ impl DeltaTablePartition { } } } + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn check_json_serialize(filter: PartitionFilter, expected_json: &str) { + assert_eq!(serde_json::to_value(filter).unwrap(), json!(expected_json)) + } + + #[test] + fn test_serialize_partition_filter() { + check_json_serialize( + PartitionFilter::try_from(("date", "=", "2022-05-22")).unwrap(), + "date = '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", "!=", "2022-05-22")).unwrap(), + "date != '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", ">", "2022-05-22")).unwrap(), + "date > '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", ">=", "2022-05-22")).unwrap(), + "date >= '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", "<", "2022-05-22")).unwrap(), + "date < '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", "<=", "2022-05-22")).unwrap(), + "date <= '2022-05-22'", + ); + check_json_serialize( + PartitionFilter::try_from(("date", "in", vec!["2023-11-04", "2023-06-07"].as_slice())) + .unwrap(), + "date IN ('2023-11-04', '2023-06-07')", + ); + check_json_serialize( + PartitionFilter::try_from(( + "date", + "not in", + vec!["2023-11-04", "2023-06-07"].as_slice(), + )) + .unwrap(), + "date NOT IN ('2023-11-04', '2023-06-07')", + ); + } +} diff --git a/crates/core/src/storage/file.rs b/crates/core/src/storage/file.rs index c63a00dae6..f7fa168127 100644 --- a/crates/core/src/storage/file.rs +++ b/crates/core/src/storage/file.rs @@ -6,12 +6,12 @@ use bytes::Bytes; use futures::stream::BoxStream; use 
object_store::{ local::LocalFileSystem, path::Path as ObjectStorePath, Error as ObjectStoreError, GetOptions, - GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, PutOptions, PutResult, + GetResult, ListResult, ObjectMeta, ObjectStore, PutOptions, PutResult, Result as ObjectStoreResult, }; +use object_store::{MultipartUpload, PutMultipartOpts, PutPayload}; use std::ops::Range; use std::sync::Arc; -use tokio::io::AsyncWrite; use url::Url; const STORE_NAME: &str = "DeltaLocalObjectStore"; @@ -166,14 +166,18 @@ impl std::fmt::Display for FileStorageBackend { #[async_trait::async_trait] impl ObjectStore for FileStorageBackend { - async fn put(&self, location: &ObjectStorePath, bytes: Bytes) -> ObjectStoreResult { + async fn put( + &self, + location: &ObjectStorePath, + bytes: PutPayload, + ) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &ObjectStorePath, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -254,16 +258,16 @@ impl ObjectStore for FileStorageBackend { async fn put_multipart( &self, location: &ObjectStorePath, - ) -> ObjectStoreResult<(MultipartId, Box)> { + ) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &ObjectStorePath, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 02a307d51f..3c38a337af 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -1,6 +1,7 @@ //! Object storage backend abstraction layer for Delta Table transaction logs and data use dashmap::DashMap; +use object_store::limit::LimitStore; use std::collections::HashMap; use std::sync::{Arc, OnceLock}; @@ -9,6 +10,7 @@ use serde::{Deserialize, Serialize}; use url::Url; pub mod file; +pub mod retry_ext; pub mod utils; use crate::{DeltaResult, DeltaTableError}; @@ -22,6 +24,7 @@ pub use object_store::{ DynObjectStore, Error as ObjectStoreError, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, Result as ObjectStoreResult, }; +pub use retry_ext::ObjectStoreRetryExt; pub use utils::*; lazy_static! { @@ -48,18 +51,20 @@ impl ObjectStoreFactory for DefaultObjectStoreFactory { fn parse_url_opts( &self, url: &Url, - _options: &StorageOptions, + options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { match url.scheme() { "memory" => { let path = Path::from_url_path(url.path())?; - let store: Arc = Arc::new(InMemory::new()) as ObjectStoreRef; - Ok((url_prefix_handler(store, path.clone())?, path)) + let inner = Arc::new(InMemory::new()) as ObjectStoreRef; + let store = limit_store_handler(url_prefix_handler(inner, path.clone()), options); + Ok((store, path)) } "file" => { - let store = Arc::new(LocalFileSystem::new_with_prefix( + let inner = Arc::new(LocalFileSystem::new_with_prefix( url.to_file_path().unwrap(), )?) 
as ObjectStoreRef; + let store = limit_store_handler(inner, options); Ok((store, Path::from("/"))) } _ => Err(DeltaTableError::InvalidTableLocation(url.clone().into())), @@ -146,17 +151,44 @@ pub fn str_is_truthy(val: &str) -> bool { /// Simple function to wrap the given [ObjectStore] in a [PrefixStore] if necessary /// -/// This simplifies the use of t he storage since it ensures that list/get/etc operations +/// This simplifies the use of the storage since it ensures that list/get/etc operations /// start from the prefix in the object storage rather than from the root configured URI of the /// [ObjectStore] -pub fn url_prefix_handler(store: T, prefix: Path) -> DeltaResult { +pub fn url_prefix_handler(store: T, prefix: Path) -> ObjectStoreRef { if prefix != Path::from("/") { - Ok(Arc::new(PrefixStore::new(store, prefix))) + Arc::new(PrefixStore::new(store, prefix)) } else { - Ok(Arc::new(store)) + Arc::new(store) } } +/// Simple function to wrap the given [ObjectStore] in a [LimitStore] if configured +/// +/// Limits the number of concurrent connections the underlying object store +/// Reference [LimitStore](https://docs.rs/object_store/latest/object_store/limit/struct.LimitStore.html) for more information +pub fn limit_store_handler(store: T, options: &StorageOptions) -> ObjectStoreRef { + let concurrency_limit = options + .0 + .get(storage_constants::OBJECT_STORE_CONCURRENCY_LIMIT) + .and_then(|v| v.parse().ok()); + + if let Some(limit) = concurrency_limit { + Arc::new(LimitStore::new(store, limit)) + } else { + Arc::new(store) + } +} + +/// Storage option keys to use when creating [ObjectStore]. +/// The same key should be used whether passing a key in the hashmap or setting it as an environment variable. +/// Must be implemented for a given storage provider +pub mod storage_constants { + + /// The number of concurrent connections the underlying object store can create + /// Reference [LimitStore](https://docs.rs/object_store/latest/object_store/limit/struct.LimitStore.html) for more information + pub const OBJECT_STORE_CONCURRENCY_LIMIT: &str = "OBJECT_STORE_CONCURRENCY_LIMIT"; +} + #[cfg(test)] mod tests { use super::*; @@ -166,7 +198,28 @@ mod tests { let store = InMemory::new(); let path = Path::parse("/databases/foo/bar").expect("Failed to parse path"); - let prefixed = url_prefix_handler(store, path); - assert!(prefixed.is_ok()); + let prefixed = url_prefix_handler(store, path.clone()); + + assert_eq!( + String::from("PrefixObjectStore(databases/foo/bar)"), + format!("{prefixed}") + ); + } + + #[test] + fn test_limit_store_handler() { + let store = InMemory::new(); + + let options = StorageOptions(HashMap::from_iter(vec![( + "OBJECT_STORE_CONCURRENCY_LIMIT".into(), + "500".into(), + )])); + + let limited = limit_store_handler(store, &options); + + assert_eq!( + String::from("LimitStore(500, InMemory)"), + format!("{limited}") + ); } } diff --git a/crates/core/src/storage/retry_ext.rs b/crates/core/src/storage/retry_ext.rs new file mode 100644 index 0000000000..b63c29a8ae --- /dev/null +++ b/crates/core/src/storage/retry_ext.rs @@ -0,0 +1,81 @@ +//! Retry extension for [`ObjectStore`] + +use object_store::{path::Path, Error, ObjectStore, PutPayload, PutResult, Result}; +use tracing::log::*; + +/// Retry extension for [`ObjectStore`] +/// +/// Read-only operations are retried by [`ObjectStore`] internally. However, PUT/DELETE operations +/// are not retried even thought they are technically idempotent. 
[`ObjectStore`] does not retry +/// those operations because having preconditions may produce different results for the same +/// request. PUT/DELETE operations without preconditions are idempotent and can be retried. +/// Unfortunately, [`ObjectStore`]'s retry mechanism only works on HTTP request level, thus there +/// is no way to distinguish whether a request has preconditions or not. +/// +/// This trait provides additional methods for working with [`ObjectStore`] that automatically retry +/// unconditional operations when they fail. +/// +/// See also: +/// - https://github.com/apache/arrow-rs/pull/5278 +#[async_trait::async_trait] +pub trait ObjectStoreRetryExt: ObjectStore { + /// Save the provided bytes to the specified location + /// + /// The operation is guaranteed to be atomic, it will either successfully write the entirety of + /// bytes to location, or fail. No clients should be able to observe a partially written object + /// + /// Note that `put_with_opts` may have precondition semantics, and thus may not be retriable. + async fn put_with_retries( + &self, + location: &Path, + bytes: PutPayload, + max_retries: usize, + ) -> Result { + let mut attempt_number = 1; + while attempt_number <= max_retries { + match self.put(location, bytes.clone()).await { + Ok(result) => return Ok(result), + Err(err) if attempt_number == max_retries => { + return Err(err); + } + Err(Error::Generic { store, source }) => { + debug!( + "put_with_retries attempt {} failed: {} {}", + attempt_number, store, source + ); + attempt_number += 1; + } + Err(err) => { + return Err(err); + } + } + } + unreachable!("loop yields Ok or Err in body when attempt_number = max_retries") + } + + /// Delete the object at the specified location + async fn delete_with_retries(&self, location: &Path, max_retries: usize) -> Result<()> { + let mut attempt_number = 1; + while attempt_number <= max_retries { + match self.delete(location).await { + Ok(()) | Err(Error::NotFound { .. }) => return Ok(()), + Err(err) if attempt_number == max_retries => { + return Err(err); + } + Err(Error::Generic { store, source }) => { + debug!( + "delete_with_retries attempt {} failed: {} {}", + attempt_number, store, source + ); + attempt_number += 1; + } + Err(err) => { + return Err(err); + } + } + } + unreachable!("loop yields Ok or Err in body when attempt_number = max_retries") + } +} + +impl ObjectStoreRetryExt for T {} diff --git a/crates/core/src/storage/utils.rs b/crates/core/src/storage/utils.rs index e4dde08387..7ea5464b31 100644 --- a/crates/core/src/storage/utils.rs +++ b/crates/core/src/storage/utils.rs @@ -1,6 +1,6 @@ //! 
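Reviewer note: the extension trait is a blanket impl, so any `ObjectStore` picks up `put_with_retries`/`delete_with_retries` once the trait is in scope. A small usage sketch against an in-memory store; the payload contents and retry count are arbitrary, and the `bytes`/`tokio` scaffolding is assumed:

```rust
use bytes::Bytes;
use deltalake_core::storage::ObjectStoreRetryExt;
use object_store::{memory::InMemory, path::Path};

#[tokio::main]
async fn main() -> object_store::Result<()> {
    let store = InMemory::new();
    let path = Path::from("_delta_log/tmp/commit.json.tmp");

    // Unconditional put/delete calls are retried up to 3 times on generic store errors.
    store
        .put_with_retries(&path, Bytes::from_static(b"{}").into(), 3)
        .await?;
    store.delete_with_retries(&path, 3).await?;
    Ok(())
}
```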
Utility functions for working across Delta tables -use chrono::{NaiveDateTime, TimeZone, Utc}; +use chrono::DateTime; use futures::TryStreamExt; use object_store::path::Path; use object_store::{DynObjectStore, ObjectMeta, Result as ObjectStoreResult}; @@ -32,14 +32,13 @@ impl TryFrom<&Add> for ObjectMeta { type Error = DeltaTableError; fn try_from(value: &Add) -> DeltaResult { - let last_modified = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_millis(value.modification_time).ok_or( - DeltaTableError::from(crate::protocol::ProtocolError::InvalidField(format!( - "invalid modification_time: {:?}", - value.modification_time - ))), - )?, - ); + let last_modified = DateTime::from_timestamp_millis(value.modification_time).ok_or( + DeltaTableError::from(crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + value.modification_time + ))), + )?; + Ok(Self { // TODO this won't work for absolute paths, since Paths are always relative to store. location: Path::parse(value.path.as_str())?, diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index e9bf74d1e5..b421a6199b 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -39,7 +39,7 @@ impl From for DeltaTableError { } /// possible version specifications for loading a delta table -#[derive(Debug, Clone, PartialEq, Eq, Default)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] pub enum DeltaVersion { /// load the newest version #[default] @@ -186,7 +186,7 @@ impl DeltaTableBuilder { ensure_file_location_exists(PathBuf::from(table_uri.as_ref()))?; } - let url = ensure_table_uri(&table_uri).expect("The specified table_uri is not valid"); + let url = ensure_table_uri(&table_uri)?; debug!("creating table builder with {url}"); Ok(Self { @@ -321,7 +321,7 @@ impl DeltaTableBuilder { /// Build the [`DeltaTable`] and load its state pub async fn load(self) -> DeltaResult { - let version = self.options.version.clone(); + let version = self.options.version; let mut table = self.build()?; match version { DeltaVersion::Newest => table.load().await?, @@ -554,4 +554,11 @@ mod tests { let url = ensure_table_uri(&expected).unwrap(); assert_eq!(expected.as_str().trim_end_matches('/'), url.as_str()); } + + #[test] + fn test_invalid_uri() { + // Urls should round trips as-is + DeltaTableBuilder::from_valid_uri("this://is.nonsense") + .expect_err("this should be an error"); + } } diff --git a/crates/core/src/table/config.rs b/crates/core/src/table/config.rs index 24b11a01a4..47307cfecd 100644 --- a/crates/core/src/table/config.rs +++ b/crates/core/src/table/config.rs @@ -2,12 +2,12 @@ use std::time::Duration; use std::{collections::HashMap, str::FromStr}; +use delta_kernel::features::ColumnMappingMode; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; -use crate::errors::DeltaTableError; - use super::Constraint; +use crate::errors::DeltaTableError; /// Typed property keys that can be defined on a delta table /// @@ -208,6 +208,9 @@ macro_rules! 
table_config { /// Well known delta table configuration pub struct TableConfig<'a>(pub(crate) &'a HashMap>); +/// Default num index cols +pub const DEFAULT_NUM_INDEX_COLS: i32 = 32; + impl<'a> TableConfig<'a> { table_config!( ( @@ -249,7 +252,7 @@ impl<'a> TableConfig<'a> { ( "true to enable deletion vectors and predictive I/O for updates.", DeltaConfigKey::EnableDeletionVectors, - enable_deletio0n_vectors, + enable_deletion_vectors, bool, // in databricks the default is dependent on the workspace settings and runtime version // https://learn.microsoft.com/en-us/azure/databricks/administration-guide/workspace-settings/deletion-vectors @@ -274,7 +277,7 @@ impl<'a> TableConfig<'a> { DeltaConfigKey::CheckpointInterval, checkpoint_interval, i32, - 10 + 100 ), ); @@ -289,7 +292,7 @@ impl<'a> TableConfig<'a> { /// than this value. Otherwise, the query may not be able to restart, as it must still read old files. pub fn deleted_file_retention_duration(&self) -> Duration { lazy_static! { - static ref DEFAULT_DURATION: Duration = parse_interval("interval 1 week").unwrap(); + static ref DEFAULT_DURATION: Duration = parse_interval("interval 1 weeks").unwrap(); } self.0 .get(DeltaConfigKey::DeletedFileRetentionDuration.as_ref()) @@ -305,7 +308,7 @@ impl<'a> TableConfig<'a> { /// constant time. Operations on history are parallel but will become more expensive as the log size increases. pub fn log_retention_duration(&self) -> Duration { lazy_static! { - static ref DEFAULT_DURATION: Duration = parse_interval("interval 30 day").unwrap(); + static ref DEFAULT_DURATION: Duration = parse_interval("interval 30 days").unwrap(); } self.0 .get(DeltaConfigKey::LogRetentionDuration.as_ref()) @@ -362,7 +365,7 @@ impl<'a> TableConfig<'a> { } } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)] /// The isolation level applied during transaction pub enum IsolationLevel { /// The strongest isolation level. It ensures that committed write operations @@ -460,49 +463,6 @@ impl FromStr for CheckpointPolicy { } } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] -/// The Column Mapping modes used for reading and writing data -#[serde(rename_all = "camelCase")] -pub enum ColumnMappingMode { - /// No column mapping is applied - None, - /// Columns are mapped by their field_id in parquet - Id, - /// Columns are mapped to a physical name - Name, -} - -impl Default for ColumnMappingMode { - fn default() -> Self { - Self::None - } -} - -impl AsRef for ColumnMappingMode { - fn as_ref(&self) -> &str { - match self { - Self::None => "none", - Self::Id => "id", - Self::Name => "name", - } - } -} - -impl FromStr for ColumnMappingMode { - type Err = DeltaTableError; - - fn from_str(s: &str) -> Result { - match s.to_ascii_lowercase().as_str() { - "none" => Ok(Self::None), - "id" => Ok(Self::Id), - "name" => Ok(Self::Name), - _ => Err(DeltaTableError::Generic( - "Invalid string for ColumnMappingMode".into(), - )), - } - } -} - const SECONDS_PER_MINUTE: u64 = 60; const SECONDS_PER_HOUR: u64 = 60 * SECONDS_PER_MINUTE; const SECONDS_PER_DAY: u64 = 24 * SECONDS_PER_HOUR; @@ -525,14 +485,14 @@ fn parse_interval(value: &str) -> Result { let number = number as u64; let duration = match it.next().ok_or_else(not_an_interval)? 
{ - "nanosecond" => Duration::from_nanos(number), - "microsecond" => Duration::from_micros(number), - "millisecond" => Duration::from_millis(number), - "second" => Duration::from_secs(number), - "minute" => Duration::from_secs(number * SECONDS_PER_MINUTE), - "hour" => Duration::from_secs(number * SECONDS_PER_HOUR), - "day" => Duration::from_secs(number * SECONDS_PER_DAY), - "week" => Duration::from_secs(number * SECONDS_PER_WEEK), + "nanosecond" | "nanoseconds" => Duration::from_nanos(number), + "microsecond" | "microseconds" => Duration::from_micros(number), + "millisecond" | "milliseconds" => Duration::from_millis(number), + "second" | "seconds" => Duration::from_secs(number), + "minute" | "minutes" => Duration::from_secs(number * SECONDS_PER_MINUTE), + "hour" | "hours" => Duration::from_secs(number * SECONDS_PER_HOUR), + "day" | "days" => Duration::from_secs(number * SECONDS_PER_DAY), + "week" | "weeks" => Duration::from_secs(number * SECONDS_PER_WEEK), unit => { return Err(DeltaConfigError::Validation(format!( "Unknown unit '{unit}'" @@ -591,7 +551,7 @@ mod tests { fn get_long_from_metadata_test() { let md = dummy_metadata(); let config = TableConfig(&md.configuration); - assert_eq!(config.checkpoint_interval(), 10,) + assert_eq!(config.checkpoint_interval(), 100,) } #[test] @@ -620,36 +580,76 @@ mod tests { Duration::from_nanos(123) ); + assert_eq!( + parse_interval("interval 123 nanoseconds").unwrap(), + Duration::from_nanos(123) + ); + assert_eq!( parse_interval("interval 123 microsecond").unwrap(), Duration::from_micros(123) ); + assert_eq!( + parse_interval("interval 123 microseconds").unwrap(), + Duration::from_micros(123) + ); + assert_eq!( parse_interval("interval 123 millisecond").unwrap(), Duration::from_millis(123) ); + assert_eq!( + parse_interval("interval 123 milliseconds").unwrap(), + Duration::from_millis(123) + ); + assert_eq!( parse_interval("interval 123 second").unwrap(), Duration::from_secs(123) ); + assert_eq!( + parse_interval("interval 123 seconds").unwrap(), + Duration::from_secs(123) + ); + assert_eq!( parse_interval("interval 123 minute").unwrap(), Duration::from_secs(123 * 60) ); + assert_eq!( + parse_interval("interval 123 minutes").unwrap(), + Duration::from_secs(123 * 60) + ); + assert_eq!( parse_interval("interval 123 hour").unwrap(), Duration::from_secs(123 * 3600) ); + assert_eq!( + parse_interval("interval 123 hours").unwrap(), + Duration::from_secs(123 * 3600) + ); + assert_eq!( parse_interval("interval 123 day").unwrap(), Duration::from_secs(123 * 86400) ); + assert_eq!( + parse_interval("interval 123 days").unwrap(), + Duration::from_secs(123 * 86400) + ); + + assert_eq!( + parse_interval("interval 123 week").unwrap(), + Duration::from_secs(123 * 604800) + ); + assert_eq!( parse_interval("interval 123 week").unwrap(), Duration::from_secs(123 * 604800) diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs index 7615c72dc3..10ca7bd770 100644 --- a/crates/core/src/table/mod.rs +++ b/crates/core/src/table/mod.rs @@ -1,24 +1,24 @@ //! 
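Reviewer note: `parse_interval` now accepts both singular and plural unit spellings (matching strings such as `interval 1 weeks` and `interval 30 days` used for the retention defaults above). Since the function is private to `table/config.rs`, the sketch below is written as one more unit test alongside the existing ones rather than as external API usage; the test name is made up:

```rust
#[test]
fn plural_and_singular_interval_units_agree() {
    use std::time::Duration;

    // Both spellings should map to the same Duration.
    assert_eq!(
        parse_interval("interval 1 week").unwrap(),
        parse_interval("interval 1 weeks").unwrap(),
    );
    assert_eq!(
        parse_interval("interval 30 days").unwrap(),
        Duration::from_secs(30 * 86_400),
    );
}
```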
Delta Table read and write implementation -use std::cmp::Ordering; +use std::cmp::{min, Ordering}; use std::collections::HashMap; use std::fmt; use std::fmt::Formatter; use chrono::{DateTime, Utc}; -use futures::TryStreamExt; +use futures::{StreamExt, TryStreamExt}; use object_store::{path::Path, ObjectStore}; use serde::de::{Error, SeqAccess, Visitor}; use serde::ser::SerializeSeq; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use tracing::debug; use self::builder::DeltaTableConfig; use self::state::DeltaTableState; use crate::kernel::{ Action, CommitInfo, DataCheck, DataType, LogicalFile, Metadata, Protocol, StructType, + Transaction, }; -use crate::logstore::{self, LogStoreConfig, LogStoreRef}; +use crate::logstore::{self, extract_version_from_filename, LogStoreConfig, LogStoreRef}; use crate::partitions::PartitionFilter; use crate::storage::{commit_uri_from_version, ObjectStoreRef}; use crate::{DeltaResult, DeltaTableError}; @@ -163,7 +163,6 @@ pub(crate) fn get_partition_col_data_types<'a>( // When loading `partitionValues_parsed` we have to convert the stringified partition values back to the correct data type. schema .fields() - .iter() .filter_map(|f| { if metadata .partition_columns @@ -190,6 +189,7 @@ pub enum PeekCommit { } /// In memory representation of a Delta Table +#[derive(Clone)] pub struct DeltaTable { /// The state of the table as of the most recent loaded Delta log entry. pub state: Option, @@ -287,6 +287,11 @@ impl DeltaTable { self.log_store.object_store() } + /// Check if the [`DeltaTable`] exists + pub async fn verify_deltatable_existence(&self) -> DeltaResult { + self.log_store.is_delta_table_location().await + } + /// The URI of the underlying data pub fn table_uri(&self) -> String { self.log_store.root_uri() @@ -340,10 +345,6 @@ impl DeltaTable { &mut self, max_version: Option, ) -> Result<(), DeltaTableError> { - debug!( - "incremental update with version({}) and max_version({max_version:?})", - self.version(), - ); match self.state.as_mut() { Some(state) => state.update(self.log_store.clone(), max_version).await, _ => { @@ -461,7 +462,7 @@ impl DeltaTable { .map(|path| self.log_store.to_uri(&path))) } - /// Get the number of files in the table - retrn 0 if no metadata is loaded + /// Get the number of files in the table - returns 0 if no metadata is loaded pub fn get_files_count(&self) -> usize { self.state.as_ref().map(|s| s.files_count()).unwrap_or(0) } @@ -486,10 +487,11 @@ impl DeltaTable { } /// Returns the current version of the DeltaTable based on the loaded metadata. 
- pub fn get_app_transaction_version(&self) -> HashMap { + pub fn get_app_transaction_version(&self) -> HashMap { self.state .as_ref() - .map(|s| s.app_transaction_version().clone()) + .and_then(|s| s.app_transaction_version().ok()) + .map(|it| it.map(|t| (t.app_id.clone(), t)).collect()) .unwrap_or_default() } @@ -513,9 +515,29 @@ impl DeltaTable { &mut self, datetime: DateTime, ) -> Result<(), DeltaTableError> { - let mut min_version = 0; + let mut min_version: i64 = -1; + let log_store = self.log_store(); + let prefix = Some(log_store.log_path()); + let offset_path = commit_uri_from_version(min_version); + let object_store = log_store.object_store(); + let mut files = object_store.list_with_offset(prefix, &offset_path); + + while let Some(obj_meta) = files.next().await { + let obj_meta = obj_meta?; + if let Some(log_version) = extract_version_from_filename(obj_meta.location.as_ref()) { + if min_version == -1 { + min_version = log_version + } else { + min_version = min(min_version, log_version); + } + } + if min_version == 0 { + break; + } + } let mut max_version = self.get_latest_version().await?; let mut version = min_version; + let lowest_table_version = min_version; let target_ts = datetime.timestamp_millis(); // binary search @@ -537,8 +559,8 @@ impl DeltaTable { } } - if version < 0 { - version = 0; + if version < lowest_table_version { + version = lowest_table_version; } self.load_version(version).await diff --git a/crates/core/src/table/state.rs b/crates/core/src/table/state.rs index ab5c229c49..9544198581 100644 --- a/crates/core/src/table/state.rs +++ b/crates/core/src/table/state.rs @@ -1,6 +1,6 @@ //! The module for delta table state. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use chrono::Utc; @@ -8,22 +8,21 @@ use futures::TryStreamExt; use object_store::{path::Path, ObjectStore}; use serde::{Deserialize, Serialize}; -use super::config::TableConfig; -use super::{get_partition_col_data_types, DeltaTableConfig}; +use super::{config::TableConfig, get_partition_col_data_types, DeltaTableConfig}; +#[cfg(test)] +use crate::kernel::Action; use crate::kernel::{ - Action, Add, DataType, EagerSnapshot, LogDataHandler, LogicalFile, Metadata, Protocol, Remove, - StructType, + ActionType, Add, AddCDCFile, DataType, EagerSnapshot, LogDataHandler, LogicalFile, Metadata, + Protocol, Remove, StructType, Transaction, }; use crate::logstore::LogStore; use crate::partitions::{DeltaTablePartition, PartitionFilter}; -use crate::protocol::DeltaOperation; use crate::{DeltaResult, DeltaTableError}; /// State snapshot currently held by the Delta Table instance. 
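The timestamp-based load above no longer assumes commit 0 still exists; it lists `_delta_log` with an offset to find the oldest surviving commit and clamps the binary search to that floor. A minimal time-travel sketch using the public API (assuming `open_table` and `chrono`, as used in the tests elsewhere in this patch):

```rust
use chrono::{DateTime, Utc};
use deltalake_core::{open_table, DeltaResult, DeltaTable};

/// Load the table as it existed at (or just before) `point_in_time`.
/// If older commits have been cleaned up, the search now bottoms out at the
/// oldest commit still present in `_delta_log` rather than at version 0.
async fn table_as_of(uri: &str, point_in_time: DateTime<Utc>) -> DeltaResult<DeltaTable> {
    let mut table = open_table(uri).await?;
    table.load_with_datetime(point_in_time).await?;
    Ok(table)
}
```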
#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct DeltaTableState { - app_transaction_version: HashMap, pub(crate) snapshot: EagerSnapshot, } @@ -35,11 +34,15 @@ impl DeltaTableState { config: DeltaTableConfig, version: Option, ) -> DeltaResult { - let snapshot = EagerSnapshot::try_new(table_root, store.clone(), config, version).await?; - Ok(Self { - snapshot, - app_transaction_version: HashMap::new(), - }) + let snapshot = EagerSnapshot::try_new_with_visitor( + table_root, + store.clone(), + config, + version, + HashSet::from([ActionType::Txn]), + ) + .await?; + Ok(Self { snapshot }) } /// Return table version @@ -57,7 +60,9 @@ impl DeltaTableState { /// Construct a delta table state object from a list of actions #[cfg(test)] pub fn from_actions(actions: Vec) -> DeltaResult { - use crate::protocol::SaveMode; + use crate::operations::transaction::CommitData; + use crate::protocol::{DeltaOperation, SaveMode}; + let metadata = actions .iter() .find_map(|a| match a { @@ -73,7 +78,7 @@ impl DeltaTableState { }) .ok_or(DeltaTableError::NotInitialized)?; - let commit_data = [( + let commit_data = [CommitData::new( actions, DeltaOperation::Create { mode: SaveMode::Append, @@ -81,13 +86,12 @@ impl DeltaTableState { protocol: protocol.clone(), metadata: metadata.clone(), }, - None, + HashMap::new(), + Vec::new(), )]; + let snapshot = EagerSnapshot::new_test(&commit_data).unwrap(); - Ok(Self { - app_transaction_version: Default::default(), - snapshot, - }) + Ok(Self { snapshot }) } /// Returns a semantic accessor to the currently loaded log data. @@ -133,11 +137,22 @@ impl DeltaTableState { Ok(self.snapshot.file_actions()?.collect()) } + /// Full list of add actions representing all parquet files that are part of the current + /// delta table state. + pub fn file_actions_iter(&self) -> DeltaResult + '_> { + self.snapshot.file_actions() + } + /// Get the number of files in the current table state pub fn files_count(&self) -> usize { self.snapshot.files_count() } + /// Full list of all of the CDC files added as part of the changeDataFeed feature + pub fn cdc_files(&self) -> DeltaResult + '_> { + self.snapshot.cdc_files() + } + /// Returns an iterator of file names present in the loaded state #[inline] pub fn file_paths_iter(&self) -> impl Iterator + '_ { @@ -146,10 +161,9 @@ impl DeltaTableState { .map(|add| add.object_store_path()) } - /// HashMap containing the last txn version stored for every app id writing txn - /// actions. - pub fn app_transaction_version(&self) -> &HashMap { - &self.app_transaction_version + /// HashMap containing the last transaction stored for every application. + pub fn app_transaction_version(&self) -> DeltaResult + '_> { + self.snapshot.transactions() } /// The most recent protocol of the table. @@ -172,26 +186,9 @@ impl DeltaTableState { self.snapshot.table_config() } - /// Merges new state information into our state - /// - /// The DeltaTableState also carries the version information for the given state, - /// as there is a one-to-one match between a table state and a version. In merge/update - /// scenarios we cannot infer the intended / correct version number. By default this - /// function will update the tracked version if the version on `new_state` is larger then the - /// currently set version however it is up to the caller to update the `version` field according - /// to the version the merged state represents. 
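With the snapshot now replaying `txn` actions through the visitor, per-application transaction versions come back as kernel `Transaction` values rather than a cached `HashMap<String, i64>`. A sketch of the usual idempotent-writer check on top of the matching `DeltaTable::get_app_transaction_version` change above; the `version` field name is an assumption about the `Transaction` action shape:

```rust
use deltalake_core::DeltaTable;

/// Returns the last transaction version committed by `app_id`, if any.
fn last_committed_version(table: &DeltaTable, app_id: &str) -> Option<i64> {
    table
        .get_app_transaction_version()
        .get(app_id)
        // `version` is assumed to be the monotonically increasing value
        // recorded by the writer in its txn action.
        .map(|txn| txn.version)
}

/// Writers compare their own version against the recorded one to skip
/// commits that were already applied.
fn should_commit(table: &DeltaTable, app_id: &str, my_version: i64) -> bool {
    match last_committed_version(table, app_id) {
        Some(committed) => my_version > committed,
        None => true,
    }
}
```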
- pub(crate) fn merge( - &mut self, - actions: Vec, - operation: &DeltaOperation, - version: i64, - ) -> Result<(), DeltaTableError> { - let commit_infos = vec![(actions, operation.clone(), None)]; - let new_version = self.snapshot.advance(&commit_infos)?; - if new_version != version { - return Err(DeltaTableError::Generic("Version mismatch".to_string())); - } - Ok(()) + /// Obtain the Eager snapshot of the state + pub fn snapshot(&self) -> &EagerSnapshot { + &self.snapshot } /// Update the state of the table to the given version. diff --git a/crates/core/src/table/state_arrow.rs b/crates/core/src/table/state_arrow.rs index 143ab23d1c..197e8d7fd3 100644 --- a/crates/core/src/table/state_arrow.rs +++ b/crates/core/src/table/state_arrow.rs @@ -14,9 +14,9 @@ use arrow_array::{ StringArray, StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, }; use arrow_schema::{DataType, Field, Fields, TimeUnit}; +use delta_kernel::features::ColumnMappingMode; use itertools::Itertools; -use super::config::ColumnMappingMode; use super::state::DeltaTableState; use crate::errors::DeltaTableError; use crate::kernel::{Add, DataType as DeltaDataType, StructType}; @@ -91,7 +91,7 @@ impl DeltaTableState { .fields .iter() .map(|field| Cow::Owned(field.name().clone())) - .zip(partition_cols_batch.columns().iter().map(Arc::clone)), + .zip(partition_cols_batch.columns().iter().cloned()), ) } @@ -103,7 +103,7 @@ impl DeltaTableState { .fields .iter() .map(|field| Cow::Owned(field.name().clone())) - .zip(stats.columns().iter().map(Arc::clone)), + .zip(stats.columns().iter().cloned()), ); } if files.iter().any(|add| add.deletion_vector.is_some()) { @@ -114,7 +114,7 @@ impl DeltaTableState { .fields .iter() .map(|field| Cow::Owned(field.name().clone())) - .zip(delvs.columns().iter().map(Arc::clone)), + .zip(delvs.columns().iter().cloned()), ); } if files.iter().any(|add| { @@ -129,7 +129,7 @@ impl DeltaTableState { .fields .iter() .map(|field| Cow::Owned(field.name().clone())) - .zip(tags.columns().iter().map(Arc::clone)), + .zip(tags.columns().iter().cloned()), ); } @@ -149,7 +149,13 @@ impl DeltaTableState { .map( |name| -> Result { let schema = metadata.schema()?; - let field = schema.field_with_name(name)?; + let field = + schema + .field(name) + .ok_or(DeltaTableError::MetadataError(format!( + "Invalid partition column {0}", + name + )))?; Ok(field.data_type().try_into()?) }, ) @@ -173,12 +179,12 @@ impl DeltaTableState { .map(|name| -> Result<_, DeltaTableError> { let physical_name = self .schema() - .field_with_name(name) - .or(Err(DeltaTableError::MetadataError(format!( + .field(name) + .ok_or(DeltaTableError::MetadataError(format!( "Invalid partition column {0}", name - ))))? - .physical_name()? + )))? + .physical_name(column_mapping_mode)? .to_string(); Ok((physical_name, name.as_str())) }) @@ -328,7 +334,7 @@ impl DeltaTableState { for add in files { if let Some(value) = &add.deletion_vector { - storage_type.append_value(&value.storage_type); + storage_type.append_value(value.storage_type); path_or_inline_div.append_value(value.path_or_inline_dv.clone()); if let Some(ofs) = value.offset { offset.append_value(ofs); @@ -397,8 +403,7 @@ impl DeltaTableState { flatten: bool, ) -> Result { let stats: Vec> = self - .file_actions()? - .iter() + .file_actions_iter()? 
.map(|f| { f.get_stats() .map_err(|err| DeltaTableError::InvalidStatsJson { json_err: err }) @@ -447,11 +452,12 @@ impl DeltaTableState { .map(|(path, datatype)| -> Result { let null_count = stats .iter() - .flat_map(|maybe_stat| { + .map(|maybe_stat| { maybe_stat .as_ref() .map(|stat| resolve_column_count_stat(&stat.null_count, &path)) }) + .map(|null_count| null_count.flatten()) .collect::>>(); let null_count = Some(value_vec_to_array(null_count, |values| { Ok(Arc::new(arrow::array::Int64Array::from(values))) @@ -463,11 +469,12 @@ impl DeltaTableState { let min_values = if matches!(datatype, DeltaDataType::Primitive(_)) { let min_values = stats .iter() - .flat_map(|maybe_stat| { + .map(|maybe_stat| { maybe_stat .as_ref() .map(|stat| resolve_column_value_stat(&stat.min_values, &path)) }) + .map(|min_value| min_value.flatten()) .collect::>>(); Some(value_vec_to_array(min_values, |values| { @@ -480,11 +487,12 @@ impl DeltaTableState { let max_values = if matches!(datatype, DeltaDataType::Primitive(_)) { let max_values = stats .iter() - .flat_map(|maybe_stat| { + .map(|maybe_stat| { maybe_stat .as_ref() .map(|stat| resolve_column_value_stat(&stat.max_values, &path)) }) + .map(|max_value| max_value.flatten()) .collect::>>(); Some(value_vec_to_array(max_values, |values| { json_value_to_array_general(&arrow_type, values.into_iter()) @@ -570,7 +578,7 @@ impl DeltaTableState { // into StructArrays, until it is consolidated into a single array. columnar_stats = columnar_stats .into_iter() - .group_by(|col_stat| { + .chunk_by(|col_stat| { if col_stat.path.len() < level { col_stat.path.clone() } else { @@ -672,7 +680,6 @@ impl<'a> SchemaLeafIterator<'a> { SchemaLeafIterator { fields_remaining: schema .fields() - .iter() .map(|field| (vec![field.name().as_ref()], field.data_type())) .collect(), } @@ -737,8 +744,8 @@ fn json_value_to_array_general<'a>( .map(|value| value.and_then(|value| value.as_str().map(|value| value.as_bytes()))) .collect_vec(), ))), - DataType::Timestamp(TimeUnit::Microsecond, None) => { - Ok(Arc::new(TimestampMicrosecondArray::from( + DataType::Timestamp(TimeUnit::Microsecond, tz) => match tz { + None => Ok(Arc::new(TimestampMicrosecondArray::from( values .map(|value| { value.and_then(|value| { @@ -746,13 +753,32 @@ fn json_value_to_array_general<'a>( }) }) .collect_vec(), - ))) - } + ))), + Some(tz_str) if tz_str.as_ref() == "UTC" => Ok(Arc::new( + TimestampMicrosecondArray::from( + values + .map(|value| { + value.and_then(|value| { + value.as_str().and_then(TimestampMicrosecondType::parse) + }) + }) + .collect_vec(), + ) + .with_timezone("UTC"), + )), + _ => Err(DeltaTableError::Generic(format!( + "Invalid datatype {}", + datatype + ))), + }, DataType::Date32 => Ok(Arc::new(Date32Array::from( values .map(|value| value.and_then(|value| value.as_str().and_then(Date32Type::parse))) .collect_vec(), ))), - _ => Err(DeltaTableError::Generic("Invalid datatype".to_string())), + _ => Err(DeltaTableError::Generic(format!( + "Invalid datatype {}", + datatype + ))), } } diff --git a/crates/core/src/writer/json.rs b/crates/core/src/writer/json.rs index a51dd86b58..2cf7f6a950 100644 --- a/crates/core/src/writer/json.rs +++ b/crates/core/src/writer/json.rs @@ -1,11 +1,13 @@ //! 
Main writer API to write json messages to delta table -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use std::convert::TryFrom; use std::sync::Arc; use arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use arrow::record_batch::*; use bytes::Bytes; +use delta_kernel::expressions::Scalar; +use indexmap::IndexMap; use object_store::path::Path; use object_store::ObjectStore; use parquet::{ @@ -21,10 +23,12 @@ use super::utils::{ arrow_schema_without_partitions, next_data_path, record_batch_from_message, record_batch_without_partitions, }; -use super::{DeltaWriter, DeltaWriterError}; +use super::{DeltaWriter, DeltaWriterError, WriteMode}; use crate::errors::DeltaTableError; -use crate::kernel::{Add, PartitionsExt, Scalar, StructType}; +use crate::kernel::{scalars::ScalarExt, Add, PartitionsExt, StructType}; +use crate::storage::ObjectStoreRetryExt; use crate::table::builder::DeltaTableBuilder; +use crate::table::config::DEFAULT_NUM_INDEX_COLS; use crate::writer::utils::ShareableBuffer; use crate::DeltaTable; @@ -45,7 +49,7 @@ pub(crate) struct DataArrowWriter { writer_properties: WriterProperties, buffer: ShareableBuffer, arrow_writer: ArrowWriter, - partition_values: BTreeMap, + partition_values: IndexMap, buffered_record_batch_count: usize, } @@ -153,7 +157,7 @@ impl DataArrowWriter { writer_properties.clone(), )?; - let partition_values = BTreeMap::new(); + let partition_values = IndexMap::new(); let buffered_record_batch_count = 0; Ok(Self { @@ -285,8 +289,20 @@ impl JsonWriter { #[async_trait::async_trait] impl DeltaWriter> for JsonWriter { - /// Writes the given values to internal parquet buffers for each represented partition. + /// Write a chunk of values into the internal write buffers with the default write mode async fn write(&mut self, values: Vec) -> Result<(), DeltaTableError> { + self.write_with_mode(values, WriteMode::Default).await + } + + /// Writes the given values to internal parquet buffers for each represented partition. 
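For reference, a minimal end-to-end sketch of the `JsonWriter` path touched above, mirroring the constructor arguments used in the tests added later in this patch (table URI, partition column, and row contents are illustrative):

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema as ArrowSchema};
use deltalake_core::writer::{DeltaWriter, JsonWriter};
use deltalake_core::{open_table, DeltaResult};
use serde_json::json;

async fn append_json_rows(table_uri: &str) -> DeltaResult<()> {
    let mut table = open_table(table_uri).await?;

    // Schema and partition column are illustrative and should match the table.
    let arrow_schema = Arc::new(ArrowSchema::new(vec![
        Field::new("id", DataType::Utf8, true),
        Field::new("value", DataType::Int32, true),
        Field::new("modified", DataType::Utf8, true),
    ]));
    let mut writer = JsonWriter::try_new(
        table_uri.to_string(),
        arrow_schema,
        Some(vec!["modified".to_string()]),
        None,
    )?;

    // write() now forwards to write_with_mode(values, WriteMode::Default).
    writer
        .write(vec![json!({"id": "A", "value": 42, "modified": "2021-02-01"})])
        .await?;

    // Flush buffered parquet files and commit the resulting Add actions.
    writer.flush_and_commit(&mut table).await?;
    Ok(())
}
```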
+ async fn write_with_mode( + &mut self, + values: Vec, + mode: WriteMode, + ) -> Result<(), DeltaTableError> { + if mode != WriteMode::Default { + warn!("The JsonWriter does not currently support non-default write modes, falling back to default mode"); + } let mut partial_writes: Vec<(Value, ParquetError)> = Vec::new(); let arrow_schema = self.arrow_schema(); let divided = self.divide_by_partition_values(values)?; @@ -347,13 +363,17 @@ impl DeltaWriter> for JsonWriter { let path = next_data_path(&prefix, 0, &uuid, &writer.writer_properties); let obj_bytes = Bytes::from(writer.buffer.to_vec()); let file_size = obj_bytes.len() as i64; - self.storage.put(&path, obj_bytes).await?; + self.storage + .put_with_retries(&path, obj_bytes.into(), 15) + .await?; actions.push(create_add( &writer.partition_values, path.to_string(), file_size, &metadata, + DEFAULT_NUM_INDEX_COLS, + &None, )?); } Ok(actions) @@ -397,8 +417,8 @@ fn quarantine_failed_parquet_rows( fn extract_partition_values( partition_cols: &[String], record_batch: &RecordBatch, -) -> Result, DeltaWriterError> { - let mut partition_values = BTreeMap::new(); +) -> Result, DeltaWriterError> { + let mut partition_values = IndexMap::new(); for col_name in partition_cols.iter() { let arrow_schema = record_batch.schema(); @@ -499,7 +519,7 @@ mod tests { &record_batch ) .unwrap(), - BTreeMap::from([ + IndexMap::from([ (String::from("col1"), Scalar::Integer(1)), (String::from("col2"), Scalar::Integer(2)), (String::from("col3"), Scalar::Null(DataType::INTEGER)), @@ -507,7 +527,7 @@ mod tests { ); assert_eq!( extract_partition_values(&[String::from("col1")], &record_batch).unwrap(), - BTreeMap::from([(String::from("col1"), Scalar::Integer(1)),]) + IndexMap::from([(String::from("col1"), Scalar::Integer(1)),]) ); assert!(extract_partition_values(&[String::from("col4")], &record_batch).is_err()) } @@ -543,4 +563,100 @@ mod tests { }) )); } + + // The following sets of tests are related to #1386 and mergeSchema support + // + mod schema_evolution { + use super::*; + + #[tokio::test] + async fn test_json_write_mismatched_values() { + let table_dir = tempfile::tempdir().unwrap(); + let schema = get_delta_schema(); + let path = table_dir.path().to_str().unwrap().to_string(); + + let arrow_schema = >::try_from(&schema).unwrap(); + let mut writer = JsonWriter::try_new( + path.clone(), + Arc::new(arrow_schema), + Some(vec!["modified".to_string()]), + None, + ) + .unwrap(); + + let data = serde_json::json!( + { + "id" : "A", + "value": 42, + "modified": "2021-02-01" + } + ); + + writer.write(vec![data]).await.unwrap(); + let add_actions = writer.flush().await.unwrap(); + assert_eq!(add_actions.len(), 1); + + let second_data = serde_json::json!( + { + "id" : 1, + "name" : "Ion" + } + ); + + if writer.write(vec![second_data]).await.is_ok() { + panic!("Should not have successfully written"); + } + } + + #[tokio::test] + async fn test_json_write_mismatched_schema() { + use crate::operations::create::CreateBuilder; + let table_dir = tempfile::tempdir().unwrap(); + let schema = get_delta_schema(); + let path = table_dir.path().to_str().unwrap().to_string(); + + let mut table = CreateBuilder::new() + .with_location(&path) + .with_table_name("test-table") + .with_comment("A table for running tests") + .with_columns(schema.fields().cloned()) + .await + .unwrap(); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 0); + + let arrow_schema = >::try_from(&schema).unwrap(); + let mut writer = JsonWriter::try_new( + path.clone(), + 
Arc::new(arrow_schema), + Some(vec!["modified".to_string()]), + None, + ) + .unwrap(); + + let data = serde_json::json!( + { + "id" : "A", + "value": 42, + "modified": "2021-02-01" + } + ); + + writer.write(vec![data]).await.unwrap(); + let add_actions = writer.flush().await.unwrap(); + assert_eq!(add_actions.len(), 1); + + let second_data = serde_json::json!( + { + "postcode" : 1, + "name" : "Ion" + } + ); + + // TODO This should fail because we haven't asked to evolve the schema + writer.write(vec![second_data]).await.unwrap(); + writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(table.version(), 1); + } + } } diff --git a/crates/core/src/writer/mod.rs b/crates/core/src/writer/mod.rs index b39c8264cb..d3fe529a89 100644 --- a/crates/core/src/writer/mod.rs +++ b/crates/core/src/writer/mod.rs @@ -8,7 +8,7 @@ use serde_json::Value; use crate::errors::DeltaTableError; use crate::kernel::{Action, Add}; -use crate::operations::transaction::commit; +use crate::operations::transaction::CommitBuilder; use crate::protocol::{ColumnCountStat, DeltaOperation, SaveMode}; use crate::DeltaTable; @@ -116,17 +116,34 @@ impl From for DeltaTableError { DeltaWriterError::ObjectStore { source } => DeltaTableError::ObjectStore { source }, DeltaWriterError::Parquet { source } => DeltaTableError::Parquet { source }, DeltaWriterError::DeltaTable(e) => e, + DeltaWriterError::SchemaMismatch { .. } => DeltaTableError::SchemaMismatch { + msg: err.to_string(), + }, _ => DeltaTableError::Generic(err.to_string()), } } } +/// Write mode for the [DeltaWriter] +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum WriteMode { + /// Default write mode which will return an error if schemas do not match correctly + Default, + /// Merge the schema of the table with the newly written data + /// + /// [Read more here](https://delta.io/blog/2023-02-08-delta-lake-schema-evolution/) + MergeSchema, +} + #[async_trait] /// Trait for writing data to Delta tables pub trait DeltaWriter { - /// write a chunk of values into the internal write buffers. + /// Write a chunk of values into the internal write buffers with the default write mode async fn write(&mut self, values: T) -> Result<(), DeltaTableError>; + /// Wreite a chunk of values into the internal write buffers with the specified [WriteMode] + async fn write_with_mode(&mut self, values: T, mode: WriteMode) -> Result<(), DeltaTableError>; + /// Flush the internal write buffers to files in the delta table folder structure. /// The corresponding delta [`Add`] actions are returned and should be committed via a transaction. async fn flush(&mut self) -> Result, DeltaTableError>; @@ -135,27 +152,33 @@ pub trait DeltaWriter { /// and commit the changes to the Delta log, creating a new table version. 
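A usage sketch for the new `WriteMode::MergeSchema`, following the `RecordBatchWriter` test added further down in this patch (column names and values are illustrative; the target table is assumed to be unpartitioned, since merging with partition columns present is rejected):

```rust
use std::sync::Arc;

use arrow_array::{Int32Array, RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema as ArrowSchema};
use deltalake_core::writer::{DeltaWriter, RecordBatchWriter, WriteMode};
use deltalake_core::{DeltaResult, DeltaTable};

/// Append a batch whose schema carries an extra column, letting the writer
/// evolve the table schema instead of rejecting the mismatch.
async fn append_with_evolution(table: &mut DeltaTable) -> DeltaResult<i64> {
    let schema = Arc::new(ArrowSchema::new(vec![
        Field::new("id", DataType::Utf8, true),
        Field::new("value", DataType::Int32, true),
        // A column the table does not know about yet.
        Field::new("vid", DataType::Int32, true),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(StringArray::from(vec![Some("A"), Some("B")])),
            Arc::new(Int32Array::from(vec![Some(1), Some(2)])),
            Arc::new(Int32Array::from(vec![Some(10), Some(20)])),
        ],
    )?;

    let mut writer = RecordBatchWriter::for_table(table)?;
    writer.write_with_mode(batch, WriteMode::MergeSchema).await?;
    // flush_and_commit appends a Metadata action carrying the evolved schema
    // alongside the Add actions when evolution actually happened.
    writer.flush_and_commit(table).await
}
```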
async fn flush_and_commit(&mut self, table: &mut DeltaTable) -> Result { let adds: Vec<_> = self.flush().await?.drain(..).map(Action::Add).collect(); - let snapshot = table.snapshot()?; - let partition_cols = snapshot.metadata().partition_columns.clone(); - let partition_by = if !partition_cols.is_empty() { - Some(partition_cols) - } else { - None - }; - let operation = DeltaOperation::Write { - mode: SaveMode::Append, - partition_by, - predicate: None, - }; - let version = commit( - table.log_store.as_ref(), - &adds, - operation, - Some(snapshot), - None, - ) - .await?; - table.update().await?; - Ok(version) + flush_and_commit(adds, table).await } } + +/// Method for flushing to be used by writers +pub(crate) async fn flush_and_commit( + adds: Vec, + table: &mut DeltaTable, +) -> Result { + let snapshot = table.snapshot()?; + let partition_cols = snapshot.metadata().partition_columns.clone(); + let partition_by = if !partition_cols.is_empty() { + Some(partition_cols) + } else { + None + }; + let operation = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by, + predicate: None, + }; + + let version = CommitBuilder::default() + .with_actions(adds) + .build(Some(snapshot), table.log_store.clone(), operation) + .await? + .version(); + table.update().await?; + Ok(version) +} diff --git a/crates/core/src/writer/record_batch.rs b/crates/core/src/writer/record_batch.rs index 48525a3335..d99673c8cb 100644 --- a/crates/core/src/writer/record_batch.rs +++ b/crates/core/src/writer/record_batch.rs @@ -5,19 +5,21 @@ //! the writer. Once written, add actions are returned by the writer. It's the users responsibility //! to create the transaction using those actions. -use std::collections::BTreeMap; use std::{collections::HashMap, sync::Arc}; -use arrow::array::{Array, UInt32Array}; +use arrow::array::{new_null_array, Array, UInt32Array}; use arrow::compute::{partition, take}; use arrow::record_batch::RecordBatch; use arrow_array::ArrayRef; use arrow_row::{RowConverter, SortField}; use arrow_schema::{ArrowError, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use bytes::Bytes; +use delta_kernel::expressions::Scalar; +use indexmap::IndexMap; use object_store::{path::Path, ObjectStore}; use parquet::{arrow::ArrowWriter, errors::ParquetError}; use parquet::{basic::Compression, file::properties::WriterProperties}; +use tracing::log::*; use uuid::Uuid; use super::stats::create_add; @@ -25,17 +27,22 @@ use super::utils::{ arrow_schema_without_partitions, next_data_path, record_batch_without_partitions, ShareableBuffer, }; -use super::{DeltaWriter, DeltaWriterError}; +use super::{DeltaWriter, DeltaWriterError, WriteMode}; use crate::errors::DeltaTableError; -use crate::kernel::{Add, PartitionsExt, Scalar, StructType}; +use crate::kernel::{scalars::ScalarExt, Action, Add, PartitionsExt, StructType}; +use crate::operations::cast::merge_schema; +use crate::storage::ObjectStoreRetryExt; use crate::table::builder::DeltaTableBuilder; +use crate::table::config::DEFAULT_NUM_INDEX_COLS; use crate::DeltaTable; /// Writes messages to a delta lake table. 
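The shared helper above routes commits through `CommitBuilder` instead of the removed free `commit` function. A sketch of committing pre-built actions directly, mirroring the builder usage in the optimize and commit-info tests later in this patch (the `Write`/`Append` operation shown is illustrative):

```rust
use deltalake_core::kernel::Action;
use deltalake_core::operations::transaction::CommitBuilder;
use deltalake_core::protocol::{DeltaOperation, SaveMode};
use deltalake_core::{DeltaResult, DeltaTable};

/// Commit already-prepared actions (e.g. Add/Remove) as a single new version.
async fn commit_actions(table: &mut DeltaTable, actions: Vec<Action>) -> DeltaResult<i64> {
    let operation = DeltaOperation::Write {
        mode: SaveMode::Append,
        partition_by: None,
        predicate: None,
    };
    let version = CommitBuilder::default()
        .with_actions(actions)
        .build(Some(table.snapshot()?), table.log_store(), operation)
        .await?
        .version();
    // Refresh the in-memory state so table.version() reflects the new commit.
    table.update().await?;
    Ok(version)
}
```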
pub struct RecordBatchWriter { storage: Arc, - arrow_schema_ref: Arc, + arrow_schema_ref: ArrowSchemaRef, + original_schema_ref: ArrowSchemaRef, writer_properties: WriterProperties, + should_evolve: bool, partition_columns: Vec, arrow_writers: HashMap, } @@ -67,9 +74,11 @@ impl RecordBatchWriter { Ok(Self { storage, - arrow_schema_ref: schema, + arrow_schema_ref: schema.clone(), + original_schema_ref: schema, writer_properties, partition_columns: partition_columns.unwrap_or_default(), + should_evolve: false, arrow_writers: HashMap::new(), }) } @@ -91,9 +100,11 @@ impl RecordBatchWriter { Ok(Self { storage: table.object_store(), - arrow_schema_ref, + arrow_schema_ref: arrow_schema_ref.clone(), + original_schema_ref: arrow_schema_ref.clone(), writer_properties, partition_columns, + should_evolve: false, arrow_writers: HashMap::new(), }) } @@ -127,30 +138,29 @@ impl RecordBatchWriter { pub async fn write_partition( &mut self, record_batch: RecordBatch, - partition_values: &BTreeMap, - ) -> Result<(), DeltaTableError> { + partition_values: &IndexMap, + mode: WriteMode, + ) -> Result { let arrow_schema = arrow_schema_without_partitions(&self.arrow_schema_ref, &self.partition_columns); let partition_key = partition_values.hive_partition_path(); let record_batch = record_batch_without_partitions(&record_batch, &self.partition_columns)?; - match self.arrow_writers.get_mut(&partition_key) { - Some(writer) => { - writer.write(&record_batch)?; - } + let written_schema = match self.arrow_writers.get_mut(&partition_key) { + Some(writer) => writer.write(&record_batch, mode)?, None => { let mut writer = PartitionWriter::new( arrow_schema, partition_values.clone(), self.writer_properties.clone(), )?; - writer.write(&record_batch)?; + let schema = writer.write(&record_batch, mode)?; let _ = self.arrow_writers.insert(partition_key, writer); + schema } - } - - Ok(()) + }; + Ok(written_schema) } /// Sets the writer properties for the underlying arrow writer. @@ -173,12 +183,32 @@ impl RecordBatchWriter { #[async_trait::async_trait] impl DeltaWriter for RecordBatchWriter { + /// Write a chunk of values into the internal write buffers with the default write mode + async fn write(&mut self, values: RecordBatch) -> Result<(), DeltaTableError> { + self.write_with_mode(values, WriteMode::Default).await + } /// Divides a single record batch into into multiple according to table partitioning. /// Values are written to arrow buffers, to collect data until it should be written to disk. - async fn write(&mut self, values: RecordBatch) -> Result<(), DeltaTableError> { + async fn write_with_mode( + &mut self, + values: RecordBatch, + mode: WriteMode, + ) -> Result<(), DeltaTableError> { + if mode == WriteMode::MergeSchema && !self.partition_columns.is_empty() { + return Err(DeltaTableError::Generic( + "Merging Schemas with partition columns present is currently unsupported" + .to_owned(), + )); + } + // Set the should_evolve flag for later in case the writer should perform schema evolution + // on its flush_and_commit + self.should_evolve = mode == WriteMode::MergeSchema; + for result in self.divide_by_partition_values(&values)? 
{ - self.write_partition(result.record_batch, &result.partition_values) + let schema = self + .write_partition(result.record_batch, &result.partition_values, mode) .await?; + self.arrow_schema_ref = schema; } Ok(()) } @@ -195,41 +225,66 @@ impl DeltaWriter for RecordBatchWriter { let path = next_data_path(&prefix, 0, &uuid, &writer.writer_properties); let obj_bytes = Bytes::from(writer.buffer.to_vec()); let file_size = obj_bytes.len() as i64; - self.storage.put(&path, obj_bytes).await?; + self.storage + .put_with_retries(&path, obj_bytes.into(), 15) + .await?; actions.push(create_add( &writer.partition_values, path.to_string(), file_size, &metadata, + DEFAULT_NUM_INDEX_COLS, + &None, )?); } Ok(actions) } + + /// Flush the internal write buffers to files in the delta table folder structure. + /// and commit the changes to the Delta log, creating a new table version. + async fn flush_and_commit(&mut self, table: &mut DeltaTable) -> Result { + use crate::kernel::{Metadata, StructType}; + let mut adds: Vec = self.flush().await?.drain(..).map(Action::Add).collect(); + + if self.arrow_schema_ref != self.original_schema_ref && self.should_evolve { + let schema: StructType = self.arrow_schema_ref.clone().try_into()?; + if !self.partition_columns.is_empty() { + return Err(DeltaTableError::Generic( + "Merging Schemas with partition columns present is currently unsupported" + .to_owned(), + )); + } + let part_cols: Vec = vec![]; + let metadata = Metadata::try_new(schema, part_cols, HashMap::new())?; + adds.push(Action::Metadata(metadata)); + } + super::flush_and_commit(adds, table).await + } } /// Helper container for partitioned record batches #[derive(Clone, Debug)] pub struct PartitionResult { /// values found in partition columns - pub partition_values: BTreeMap, + pub partition_values: IndexMap, /// remaining dataset with partition column values removed pub record_batch: RecordBatch, } struct PartitionWriter { - arrow_schema: Arc, + arrow_schema: ArrowSchemaRef, writer_properties: WriterProperties, pub(super) buffer: ShareableBuffer, pub(super) arrow_writer: ArrowWriter, - pub(super) partition_values: BTreeMap, + pub(super) partition_values: IndexMap, pub(super) buffered_record_batch_count: usize, } impl PartitionWriter { pub fn new( - arrow_schema: Arc, - partition_values: BTreeMap, + arrow_schema: ArrowSchemaRef, + partition_values: IndexMap, writer_properties: WriterProperties, ) -> Result { let buffer = ShareableBuffer::default(); @@ -254,21 +309,55 @@ impl PartitionWriter { /// Writes the record batch in-memory and updates internal state accordingly. /// This method buffers the write stream internally so it can be invoked for many /// record batches and flushed after the appropriate number of bytes has been written. 
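When `WriteMode::MergeSchema` is in effect, the rewritten `PartitionWriter::write` below merges the incoming batch schema into its own and back-fills columns the batch does not carry with nulls. A standalone sketch of that back-fill step using only the arrow APIs involved (the helper name is hypothetical, not the crate-internal `merge_schema`):

```rust
use std::sync::Arc;

use arrow_array::{new_null_array, Array, ArrayRef, Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema as ArrowSchema, SchemaRef};

/// Project `batch` onto `target_schema`, filling columns the batch lacks with nulls.
/// This is the shape of the back-fill performed for WriteMode::MergeSchema.
fn backfill_missing_columns(
    target_schema: SchemaRef,
    batch: &RecordBatch,
) -> Result<RecordBatch, arrow_schema::ArrowError> {
    let cols: Vec<ArrayRef> = target_schema
        .fields()
        .iter()
        .map(|field| match batch.column_by_name(field.name()) {
            Some(column) => column.clone(),
            None => new_null_array(field.data_type(), batch.num_rows()),
        })
        .collect();
    RecordBatch::try_new(target_schema, cols)
}

fn main() -> Result<(), arrow_schema::ArrowError> {
    // Evolved schema: the writer already knows about `name`, the batch does not.
    let evolved = Arc::new(ArrowSchema::new(vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ]));
    let batch = RecordBatch::try_new(
        Arc::new(ArrowSchema::new(vec![Field::new("id", DataType::Int32, true)])),
        vec![Arc::new(Int32Array::from(vec![Some(1), Some(2)])) as ArrayRef],
    )?;

    let filled = backfill_missing_columns(evolved, &batch)?;
    assert_eq!(filled.num_columns(), 2);
    assert!(filled.column(1).is_null(0)); // `name` was back-filled with nulls
    Ok(())
}
```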
- pub fn write(&mut self, record_batch: &RecordBatch) -> Result<(), DeltaWriterError> { - if record_batch.schema() != self.arrow_schema { - return Err(DeltaWriterError::SchemaMismatch { - record_batch_schema: record_batch.schema(), - expected_schema: self.arrow_schema.clone(), - }); - } + /// + /// Returns the schema which was written by the write which can be used to understand if a + /// schema evolution has happened + pub fn write( + &mut self, + record_batch: &RecordBatch, + mode: WriteMode, + ) -> Result { + let merged_batch = if record_batch.schema() != self.arrow_schema { + match mode { + WriteMode::MergeSchema => { + debug!("The writer and record batch schemas do not match, merging"); + + let merged = + merge_schema(self.arrow_schema.clone(), record_batch.schema().clone())?; + self.arrow_schema = merged; + + let mut cols = vec![]; + for field in self.arrow_schema.fields() { + if let Some(column) = record_batch.column_by_name(field.name()) { + cols.push(column.clone()); + } else { + let null_column = + new_null_array(field.data_type(), record_batch.num_rows()); + cols.push(null_column); + } + } + Some(RecordBatch::try_new(self.arrow_schema.clone(), cols)?) + } + WriteMode::Default => { + // If the schemas didn't match then an error should be pushed up + Err(DeltaWriterError::SchemaMismatch { + record_batch_schema: record_batch.schema(), + expected_schema: self.arrow_schema.clone(), + })? + } + } + } else { + None + }; // Copy current cursor bytes so we can recover from failures let buffer_bytes = self.buffer.to_vec(); + let record_batch = merged_batch.as_ref().unwrap_or(record_batch); match self.arrow_writer.write(record_batch) { Ok(_) => { self.buffered_record_batch_count += 1; - Ok(()) + Ok(self.arrow_schema.clone()) } // If a write fails we need to reset the state of the PartitionWriter Err(e) => { @@ -302,7 +391,7 @@ pub(crate) fn divide_by_partition_values( if partition_columns.is_empty() { partitions.push(PartitionResult { - partition_values: BTreeMap::new(), + partition_values: IndexMap::new(), record_batch: values.clone(), }); return Ok(partitions); @@ -375,8 +464,11 @@ fn lexsort_to_indices(arrays: &[ArrayRef]) -> UInt32Array { #[cfg(test)] mod tests { use super::*; - use crate::writer::test_utils::{create_initialized_table, get_record_batch}; + use crate::operations::create::CreateBuilder; + use crate::writer::test_utils::*; use arrow::json::ReaderBuilder; + use arrow_array::{Int32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; use std::path::Path; #[tokio::test] @@ -431,7 +523,7 @@ mod tests { use crate::DeltaOps; let table = crate::writer::test_utils::create_bare_table(); - let partition_cols = vec!["modified".to_string()]; + let partition_cols = ["modified".to_string()]; let delta_schema = r#" {"type" : "struct", "fields" : [ @@ -450,7 +542,7 @@ mod tests { let table = DeltaOps(table) .create() .with_partition_columns(partition_cols.to_vec()) - .with_columns(delta_schema.fields().clone()) + .with_columns(delta_schema.fields().cloned()) .await .unwrap(); @@ -477,7 +569,7 @@ mod tests { let mut writer = RecordBatchWriter::for_table(&table).unwrap(); let partitions = writer.divide_by_partition_values(&batch).unwrap(); - let expected_keys = vec![ + let expected_keys = [ String::from("modified=2021-02-01"), String::from("modified=2021-02-02"), ]; @@ -570,7 +662,7 @@ mod tests { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - 
.with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partition_cols) .await .unwrap(); @@ -582,4 +674,313 @@ mod tests { let adds = writer.flush().await.unwrap(); assert_eq!(adds.len(), 4); } + + // The following sets of tests are related to #1386 and mergeSchema support + // + mod schema_evolution { + use itertools::Itertools; + + use super::*; + + #[tokio::test] + async fn test_write_mismatched_schema() { + let batch = get_record_batch(None, false); + let partition_cols = vec![]; + let table = create_initialized_table(&partition_cols).await; + let mut writer = RecordBatchWriter::for_table(&table).unwrap(); + + // Write the first batch with the first schema to the table + writer.write(batch).await.unwrap(); + let adds = writer.flush().await.unwrap(); + assert_eq!(adds.len(), 1); + + // Create a second batch with a different schema + let second_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ])); + let second_batch = RecordBatch::try_new( + second_schema, + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2)])), + Arc::new(StringArray::from(vec![Some("will"), Some("robert")])), + ], + ) + .unwrap(); + + let result = writer.write(second_batch).await; + assert!(result.is_err()); + + match result { + Ok(_) => { + panic!("Should not have successfully written"); + } + Err(e) => { + match e { + DeltaTableError::SchemaMismatch { .. } => { + // this is expected + } + others => { + panic!("Got the wrong error: {others:?}"); + } + } + } + }; + } + + #[tokio::test] + async fn test_write_schema_evolution() { + let table_schema = get_delta_schema(); + let table_dir = tempfile::tempdir().unwrap(); + let table_path = table_dir.path(); + + let mut table = CreateBuilder::new() + .with_location(table_path.to_str().unwrap()) + .with_table_name("test-table") + .with_comment("A table for running tests") + .with_columns(table_schema.fields().cloned()) + .await + .unwrap(); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 0); + + let batch = get_record_batch(None, false); + let mut writer = RecordBatchWriter::for_table(&table).unwrap(); + + writer.write(batch).await.unwrap(); + let version = writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(version, 1); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 1); + + // Create a second batch with a different schema + let second_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("vid", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ])); + let second_batch = RecordBatch::try_new( + second_schema, + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2)])), // vid + Arc::new(StringArray::from(vec![Some("will"), Some("robert")])), // name + ], + ) + .unwrap(); + + let result = writer + .write_with_mode(second_batch, WriteMode::MergeSchema) + .await; + assert!( + result.is_ok(), + "Failed to write with WriteMode::MergeSchema, {:?}", + result + ); + let version = writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(version, 2); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 2); + + let new_schema = table.metadata().unwrap().schema().unwrap(); + let expected_columns = vec!["id", "value", "modified", "vid", "name"]; + let found_columns: Vec<&String> = new_schema.fields().map(|f| f.name()).collect(); + assert_eq!( + expected_columns, found_columns, + "The new table schema does 
not contain all evolved columns as expected" + ); + } + + #[tokio::test] + async fn test_write_schema_evolution_with_partition_columns_should_fail_as_unsupported() { + let table_schema = get_delta_schema(); + let table_dir = tempfile::tempdir().unwrap(); + let table_path = table_dir.path(); + + let mut table = CreateBuilder::new() + .with_location(table_path.to_str().unwrap()) + .with_table_name("test-table") + .with_comment("A table for running tests") + .with_columns(table_schema.fields().cloned()) + .with_partition_columns(["id"]) + .await + .unwrap(); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 0); + + let batch = get_record_batch(None, false); + let mut writer = RecordBatchWriter::for_table(&table).unwrap(); + + writer.write(batch).await.unwrap(); + let version = writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(version, 1); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 1); + + // Create a second batch with appended columns + let second_batch = { + let second = get_record_batch(None, false); + let second_schema = ArrowSchema::new( + second + .schema() + .fields + .iter() + .cloned() + .chain([ + Field::new("vid", DataType::Int32, true).into(), + Field::new("name", DataType::Utf8, true).into(), + ]) + .collect_vec(), + ); + + let len = second.num_rows(); + + let second_arrays = second + .columns() + .iter() + .cloned() + .chain([ + Arc::new(Int32Array::from(vec![Some(1); len])) as _, // vid + Arc::new(StringArray::from(vec![Some("will"); len])) as _, // name + ]) + .collect_vec(); + + RecordBatch::try_new(second_schema.into(), second_arrays).unwrap() + }; + + let result = writer + .write_with_mode(second_batch, WriteMode::MergeSchema) + .await; + + assert!(result.is_err()); + + match result.unwrap_err() { + DeltaTableError::Generic(s) => { + assert_eq!( + s, + "Merging Schemas with partition columns present is currently unsupported" + ) + } + e => panic!("unexpected error: {e:?}"), + } + } + + #[tokio::test] + async fn test_schema_evolution_column_type_mismatch() { + let batch = get_record_batch(None, false); + let partition_cols = vec![]; + let mut table = create_initialized_table(&partition_cols).await; + + let mut writer = RecordBatchWriter::for_table(&table).unwrap(); + + // Write the first batch with the first schema to the table + writer.write(batch).await.unwrap(); + let version = writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(version, 1); + + // Create a second batch with a different schema + let second_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ])); + let second_batch = RecordBatch::try_new( + second_schema, + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2)])), // vid + Arc::new(StringArray::from(vec![Some("will"), Some("robert")])), // name + ], + ) + .unwrap(); + + let result = writer + .write_with_mode(second_batch, WriteMode::MergeSchema) + .await; + assert!( + result.is_err(), + "Did not expect to successfully add new writes with different column types: {:?}", + result + ); + } + + #[tokio::test] + async fn test_schema_evolution_with_nonnullable_col() { + use crate::kernel::{ + DataType as DeltaDataType, PrimitiveType, StructField, StructType, + }; + + let table_schema = StructType::new(vec![ + StructField::new( + "id".to_string(), + DeltaDataType::Primitive(PrimitiveType::String), + false, + ), + StructField::new( + "value".to_string(), + 
DeltaDataType::Primitive(PrimitiveType::Integer), + true, + ), + StructField::new( + "modified".to_string(), + DeltaDataType::Primitive(PrimitiveType::String), + true, + ), + ]); + let table_dir = tempfile::tempdir().unwrap(); + let table_path = table_dir.path(); + + let mut table = CreateBuilder::new() + .with_location(table_path.to_str().unwrap()) + .with_table_name("test-table") + .with_comment("A table for running tests") + .with_columns(table_schema.fields().cloned()) + .await + .unwrap(); + table.load().await.expect("Failed to load table"); + assert_eq!(table.version(), 0); + + // Hand-crafting the first RecordBatch to ensure that a write with non-nullable columns + // works properly before attepting the second write + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("value", DataType::Int32, true), + Field::new("modified", DataType::Utf8, true), + ])); + let batch = RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(StringArray::from(vec![Some("1"), Some("2")])), // id + Arc::new(new_null_array(&DataType::Int32, 2)), // value + Arc::new(new_null_array(&DataType::Utf8, 2)), // modified + ], + ) + .unwrap(); + + // Write the first batch with the first schema to the table + let mut writer = RecordBatchWriter::for_table(&table).unwrap(); + writer.write(batch).await.unwrap(); + let version = writer.flush_and_commit(&mut table).await.unwrap(); + assert_eq!(version, 1); + + // Create a second batch with a different schema + let second_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "name", + DataType::Utf8, + true, + )])); + let second_batch = RecordBatch::try_new( + second_schema, + vec![ + Arc::new(StringArray::from(vec![Some("will"), Some("robert")])), // name + ], + ) + .unwrap(); + + let result = writer + .write_with_mode(second_batch, WriteMode::MergeSchema) + .await; + assert!( + result.is_err(), + "Should not have been able to write with a missing non-nullable column: {:?}", + result + ); + } + } } diff --git a/crates/core/src/writer/stats.rs b/crates/core/src/writer/stats.rs index 4ba217cc1e..28a089ae1c 100644 --- a/crates/core/src/writer/stats.rs +++ b/crates/core/src/writer/stats.rs @@ -1,8 +1,11 @@ -use std::collections::BTreeMap; +use std::cmp::min; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{collections::HashMap, ops::AddAssign}; +use delta_kernel::expressions::Scalar; +use indexmap::IndexMap; +use parquet::file::metadata::ParquetMetaData; use parquet::format::FileMetaData; use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor}; use parquet::{basic::LogicalType, errors::ParquetError}; @@ -12,17 +15,24 @@ use parquet::{ }; use super::*; -use crate::kernel::{Add, Scalar}; +use crate::kernel::{scalars::ScalarExt, Add}; use crate::protocol::{ColumnValueStat, Stats}; /// Creates an [`Add`] log action struct. 
pub fn create_add( - partition_values: &BTreeMap, + partition_values: &IndexMap, path: String, size: i64, file_metadata: &FileMetaData, + num_indexed_cols: i32, + stats_columns: &Option>, ) -> Result { - let stats = stats_from_file_metadata(partition_values, file_metadata)?; + let stats = stats_from_file_metadata( + partition_values, + file_metadata, + num_indexed_cols, + stats_columns, + )?; let stats_string = serde_json::to_string(&stats)?; // Determine the modification timestamp to include in the add action - milliseconds since epoch @@ -58,26 +68,94 @@ pub fn create_add( }) } +// As opposed to `stats_from_file_metadata` which operates on `parquet::format::FileMetaData`, +// this function produces the stats by reading the metadata from already written out files. +// +// Note that the file metadata used here is actually `parquet::file::metadata::FileMetaData` +// which is a thrift decoding of the `parquet::format::FileMetaData` which is typically obtained +// when flushing the write. +pub(crate) fn stats_from_parquet_metadata( + partition_values: &IndexMap, + parquet_metadata: &ParquetMetaData, + num_indexed_cols: i32, + stats_columns: &Option>, +) -> Result { + let num_rows = parquet_metadata.file_metadata().num_rows(); + let schema_descriptor = parquet_metadata.file_metadata().schema_descr_ptr(); + let row_group_metadata = parquet_metadata.row_groups().to_vec(); + + stats_from_metadata( + partition_values, + schema_descriptor, + row_group_metadata, + num_rows, + num_indexed_cols, + stats_columns, + ) +} + fn stats_from_file_metadata( - partition_values: &BTreeMap, + partition_values: &IndexMap, file_metadata: &FileMetaData, + num_indexed_cols: i32, + stats_columns: &Option>, ) -> Result { let type_ptr = parquet::schema::types::from_thrift(file_metadata.schema.as_slice()); let schema_descriptor = type_ptr.map(|type_| Arc::new(SchemaDescriptor::new(type_)))?; + let row_group_metadata: Vec = file_metadata + .row_groups + .iter() + .map(|rg| RowGroupMetaData::from_thrift(schema_descriptor.clone(), rg.clone())) + .collect::, ParquetError>>()?; + + stats_from_metadata( + partition_values, + schema_descriptor, + row_group_metadata, + file_metadata.num_rows, + num_indexed_cols, + stats_columns, + ) +} + +fn stats_from_metadata( + partition_values: &IndexMap, + schema_descriptor: Arc, + row_group_metadata: Vec, + num_rows: i64, + num_indexed_cols: i32, + stats_columns: &Option>, +) -> Result { let mut min_values: HashMap = HashMap::new(); let mut max_values: HashMap = HashMap::new(); let mut null_count: HashMap = HashMap::new(); - let row_group_metadata: Result, ParquetError> = file_metadata - .row_groups - .iter() - .map(|rg| RowGroupMetaData::from_thrift(schema_descriptor.clone(), rg.clone())) - .collect(); - let row_group_metadata = row_group_metadata?; + let idx_to_iterate = if let Some(stats_cols) = stats_columns { + schema_descriptor + .columns() + .iter() + .enumerate() + .filter_map(|(index, col)| { + if stats_cols.contains(&col.name().to_string()) { + Some(index) + } else { + None + } + }) + .collect() + } else if num_indexed_cols == -1 { + (0..schema_descriptor.num_columns()).collect::>() + } else if num_indexed_cols >= 0 { + (0..min(num_indexed_cols as usize, schema_descriptor.num_columns())).collect::>() + } else { + return Err(DeltaWriterError::DeltaTable(DeltaTableError::Generic( + "delta.dataSkippingNumIndexedCols valid values are >=-1".to_string(), + ))); + }; - for i in 0..schema_descriptor.num_columns() { - let column_descr = schema_descriptor.column(i); + for idx in 
idx_to_iterate { + let column_descr = schema_descriptor.column(idx); let column_path = column_descr.path(); let column_path_parts = column_path.parts(); @@ -90,7 +168,7 @@ fn stats_from_file_metadata( let maybe_stats: Option = row_group_metadata .iter() .map(|g| { - g.column(i) + g.column(idx) .statistics() .map(|s| AggregatedStats::from((s, &column_descr.logical_type()))) }) @@ -118,7 +196,7 @@ fn stats_from_file_metadata( Ok(Stats { min_values, max_values, - num_records: file_metadata.num_rows, + num_records: num_rows, null_count, }) } @@ -180,19 +258,19 @@ impl StatsScalar { // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#timestamp-without-timezone-timestampntz let v = get_stat!(v); let timestamp = match unit { - TimeUnit::MILLIS(_) => chrono::NaiveDateTime::from_timestamp_millis(v), - TimeUnit::MICROS(_) => chrono::NaiveDateTime::from_timestamp_micros(v), + TimeUnit::MILLIS(_) => chrono::DateTime::from_timestamp_millis(v), + TimeUnit::MICROS(_) => chrono::DateTime::from_timestamp_micros(v), TimeUnit::NANOS(_) => { let secs = v / 1_000_000_000; let nanosecs = (v % 1_000_000_000) as u32; - chrono::NaiveDateTime::from_timestamp_opt(secs, nanosecs) + chrono::DateTime::from_timestamp(secs, nanosecs) } }; let timestamp = timestamp.ok_or(DeltaWriterError::StatsParsingFailed { debug_value: v.to_string(), logical_type: logical_type.clone(), })?; - Ok(Self::Timestamp(timestamp)) + Ok(Self::Timestamp(timestamp.naive_utc())) } (Statistics::Int64(v), Some(LogicalType::Decimal { scale, .. })) => { let val = get_stat!(v) as f64 / 10.0_f64.powi(*scale); @@ -231,18 +309,8 @@ impl StatsScalar { v.max_bytes() }; - let val = if val.len() <= 4 { - let mut bytes = [0; 4]; - bytes[..val.len()].copy_from_slice(val); - i32::from_be_bytes(bytes) as f64 - } else if val.len() <= 8 { - let mut bytes = [0; 8]; - bytes[..val.len()].copy_from_slice(val); - i64::from_be_bytes(bytes) as f64 - } else if val.len() <= 16 { - let mut bytes = [0; 16]; - bytes[..val.len()].copy_from_slice(val); - i128::from_be_bytes(bytes) as f64 + let val = if val.len() <= 16 { + i128::from_be_bytes(sign_extend_be(val)) as f64 } else { return Err(DeltaWriterError::StatsParsingFailed { debug_value: format!("{val:?}"), @@ -284,6 +352,19 @@ impl StatsScalar { } } +/// Performs big endian sign extension +/// Copied from arrow-rs repo/parquet crate: +/// https://github.com/apache/arrow-rs/blob/b25c441745602c9967b1e3cc4a28bc469cfb1311/parquet/src/arrow/buffer/bit_util.rs#L54 +pub fn sign_extend_be(b: &[u8]) -> [u8; N] { + assert!(b.len() <= N, "Array too large, expected less than {N}"); + let is_negative = (b[0] & 128u8) == 128u8; + let mut result = if is_negative { [255u8; N] } else { [0u8; N] }; + for (d, s) in result.iter_mut().skip(N - b.len()).zip(b) { + *d = *s; + } + result +} + impl From for serde_json::Value { fn from(scalar: StatsScalar) -> Self { match scalar { @@ -622,6 +703,17 @@ mod tests { }), Value::from(1243124142314.423), ), + ( + simple_parquet_stat!( + Statistics::FixedLenByteArray, + FixedLenByteArray::from(vec![0, 39, 16]) + ), + Some(LogicalType::Decimal { + scale: 3, + precision: 5, + }), + Value::from(10.0), + ), ( simple_parquet_stat!( Statistics::FixedLenByteArray, @@ -645,7 +737,6 @@ mod tests { } } - #[ignore] #[tokio::test] async fn test_delta_stats() { let temp_dir = tempfile::tempdir().unwrap(); diff --git a/crates/core/src/writer/test_utils.rs b/crates/core/src/writer/test_utils.rs index 093ad7cbd0..ff860ed1cf 100644 --- a/crates/core/src/writer/test_utils.rs +++ b/crates/core/src/writer/test_utils.rs 
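The stats collection above now honors `delta.dataSkippingStatsColumns` (an explicit allow-list) and `delta.dataSkippingNumIndexedCols` (-1 means all columns, otherwise the first N leaf columns). A self-contained sketch of that selection rule, with plain column names standing in for the parquet `SchemaDescriptor` (the function name is hypothetical):

```rust
use std::cmp::min;

/// Decide which leaf columns get min/max/null-count statistics.
/// Mirrors the selection rule in `stats_from_metadata`: an explicit column
/// list wins, otherwise -1 means "all columns" and N >= 0 means "the first N".
fn stats_column_indices(
    columns: &[&str],
    num_indexed_cols: i32,
    stats_columns: Option<&[&str]>,
) -> Result<Vec<usize>, String> {
    if let Some(allow_list) = stats_columns {
        Ok(columns
            .iter()
            .enumerate()
            .filter(|(_, name)| allow_list.contains(name))
            .map(|(idx, _)| idx)
            .collect())
    } else if num_indexed_cols == -1 {
        Ok((0..columns.len()).collect())
    } else if num_indexed_cols >= 0 {
        Ok((0..min(num_indexed_cols as usize, columns.len())).collect())
    } else {
        Err("delta.dataSkippingNumIndexedCols valid values are >=-1".to_string())
    }
}

fn main() {
    let cols = ["id", "value", "modified"];
    // The default config indexes the first 32 columns, so all three qualify.
    assert_eq!(stats_column_indices(&cols, 32, None).unwrap(), vec![0, 1, 2]);
    // An explicit stats column list overrides the numeric limit.
    assert_eq!(
        stats_column_indices(&cols, 32, Some(["value"].as_slice())).unwrap(),
        vec![1]
    );
    // Only the first column is indexed.
    assert_eq!(stats_column_indices(&cols, 1, None).unwrap(), vec![0]);
}
```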
@@ -276,7 +276,7 @@ pub async fn setup_table_with_configuration( let table_schema = get_delta_schema(); DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_configuration_property(key, value) .await .expect("Failed to create table") @@ -299,7 +299,7 @@ pub async fn create_initialized_table(partition_cols: &[String]) -> DeltaTable { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partition_cols) .await .unwrap() diff --git a/crates/core/tests/command_merge.rs b/crates/core/tests/command_merge.rs new file mode 100644 index 0000000000..76b511254b --- /dev/null +++ b/crates/core/tests/command_merge.rs @@ -0,0 +1,231 @@ +#![allow(dead_code)] +mod fs_common; + +use arrow_array::RecordBatch; +use arrow_schema::{DataType, Field, Schema as ArrowSchema}; +use datafusion::dataframe::DataFrame; +use datafusion::prelude::SessionContext; +use datafusion_common::Column; +use datafusion_expr::{col, lit, Expr}; +use deltalake_core::kernel::{DataType as DeltaDataType, PrimitiveType, StructField, StructType}; +use deltalake_core::operations::merge::MergeMetrics; +use deltalake_core::operations::transaction::TransactionError; +use deltalake_core::protocol::SaveMode; +use deltalake_core::{open_table, DeltaOps, DeltaResult, DeltaTable, DeltaTableError}; +use std::sync::Arc; + +async fn create_table(table_uri: &str, partition: Option>) -> DeltaTable { + let table_schema = get_delta_schema(); + let ops = DeltaOps::try_from_uri(table_uri).await.unwrap(); + let table = ops + .create() + .with_columns(table_schema.fields().cloned()) + .with_partition_columns(partition.unwrap_or_default()) + .await + .expect("Failed to create table"); + + let schema = get_arrow_schema(); + write_data(table, &schema).await +} + +fn get_delta_schema() -> StructType { + StructType::new(vec![ + StructField::new( + "id".to_string(), + DeltaDataType::Primitive(PrimitiveType::String), + true, + ), + StructField::new( + "value".to_string(), + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + ), + StructField::new( + "event_date".to_string(), + DeltaDataType::Primitive(PrimitiveType::String), + true, + ), + ]) +} + +fn get_arrow_schema() -> Arc { + Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + Field::new("event_date", DataType::Utf8, true), + ])) +} + +async fn write_data(table: DeltaTable, schema: &Arc) -> DeltaTable { + let batch = RecordBatch::try_new( + Arc::clone(schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C", "D"])), + Arc::new(arrow::array::Int32Array::from(vec![1, 10, 10, 100])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-01", + "2021-02-01", + "2021-02-02", + "2021-02-02", + ])), + ], + ) + .unwrap(); + // write some data + DeltaOps(table) + .write(vec![batch.clone()]) + .with_save_mode(SaveMode::Append) + .await + .unwrap() +} + +fn create_test_data() -> (DataFrame, DataFrame) { + let schema = get_arrow_schema(); + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["C", "D"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-02", + "2021-02-02", + ])), + ], + ) + 
.unwrap(); + let df1 = ctx.read_batch(batch).unwrap(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["E", "F"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-03", + "2021-02-03", + ])), + ], + ) + .unwrap(); + let df2 = ctx.read_batch(batch).unwrap(); + (df1, df2) +} + +async fn merge( + table: DeltaTable, + df: DataFrame, + predicate: Expr, +) -> DeltaResult<(DeltaTable, MergeMetrics)> { + DeltaOps(table) + .merge(df, predicate) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("value", col("source.value")) + .update("event_date", col("source.event_date")) + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", col("source.id")) + .set("value", col("source.value")) + .set("event_date", col("source.event_date")) + }) + .unwrap() + .await +} + +#[tokio::test] +async fn test_merge_concurrent_conflict() { + // Overlapping id ranges -> Commit conflict + let tmp_dir = tempfile::tempdir().unwrap(); + let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); + + let table_ref1 = create_table(&table_uri.to_string(), Some(vec!["event_date"])).await; + let table_ref2 = open_table(table_uri).await.unwrap(); + let (df1, _df2) = create_test_data(); + + let expr = col("target.id").eq(col("source.id")); + let (_table_ref1, _metrics) = merge(table_ref1, df1.clone(), expr.clone()).await.unwrap(); + let result = merge(table_ref2, df1, expr).await; + + assert!(matches!( + result.as_ref().unwrap_err(), + DeltaTableError::Transaction { .. } + )); + if let DeltaTableError::Transaction { source } = result.unwrap_err() { + assert!(matches!(source, TransactionError::CommitConflict(_))); + } +} + +#[tokio::test] +async fn test_merge_different_range() { + // No overlapping id ranges -> No conflict + let tmp_dir = tempfile::tempdir().unwrap(); + let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); + + let table_ref1 = create_table(table_uri, Some(vec!["event_date"])).await; + let table_ref2 = open_table(table_uri).await.unwrap(); + let (df1, df2) = create_test_data(); + + let expr = col("target.id").eq(col("source.id")); + let (_table_ref1, _metrics) = merge(table_ref1, df1, expr.clone()).await.unwrap(); + let result = merge(table_ref2, df2, expr).await; + + assert!(result.is_ok()); +} + +#[tokio::test] +async fn test_merge_concurrent_different_partition() { + // partition key in predicate -> Successful merge + let tmp_dir = tempfile::tempdir().unwrap(); + let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); + + let table_ref1 = create_table(table_uri, Some(vec!["event_date"])).await; + let table_ref2 = open_table(table_uri).await.unwrap(); + let (df1, df2) = create_test_data(); + + let expr = col("target.id") + .eq(col("source.id")) + .and(col("target.event_date").eq(col("source.event_date"))); + let (_table_ref1, _metrics) = merge(table_ref1, df1, expr.clone()).await.unwrap(); + let result = merge(table_ref2, df2, expr).await; + + assert!(result.is_ok()); +} + +#[tokio::test] +async fn test_merge_concurrent_with_overlapping_files() { + // predicate contains filter and files are overlapping -> Commit conflict + let tmp_dir = tempfile::tempdir().unwrap(); + let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); + + let table_ref1 = create_table(table_uri, None).await; + let table_ref2 = open_table(table_uri).await.unwrap(); + let (df1, _df2) = create_test_data(); + + 
let expr = col("target.id").eq(col("source.id")); + let (_table_ref1, _metrics) = merge( + table_ref1, + df1.clone(), + expr.clone() + .and(col(Column::from_qualified_name("target.event_date")).lt_eq(lit("2021-02-02"))), + ) + .await + .unwrap(); + let result = merge( + table_ref2, + df1, + expr.and(col(Column::from_qualified_name("target.event_date")).eq(lit("2021-02-02"))), + ) + .await; + + assert!(matches!( + result.as_ref().unwrap_err(), + DeltaTableError::Transaction { .. } + )); + if let DeltaTableError::Transaction { source } = result.unwrap_err() { + assert!(matches!(source, TransactionError::CommitConflict(_))); + } +} diff --git a/crates/core/tests/command_optimize.rs b/crates/core/tests/command_optimize.rs index 5c3875eb92..13cbd168e4 100644 --- a/crates/core/tests/command_optimize.rs +++ b/crates/core/tests/command_optimize.rs @@ -9,7 +9,7 @@ use deltalake_core::kernel::{Action, DataType, PrimitiveType, StructField}; use deltalake_core::operations::optimize::{ create_merge_plan, MetricDetails, Metrics, OptimizeType, }; -use deltalake_core::operations::transaction::commit; +use deltalake_core::operations::transaction::{CommitBuilder, CommitProperties}; use deltalake_core::operations::DeltaOps; use deltalake_core::protocol::DeltaOperation; use deltalake_core::storage::ObjectStoreRef; @@ -180,6 +180,12 @@ async fn test_optimize_non_partitioned_table() -> Result<(), Box> { assert_eq!(metrics.partitions_optimized, 1); assert_eq!(dt.get_files_count(), 2); + let commit_info = dt.history(None).await?; + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + assert_eq!(parameters["targetSize"], json!("2000000")); + assert_eq!(parameters["predicate"], "[]"); + Ok(()) } @@ -243,7 +249,7 @@ async fn test_optimize_with_partitions() -> Result<(), Box> { let partition_values = partition_adds[0].partition_values()?; assert_eq!( partition_values.get("date"), - Some(&deltalake_core::kernel::Scalar::String( + Some(&delta_kernel::expressions::Scalar::String( "2022-05-22".to_string() )) ); @@ -252,7 +258,6 @@ async fn test_optimize_with_partitions() -> Result<(), Box> { } #[tokio::test] -#[ignore] /// Validate that optimize fails when a remove action occurs async fn test_conflict_for_remove_actions() -> Result<(), Box> { let context = setup_test(true).await?; @@ -291,20 +296,24 @@ async fn test_conflict_for_remove_actions() -> Result<(), Box> { let remove = add.remove_action(true); let operation = DeltaOperation::Delete { predicate: None }; - commit( - other_dt.log_store().as_ref(), - &vec![Action::Remove(remove)], - operation, - Some(other_dt.snapshot()?), - None, - ) - .await?; + CommitBuilder::default() + .with_actions(vec![Action::Remove(remove)]) + .build(Some(other_dt.snapshot()?), other_dt.log_store(), operation) + .await?; let maybe_metrics = plan - .execute(dt.log_store(), dt.snapshot()?, 1, 20, None, None) + .execute( + dt.log_store(), + dt.snapshot()?, + 1, + 20, + None, + CommitProperties::default(), + ) .await; assert!(maybe_metrics.is_err()); + dt.update().await?; assert_eq!(dt.version(), version + 1); Ok(()) } @@ -352,7 +361,14 @@ async fn test_no_conflict_for_append_actions() -> Result<(), Box> { .await?; let metrics = plan - .execute(dt.log_store(), dt.snapshot()?, 1, 20, None, None) + .execute( + dt.log_store(), + dt.snapshot()?, + 1, + 20, + None, + CommitProperties::default(), + ) .await?; assert_eq!(metrics.num_files_added, 1); assert_eq!(metrics.num_files_removed, 2); @@ -398,7 +414,7 @@ async fn test_commit_interval() -> 
diff --git a/crates/core/tests/command_restore.rs b/crates/core/tests/command_restore.rs
index 1e49132d23..aa5b598347 100644
--- a/crates/core/tests/command_restore.rs
+++ b/crates/core/tests/command_restore.rs
@@ -1,7 +1,7 @@
 use arrow::datatypes::Schema as ArrowSchema;
 use arrow_array::{Int32Array, RecordBatch};
 use arrow_schema::{DataType as ArrowDataType, Field};
-use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
+use chrono::DateTime;
 use deltalake_core::kernel::{DataType, PrimitiveType, StructField};
 use deltalake_core::protocol::SaveMode;
 use deltalake_core::storage::commit_uri_from_version;
@@ -128,8 +128,7 @@ async fn test_restore_by_datetime() -> Result<(), Box<dyn Error>> {
         .head(&commit_uri_from_version(version))
         .await?;
     let timestamp = meta.last_modified.timestamp_millis();
-    let naive = NaiveDateTime::from_timestamp_millis(timestamp).unwrap();
-    let datetime: DateTime<Utc> = Utc.from_utc_datetime(&naive);
+    let datetime = DateTime::from_timestamp_millis(timestamp).unwrap();

     let result = DeltaOps(table)
         .restore()
@@ -147,8 +146,7 @@ async fn test_restore_with_error_params() -> Result<(), Box<dyn Error>> {
     let table = context.table;
     let history = table.history(Some(10)).await?;
     let timestamp = history.get(1).unwrap().timestamp.unwrap();
-    let naive = NaiveDateTime::from_timestamp_millis(timestamp).unwrap();
-    let datetime: DateTime<Utc> = Utc.from_utc_datetime(&naive);
+    let datetime = DateTime::from_timestamp_millis(timestamp).unwrap();

     // datetime and version both set
     let result = DeltaOps(table)
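The `command_restore.rs` change above relies on chrono building a UTC `DateTime` directly from epoch milliseconds, removing the deprecated `NaiveDateTime` round-trip. A small self-contained sketch of the replacement call; the helper name and sample value are illustrative.

```rust
use chrono::{DateTime, Utc};

/// Illustrative: DateTime::from_timestamp_millis returns None when the value is
/// outside chrono's representable range, so callers decide how to handle that case.
fn datetime_from_millis(timestamp_millis: i64) -> Option<DateTime<Utc>> {
    DateTime::from_timestamp_millis(timestamp_millis)
}

fn main() {
    let dt = datetime_from_millis(1_700_000_000_000).expect("timestamp in range");
    println!("{}", dt.to_rfc3339());
}
```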
diff --git a/crates/core/tests/commit_info_format.rs b/crates/core/tests/commit_info_format.rs
index b47850ae30..df817365b3 100644
--- a/crates/core/tests/commit_info_format.rs
+++ b/crates/core/tests/commit_info_format.rs
@@ -2,7 +2,7 @@
 mod fs_common;

 use deltalake_core::kernel::Action;
-use deltalake_core::operations::transaction::commit;
+use deltalake_core::operations::transaction::CommitBuilder;
 use deltalake_core::protocol::{DeltaOperation, SaveMode};
 use serde_json::json;
 use std::error::Error;
@@ -20,14 +20,10 @@ async fn test_operational_parameters() -> Result<(), Box<dyn Error>> {
         predicate: None,
     };

-    commit(
-        table.log_store().as_ref(),
-        &actions,
-        operation,
-        Some(table.snapshot()?),
-        None,
-    )
-    .await?;
+    CommitBuilder::default()
+        .with_actions(actions)
+        .build(Some(table.snapshot()?), table.log_store(), operation)
+        .await?;

     table.update().await?;
     let commit_info = table.history(None).await?;
diff --git a/crates/core/tests/fs_common/mod.rs b/crates/core/tests/fs_common/mod.rs
index 088d22a630..13683b408a 100644
--- a/crates/core/tests/fs_common/mod.rs
+++ b/crates/core/tests/fs_common/mod.rs
@@ -3,12 +3,14 @@ use deltalake_core::kernel::{
     Action, Add, DataType, PrimitiveType, Remove, StructField, StructType,
 };
 use deltalake_core::operations::create::CreateBuilder;
-use deltalake_core::operations::transaction::commit;
+use deltalake_core::operations::transaction::CommitBuilder;
 use deltalake_core::protocol::{DeltaOperation, SaveMode};
 use deltalake_core::storage::{GetResult, ObjectStoreResult};
 use deltalake_core::DeltaTable;
 use object_store::path::Path as StorePath;
-use object_store::{ObjectStore, PutOptions, PutResult};
+use object_store::{
+    MultipartUpload, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult,
+};
 use serde_json::Value;
 use std::collections::HashMap;
 use std::fs;
@@ -55,7 +57,7 @@ pub async fn create_test_table(
         .with_location(path)
         .with_table_name("test-table")
         .with_comment("A table for running tests")
-        .with_columns(schema.fields().clone())
+        .with_columns(schema.fields().cloned())
         .with_partition_columns(partition_columns)
         .with_configuration(config)
         .await
@@ -119,15 +121,16 @@ pub async fn commit_actions(
     actions: Vec<Action>,
     operation: DeltaOperation,
 ) -> i64 {
-    let version = commit(
-        table.log_store().as_ref(),
-        &actions,
-        operation,
-        Some(table.snapshot().unwrap()),
-        None,
-    )
-    .await
-    .unwrap();
+    let version = CommitBuilder::default()
+        .with_actions(actions)
+        .build(
+            Some(table.snapshot().unwrap()),
+            table.log_store().clone(),
+            operation,
+        )
+        .await
+        .unwrap()
+        .version();
     table.update().await.unwrap();
     version
 }
@@ -143,6 +146,7 @@ impl std::fmt::Display for SlowStore {
 }

 impl SlowStore {
+    #[allow(dead_code)]
     pub fn new(
         location: Url,
         _options: impl Into + Clone,
@@ -156,14 +160,14 @@ impl SlowStore {
 #[async_trait::async_trait]
 impl ObjectStore for SlowStore {
     /// Save the provided bytes to the specified location.
-    async fn put(&self, location: &StorePath, bytes: bytes::Bytes) -> ObjectStoreResult<PutResult> {
+    async fn put(&self, location: &StorePath, bytes: PutPayload) -> ObjectStoreResult<PutResult> {
         self.inner.put(location, bytes).await
     }

     async fn put_opts(
         &self,
         location: &StorePath,
-        bytes: bytes::Bytes,
+        bytes: PutPayload,
         options: PutOptions,
     ) -> ObjectStoreResult<PutResult> {
         self.inner.put_opts(location, bytes, options).await
@@ -270,18 +274,15 @@ impl ObjectStore for SlowStore {
     async fn put_multipart(
         &self,
         location: &StorePath,
-    ) -> ObjectStoreResult<(
-        object_store::MultipartId,
-        Box<dyn tokio::io::AsyncWrite + Unpin + Send>,
-    )> {
+    ) -> ObjectStoreResult<Box<dyn MultipartUpload>> {
         self.inner.put_multipart(location).await
     }

-    async fn abort_multipart(
+    async fn put_multipart_opts(
        &self,
        location: &StorePath,
-        multipart_id: &object_store::MultipartId,
-    ) -> ObjectStoreResult<()> {
-        self.inner.abort_multipart(location, multipart_id).await
+        options: PutMultipartOpts,
+    ) -> ObjectStoreResult<Box<dyn MultipartUpload>> {
+        self.inner.put_multipart_opts(location, options).await
     }
 }
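The `SlowStore` wrapper above now delegates to the newer `object_store` multipart API, where `put_multipart` hands back a `MultipartUpload` that accepts parts and is finalized with `complete()`, replacing the old `(MultipartId, AsyncWrite)` pair and `abort_multipart`. A hedged sketch of that flow against the in-memory store follows; the function name and paths are illustrative, and real object stores may enforce minimum part sizes that the in-memory store does not.

```rust
use object_store::memory::InMemory;
use object_store::path::Path;
use object_store::{MultipartUpload, ObjectStore, PutPayload};

/// Illustrative: upload two parts and read the object back; drive with any async runtime.
async fn multipart_roundtrip() -> object_store::Result<()> {
    let store = InMemory::new();
    let location = Path::from("example/data.bin");

    // put_multipart returns a Box<dyn MultipartUpload>; parts are uploaded in order.
    let mut upload = store.put_multipart(&location).await?;
    upload.put_part(PutPayload::from(vec![0u8; 1024])).await?;
    upload.put_part(PutPayload::from(vec![1u8; 1024])).await?;
    upload.complete().await?;

    let bytes = store.get(&location).await?.bytes().await?;
    assert_eq!(bytes.len(), 2048);
    Ok(())
}
```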
diff --git a/crates/core/tests/integration_datafusion.rs b/crates/core/tests/integration_datafusion.rs
index 90fc3ea9fa..ea83bce29e 100644
--- a/crates/core/tests/integration_datafusion.rs
+++ b/crates/core/tests/integration_datafusion.rs
@@ -1,14 +1,10 @@
 #![cfg(feature = "datafusion")]
-
-use arrow::array::Int64Array;
-use deltalake_test::datafusion::*;
-use deltalake_test::utils::*;
-use serial_test::serial;
-
 use std::collections::{HashMap, HashSet};
+use std::error::Error;
 use std::path::PathBuf;
 use std::sync::Arc;

+use arrow::array::Int64Array;
 use arrow::array::*;
 use arrow::record_batch::RecordBatch;
 use arrow_schema::{
@@ -28,8 +24,6 @@ use datafusion_expr::Expr;
 use datafusion_proto::bytes::{
     physical_plan_from_bytes_with_extension_codec, physical_plan_to_bytes_with_extension_codec,
 };
-use url::Url;
-
 use deltalake_core::delta_datafusion::{DeltaPhysicalCodec, DeltaScan};
 use deltalake_core::kernel::{DataType, MapType, PrimitiveType, StructField, StructType};
 use deltalake_core::logstore::logstore_for;
@@ -41,7 +35,10 @@ use deltalake_core::{
     operations::{write::WriteBuilder, DeltaOps},
     DeltaTable, DeltaTableError,
 };
-use std::error::Error;
+use deltalake_test::datafusion::*;
+use deltalake_test::utils::*;
+use serial_test::serial;
+use url::Url;

 mod local {
     use datafusion::common::stats::Precision;
@@ -68,6 +65,8 @@ mod local {
     #[derive(Debug, Default)]
     pub struct ExecutionMetricsCollector {
         scanned_files: HashSet