From 5b7dbe6827ca3b8114766378f1da0e601cbc08a9 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:18:18 +0100 Subject: [PATCH 1/9] fix typo --- docs/integrations/object-storage/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/object-storage/s3.md b/docs/integrations/object-storage/s3.md index 5b2034827f..1989494978 100644 --- a/docs/integrations/object-storage/s3.md +++ b/docs/integrations/object-storage/s3.md @@ -2,7 +2,7 @@ `delta-rs` offers native support for using AWS S3 as an objet storage backend. -You don’t need to install any extra dependencies to red/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. +You don’t need to install any extra dependencies to read/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. ## Note for boto3 users From 7783f66558c518d5e7b3434fa5de2d607591773c Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:21:07 +0100 Subject: [PATCH 2/9] typo fix --- docs/integrations/object-storage/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/object-storage/s3.md b/docs/integrations/object-storage/s3.md index 1989494978..a7965cb2a5 100644 --- a/docs/integrations/object-storage/s3.md +++ b/docs/integrations/object-storage/s3.md @@ -1,6 +1,6 @@ # AWS S3 Storage Backend -`delta-rs` offers native support for using AWS S3 as an objet storage backend. +`delta-rs` offers native support for using AWS S3 as an object storage backend. You don’t need to install any extra dependencies to read/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. From 7f7e3cddac750197ab0d7ded5d9de926046011f5 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:20:47 +0100 Subject: [PATCH 3/9] create gcs docs --- docs/integrations/object-storage/gcs.md | 87 +++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 docs/integrations/object-storage/gcs.md diff --git a/docs/integrations/object-storage/gcs.md b/docs/integrations/object-storage/gcs.md new file mode 100644 index 0000000000..c5592ccc5c --- /dev/null +++ b/docs/integrations/object-storage/gcs.md @@ -0,0 +1,87 @@ +# GCS Storage Backend + +`delta-rs` offers native support for using Google Cloud Storage (GCS) as an object storage backend. + +You don’t need to install any extra dependencies to red/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. + +## Note for boto3 users + +Many Python engines use [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) to connect to AWS. This library supports reading credentials automatically from your local `.aws/config` or `.aws/creds` file. + +For example, if you’re running locally with the proper credentials in your local `.aws/config` or `.aws/creds` file then you can write a Parquet file to S3 like this with pandas: + +```python + import pandas as pd + df = pd.DataFrame({'x': [1, 2, 3]}) + df.to_parquet("s3://avriiil/parquet-test-pandas") +``` + +The `delta-rs` writer does not use `boto3` and therefore does not support taking credentials from your `.aws/config` or `.aws/creds` file. 
If you’re used to working with writers from Python engines like Polars, pandas or Dask, this may mean a small change to your workflow. + +## Passing AWS Credentials + +You can pass your AWS credentials explicitly by using: + +- the `storage_options `kwarg +- Environment variables +- EC2 metadata if using EC2 instances +- AWS Profiles + +## Example + +Let's work through an example with Polars. The same logic applies to other Python engines like Pandas, Daft, Dask, etc. + +Follow the steps below to use Delta Lake on S3 with Polars: + +1. Install Polars and deltalake. For example, using: + + `pip install polars deltalake` + +2. Create a dataframe with some toy data. + + `df = pl.DataFrame({'x': [1, 2, 3]})` + +3. Set your `storage_options` correctly. + +```python +storage_options = { + "AWS_REGION":, + 'AWS_ACCESS_KEY_ID': , + 'AWS_SECRET_ACCESS_KEY': , + 'AWS_S3_LOCKING_PROVIDER': 'dynamodb', + 'DELTA_DYNAMO_TABLE_NAME': 'delta_log', +} +``` + +4. Write data to Delta table using the `storage_options` kwarg. + + ```python + df.write_delta( + "s3://bucket/delta_table", + storage_options=storage_options, + ) + ``` + +## Delta Lake on AWS S3: Safe Concurrent Writes + +You need a locking provider to ensure safe concurrent writes when writing Delta tables to AWS S3. This is because AWS S3 does not guarantee mutual exclusion. + +A locking provider guarantees that only one writer is able to create the same file. This prevents corrupted or conflicting data. + +`delta-rs` uses DynamoDB to guarantee safe concurrent writes. + +Run the code below in your terminal to create a DynamoDB table that will act as your locking provider. + +``` + aws dynamodb create-table \ + --table-name delta_log \ + --attribute-definitions AttributeName=tablePath,AttributeType=S AttributeName=fileName,AttributeType=S \ + --key-schema AttributeName=tablePath,KeyType=HASH AttributeName=fileName,KeyType=RANGE \ + --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 +``` + +If for some reason you don't want to use DynamoDB as your locking mechanism you can choose to set the `AWS_S3_ALLOW_UNSAFE_RENAME` variable to `true` in order to enable S3 unsafe writes. + +Read more in the [Usage](../../usage/writing/writing-to-s3-with-locking-provider.md) section. + +## Delta Lake on GCS: Required permissions From 96dc0a6682e88852f6d14cbad1a085f37787cd33 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Tue, 24 Sep 2024 15:33:53 +0100 Subject: [PATCH 4/9] update docs --- docs/integrations/object-storage/gcs.md | 91 +++++++------------------ 1 file changed, 24 insertions(+), 67 deletions(-) diff --git a/docs/integrations/object-storage/gcs.md b/docs/integrations/object-storage/gcs.md index c5592ccc5c..aa8682d3cc 100644 --- a/docs/integrations/object-storage/gcs.md +++ b/docs/integrations/object-storage/gcs.md @@ -2,86 +2,43 @@ `delta-rs` offers native support for using Google Cloud Storage (GCS) as an object storage backend. -You don’t need to install any extra dependencies to red/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. +You don’t need to install any extra dependencies to read/write Delta tables to GCS with engines that use `delta-rs`. You do need to configure your GCS access credentials correctly. -## Note for boto3 users +## Using Application Default Credentials -Many Python engines use [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) to connect to AWS. 
This library supports reading credentials automatically from your local `.aws/config` or `.aws/creds` file. +Application Default Credentials (ADC) is a strategy used by GCS to automatically find credentials based on the application environment. -For example, if you’re running locally with the proper credentials in your local `.aws/config` or `.aws/creds` file then you can write a Parquet file to S3 like this with pandas: +If you are working from your local machine and have ADC set up then you can read/write Delta tables from GCS directly, without having to pass your credentials explicitly. -```python - import pandas as pd - df = pd.DataFrame({'x': [1, 2, 3]}) - df.to_parquet("s3://avriiil/parquet-test-pandas") -``` - -The `delta-rs` writer does not use `boto3` and therefore does not support taking credentials from your `.aws/config` or `.aws/creds` file. If you’re used to working with writers from Python engines like Polars, pandas or Dask, this may mean a small change to your workflow. - -## Passing AWS Credentials - -You can pass your AWS credentials explicitly by using: - -- the `storage_options `kwarg -- Environment variables -- EC2 metadata if using EC2 instances -- AWS Profiles - -## Example - -Let's work through an example with Polars. The same logic applies to other Python engines like Pandas, Daft, Dask, etc. - -Follow the steps below to use Delta Lake on S3 with Polars: - -1. Install Polars and deltalake. For example, using: - - `pip install polars deltalake` +## Example: Write Delta tables to GCS with Polars -2. Create a dataframe with some toy data. - - `df = pl.DataFrame({'x': [1, 2, 3]})` - -3. Set your `storage_options` correctly. +Using Polars, you can write a Delta table to GCS like this: ```python -storage_options = { - "AWS_REGION":, - 'AWS_ACCESS_KEY_ID': , - 'AWS_SECRET_ACCESS_KEY': , - 'AWS_S3_LOCKING_PROVIDER': 'dynamodb', - 'DELTA_DYNAMO_TABLE_NAME': 'delta_log', -} -``` - -4. Write data to Delta table using the `storage_options` kwarg. - - ```python - df.write_delta( - "s3://bucket/delta_table", - storage_options=storage_options, - ) - ``` +# create a toy dataframe +import polars as pl +df = pl.DataFrame({"foo": [1, 2, 3, 4, 5]}) -## Delta Lake on AWS S3: Safe Concurrent Writes +# define path +table_path = "gs://bucket/delta-table" -You need a locking provider to ensure safe concurrent writes when writing Delta tables to AWS S3. This is because AWS S3 does not guarantee mutual exclusion. +# write Delta to GCS +df.write_delta(table_path) +``` -A locking provider guarantees that only one writer is able to create the same file. This prevents corrupted or conflicting data. +## Passing GCS Credentials explicitly -`delta-rs` uses DynamoDB to guarantee safe concurrent writes. +Alternatively, you can pass GCS credentials to your query engine explicitly. -Run the code below in your terminal to create a DynamoDB table that will act as your locking provider. +For Polars, you would do this using the `storage_options` keyword. This will forward your credentials to the `object store` library that Polars uses under the hood. Read the [Polars documentation](https://docs.pola.rs/api/python/stable/reference/api/polars.DataFrame.write_delta.html) and the [`object store` documentation](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants) for more information. 
-``` - aws dynamodb create-table \ - --table-name delta_log \ - --attribute-definitions AttributeName=tablePath,AttributeType=S AttributeName=fileName,AttributeType=S \ - --key-schema AttributeName=tablePath,KeyType=HASH AttributeName=fileName,KeyType=RANGE \ - --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 -``` +## Delta Lake on GCS: Required permissions -If for some reason you don't want to use DynamoDB as your locking mechanism you can choose to set the `AWS_S3_ALLOW_UNSAFE_RENAME` variable to `true` in order to enable S3 unsafe writes. +You will need the following permissions in your GCS account: -Read more in the [Usage](../../usage/writing/writing-to-s3-with-locking-provider.md) section. +- `storage.objects.create` +- `storage.objects.delete` (only required for uploads that overwrite an existing object) +- `storage.objects.get` (only required if you plan on using the Google Cloud CLI) +- `storage.objects.list` (only required if you plan on using the Google Cloud CLI) -## Delta Lake on GCS: Required permissions +For more information, see the [GCP documentation](https://cloud.google.com/storage/docs/uploading-objects) From 5d10cbe786211bfb18fb31f6a49a3838b179e3b4 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Tue, 24 Sep 2024 15:39:44 +0100 Subject: [PATCH 5/9] fix typos --- docs/integrations/object-storage/s3-like.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/object-storage/s3-like.md b/docs/integrations/object-storage/s3-like.md index 4d32f7c41b..40b2f6e076 100644 --- a/docs/integrations/object-storage/s3-like.md +++ b/docs/integrations/object-storage/s3-like.md @@ -1,8 +1,8 @@ # CloudFlare R2 & Minio -`delta-rs` offers native support for using Cloudflare R2 and Minio's as storage backend. R2 and Minio support conditional puts, however we have to pass this flag into the storage options. See the example blow +`delta-rs` offers native support for using Cloudflare R2 and Minio's as storage backend. R2 and Minio support conditional puts, however we have to pass this flag into the storage options. See the example below -You don’t need to install any extra dependencies to red/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. +You don’t need to install any extra dependencies to read/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your AWS access credentials correctly. ## Passing S3 Credentials From 22ff7a1c6a839f0c9ef28c1f205ecd25bf8e41b9 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:14:41 +0100 Subject: [PATCH 6/9] add adls docs --- docs/integrations/object-storage/adls.md | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/integrations/object-storage/adls.md diff --git a/docs/integrations/object-storage/adls.md b/docs/integrations/object-storage/adls.md new file mode 100644 index 0000000000..2867c07da3 --- /dev/null +++ b/docs/integrations/object-storage/adls.md @@ -0,0 +1,57 @@ +# Azure ADLS Storage Backend + +`delta-rs` offers native support for using Microsoft Azure Data Lake Storage (ADSL) as an object storage backend. + +You don’t need to install any extra dependencies to read/write Delta tables to S3 with engines that use `delta-rs`. You do need to configure your ADLS access credentials correctly. 
+ +## Passing Credentials Explicitly + +You can also pass ADLS credentials to your query engine explicitly. + +For Polars, you would do this using the `storage_options` keyword as demonstrated above. This will forward your credentials to the `object store` library that Polars uses for cloud storage access under the hood. Read the [`object store` documentation](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants) for more information defining specific credentials. + +## Example: Write Delta table to ADLS with Polars + +Using Polars, you can write a Delta table to ADLS directly like this: + +```python +import polars as pl + +df = pl.DataFrame({"foo": [1, 2, 3, 4, 5]}) + +# define container name +container = + +# define credentials +storage_options = { + "ACCOUNT_NAME": , + "ACCESS_KEY": , +} + +# write Delta to ADLS +df_pl.write_delta( + f"abfs://{container}/delta_table", + storage_options = storage_options +) +``` + +## Example with pandas + +For libraries without direct `write_delta` methods (like Pandas), you can use the `write_deltalake` function from the `deltalake` library: + +```python +import pandas as pd +from deltalake import write_deltalake + +df = pd.DataFrame({"foo": [1, 2, 3, 4, 5]}) + +write_deltalake( + f"abfs://{container}/delta_table_pandas", + df, + storage_options=storage_options +) +``` + +## Using Local Authentication + +If your local session is authenticated using the Azure CLI then you can write Delta tables directly to ADLS. Read more about this in the [Azure CLI documentation](https://learn.microsoft.com/en-us/cli/azure/). From b3b2b9e856a71c3c78d014de104d2699c09c5790 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:15:53 +0100 Subject: [PATCH 7/9] add adls docs to nav --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index b0c8d3a0ac..baf28ff3fc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,6 +82,7 @@ nav: - api/exceptions.md - Integrations: - Object Storage: + - integrations/object-storage/adls.md - integrations/object-storage/hdfs.md - integrations/object-storage/s3.md - integrations/object-storage/s3-like.md From 2498837ff6a2c3525058f1a9fd1301ba50fecbba Mon Sep 17 00:00:00 2001 From: Filip Dziuba Date: Wed, 25 Sep 2024 15:14:39 +0200 Subject: [PATCH 8/9] refactor: exposing CommitConflictError enum --- crates/core/src/operations/transaction/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index 6c4e81dc63..69027cc4b7 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -83,7 +83,7 @@ use object_store::path::Path; use object_store::Error as ObjectStoreError; use serde_json::Value; -use self::conflict_checker::{CommitConflictError, TransactionInfo, WinningCommitSummary}; +use self::conflict_checker::{TransactionInfo, WinningCommitSummary}; use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for}; use crate::errors::DeltaTableError; use crate::kernel::{ @@ -97,6 +97,7 @@ use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; use crate::{crate_version, DeltaResult}; +pub use self::conflict_checker::CommitConflictError; pub use self::protocol::INSTANCE as PROTOCOL; #[cfg(test)] From 6b53ac79451b3203506f281fafaa3e9f876f03a9 Mon Sep 17 00:00:00 2001 From: Avril Aysha <68642378+avriiil@users.noreply.github.com> 
Date: Fri, 27 Sep 2024 11:38:19 +0100 Subject: [PATCH 9/9] squash adding adls docs --- .github/CODEOWNERS | 2 +- .github/actions/setup-env/action.yml | 34 + .github/codecov.yml | 17 + .github/dependabot.yml | 3 +- .github/workflows/build.yml | 37 +- .github/workflows/codecov.yml | 36 + .github/workflows/dev_pr.yml | 1 + .github/workflows/docs.yml | 25 +- .github/workflows/python_benchmark.yml | 54 + .github/workflows/python_build.yml | 136 +-- .github/workflows/python_release.yml | 3 +- .gitignore | 4 +- CHANGELOG.md | 296 +++++ CONTRIBUTING.md | 38 +- Cargo.toml | 49 +- README.md | 6 +- crates/aws/Cargo.toml | 19 +- crates/aws/src/constants.rs | 141 +++ crates/aws/src/credentials.rs | 331 ++++-- crates/aws/src/lib.rs | 427 ++++--- crates/aws/src/logstore/default_logstore.rs | 113 ++ .../dynamodb_logstore.rs} | 27 +- crates/aws/src/logstore/mod.rs | 11 + crates/aws/src/storage.rs | 604 +++++----- crates/aws/tests/common.rs | 6 +- crates/aws/tests/integration_s3_dynamodb.rs | 76 +- crates/aws/tests/repair_s3_rename_test.rs | 22 +- crates/azure/Cargo.toml | 4 +- crates/azure/tests/integration.rs | 5 +- crates/benchmarks/src/bin/merge.rs | 3 +- crates/catalog-glue/Cargo.toml | 6 +- crates/core/Cargo.toml | 22 +- crates/core/src/data_catalog/storage/mod.rs | 5 +- .../core/src/data_catalog/unity/datafusion.rs | 2 +- crates/core/src/data_catalog/unity/models.rs | 2 +- crates/core/src/delta_datafusion/cdf/mod.rs | 66 +- crates/core/src/delta_datafusion/cdf/scan.rs | 8 +- .../src/delta_datafusion/cdf/scan_utils.rs | 17 +- crates/core/src/delta_datafusion/expr.rs | 126 +- .../delta_datafusion/find_files/logical.rs | 13 +- .../src/delta_datafusion/find_files/mod.rs | 30 +- .../delta_datafusion/find_files/physical.rs | 14 +- crates/core/src/delta_datafusion/logical.rs | 15 +- crates/core/src/delta_datafusion/mod.rs | 905 ++++++++++----- crates/core/src/delta_datafusion/physical.rs | 21 +- crates/core/src/delta_datafusion/planner.rs | 58 + .../src/delta_datafusion/schema_adapter.rs | 82 ++ crates/core/src/errors.rs | 11 + crates/core/src/kernel/arrow/mod.rs | 472 +------- crates/core/src/kernel/error.rs | 10 - crates/core/src/kernel/expressions/eval.rs | 384 ------- crates/core/src/kernel/expressions/mod.rs | 478 -------- crates/core/src/kernel/expressions/scalars.rs | 559 --------- crates/core/src/kernel/mod.rs | 8 +- crates/core/src/kernel/models/actions.rs | 361 +++++- crates/core/src/kernel/models/fields.rs | 10 +- crates/core/src/kernel/models/schema.rs | 838 +------------- crates/core/src/kernel/scalars.rs | 286 +++++ crates/core/src/kernel/snapshot/log_data.rs | 225 +++- .../core/src/kernel/snapshot/log_segment.rs | 168 ++- crates/core/src/kernel/snapshot/mod.rs | 357 +++++- crates/core/src/kernel/snapshot/parse.rs | 29 +- crates/core/src/kernel/snapshot/replay.rs | 447 +++++++- crates/core/src/kernel/snapshot/serde.rs | 3 +- crates/core/src/lib.rs | 9 +- crates/core/src/logstore/default_logstore.rs | 53 +- crates/core/src/logstore/mod.rs | 111 +- crates/core/src/operations/add_column.rs | 113 ++ crates/core/src/operations/add_feature.rs | 196 ++++ crates/core/src/operations/cast.rs | 354 ------ .../core/src/operations/cast/merge_schema.rs | 352 ++++++ crates/core/src/operations/cast/mod.rs | 650 +++++++++++ crates/core/src/operations/cdc.rs | 415 +++++++ crates/core/src/operations/constraints.rs | 8 +- .../core/src/operations/convert_to_delta.rs | 106 +- crates/core/src/operations/create.rs | 115 +- crates/core/src/operations/delete.rs | 445 ++++++-- .../core/src/operations/filesystem_check.rs 
| 6 +- crates/core/src/operations/load.rs | 3 + crates/core/src/operations/load_cdf.rs | 267 ++++- crates/core/src/operations/merge/barrier.rs | 45 +- crates/core/src/operations/merge/filter.rs | 943 +++++++++++++++ crates/core/src/operations/merge/mod.rs | 963 ++++++++-------- crates/core/src/operations/mod.rs | 65 +- crates/core/src/operations/optimize.rs | 160 ++- crates/core/src/operations/restore.rs | 21 +- .../core/src/operations/set_tbl_properties.rs | 215 +--- .../transaction/conflict_checker.rs | 104 +- crates/core/src/operations/transaction/mod.rs | 120 +- .../src/operations/transaction/protocol.rs | 156 ++- .../core/src/operations/transaction/state.rs | 175 +-- .../src/operations/transaction/test_utils.rs | 171 --- crates/core/src/operations/update.rs | 554 ++++++--- crates/core/src/operations/vacuum.rs | 5 +- crates/core/src/operations/write.rs | 892 +++++++++++++-- crates/core/src/operations/writer.rs | 11 +- crates/core/src/protocol/checkpoints.rs | 28 +- crates/core/src/protocol/mod.rs | 75 +- crates/core/src/schema/partitions.rs | 205 +++- crates/core/src/storage/file.rs | 42 +- crates/core/src/storage/mod.rs | 324 +++++- crates/core/src/storage/retry_ext.rs | 5 +- crates/core/src/table/builder.rs | 198 ++-- crates/core/src/table/config.rs | 89 +- crates/core/src/table/mod.rs | 33 +- crates/core/src/table/state.rs | 5 + crates/core/src/table/state_arrow.rs | 23 +- .../core/src/test_utils/factories/actions.rs | 153 +++ crates/core/src/test_utils/factories/data.rs | 247 ++++ crates/core/src/test_utils/factories/mod.rs | 66 ++ crates/core/src/test_utils/mod.rs | 5 + crates/core/src/writer/json.rs | 9 +- crates/core/src/writer/record_batch.rs | 36 +- crates/core/src/writer/stats.rs | 25 +- crates/core/src/writer/test_utils.rs | 10 +- crates/core/src/writer/utils.rs | 6 +- crates/core/tests/checkpoint_writer.rs | 14 +- crates/core/tests/command_merge.rs | 31 +- crates/core/tests/command_optimize.rs | 2 +- crates/core/tests/command_restore.rs | 8 +- crates/core/tests/fs_common/mod.rs | 23 +- crates/core/tests/integration_checkpoint.rs | 10 +- crates/core/tests/integration_datafusion.rs | 87 +- .../core/tests/read_delta_partitions_test.rs | 116 -- crates/deltalake/Cargo.toml | 16 +- crates/deltalake/src/lib.rs | 2 + crates/gcp/Cargo.toml | 4 +- crates/gcp/src/storage.rs | 21 +- crates/gcp/tests/context.rs | 2 +- crates/hdfs/Cargo.toml | 29 + crates/hdfs/src/lib.rs | 48 + crates/hdfs/tests/context.rs | 60 + crates/hdfs/tests/integration.rs | 16 + crates/mount/Cargo.toml | 4 +- crates/mount/src/file.rs | 27 +- crates/sql/src/logical_plan.rs | 44 +- crates/sql/src/planner.rs | 21 +- crates/test/Cargo.toml | 4 +- crates/test/src/concurrent.rs | 2 +- crates/test/src/datafusion.rs | 8 +- crates/test/src/lib.rs | 14 +- .../_delta_log/00000000000000000000.json | 3 + .../_delta_log/00000000000000000001.json | 3 + .../00000000000000000002.checkpoint.parquet | Bin 0 -> 41898 bytes .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...411e-bca9-b067444cbcb0-c000.snappy.parquet | Bin 0 -> 5489 bytes ...4453-9202-51d75dee59af-c000.snappy.parquet | Bin 0 -> 5489 bytes dev/publish.sh | 11 + dev/release/update_change_log.sh | 4 +- docs/Makefile | 20 + docs/api/delta_writer.md | 4 + .../architecture-of-delta-table.md | 18 +- .../delta-lake-acid-transactions.md | 90 +- docs/integrations/delta-lake-daft.md | 10 + docs/integrations/delta-lake-dagster.md | 4 +- docs/integrations/object-storage/adls.md | 57 + docs/integrations/object-storage/gcs.md | 44 + 
docs/integrations/object-storage/hdfs.md | 48 + docs/integrations/object-storage/s3-like.md | 83 ++ docs/integrations/object-storage/s3.md | 102 ++ docs/requirements.txt | 4 +- docs/usage/loading-table.md | 55 +- docs/usage/managing-tables.md | 9 +- .../small-file-compaction-with-optimize.md | 2 +- docs/usage/writing/index.md | 35 +- .../writing-to-s3-with-locking-provider.md | 53 +- mkdocs.yml | 5 + python/.gitignore | 1 + python/Cargo.toml | 11 +- python/Makefile | 7 +- python/deltalake/__init__.py | 13 +- python/deltalake/_internal.pyi | 166 ++- python/deltalake/schema.py | 111 +- python/deltalake/table.py | 1016 ++++++++++------- python/deltalake/writer.py | 307 +++-- python/docs/source/usage.rst | 26 + python/pyproject.toml | 31 +- python/src/features.rs | 56 + python/src/filesystem.rs | 170 +-- python/src/lib.rs | 966 +++++++++------- python/src/merge.rs | 214 ++++ python/src/schema.rs | 215 ++-- python/src/utils.rs | 36 + python/stubs/pyarrow/__init__.pyi | 7 + python/stubs/pyarrow/parquet.pyi | 8 + python/tests/conftest.py | 89 +- .../test_write_to_pyspark.py | 4 +- python/tests/test_alter.py | 158 ++- python/tests/test_benchmark.py | 2 +- python/tests/test_cdf.py | 267 ++++- python/tests/test_checkpoint.py | 113 ++ python/tests/test_delete.py | 5 +- python/tests/test_file_system_handler.py | 2 +- python/tests/test_fs.py | 25 +- python/tests/test_merge.py | 100 +- python/tests/test_optimize.py | 44 +- python/tests/test_repair.py | 4 +- python/tests/test_restore.py | 4 +- python/tests/test_schema.py | 130 ++- python/tests/test_table_read.py | 188 ++- python/tests/test_update.py | 10 +- python/tests/test_vacuum.py | 4 +- python/tests/test_writer.py | 349 ++++-- python/tests/test_writerproperties.py | 56 +- 205 files changed, 15934 insertions(+), 8216 deletions(-) create mode 100644 .github/actions/setup-env/action.yml create mode 100644 .github/codecov.yml create mode 100644 .github/workflows/codecov.yml create mode 100644 .github/workflows/python_benchmark.yml create mode 100644 crates/aws/src/constants.rs create mode 100644 crates/aws/src/logstore/default_logstore.rs rename crates/aws/src/{logstore.rs => logstore/dynamodb_logstore.rs} (94%) create mode 100644 crates/aws/src/logstore/mod.rs create mode 100644 crates/core/src/delta_datafusion/planner.rs create mode 100644 crates/core/src/delta_datafusion/schema_adapter.rs delete mode 100644 crates/core/src/kernel/expressions/eval.rs delete mode 100644 crates/core/src/kernel/expressions/mod.rs delete mode 100644 crates/core/src/kernel/expressions/scalars.rs create mode 100644 crates/core/src/kernel/scalars.rs create mode 100644 crates/core/src/operations/add_column.rs create mode 100644 crates/core/src/operations/add_feature.rs delete mode 100644 crates/core/src/operations/cast.rs create mode 100644 crates/core/src/operations/cast/merge_schema.rs create mode 100644 crates/core/src/operations/cast/mod.rs create mode 100644 crates/core/src/operations/cdc.rs create mode 100644 crates/core/src/operations/merge/filter.rs delete mode 100644 crates/core/src/operations/transaction/test_utils.rs create mode 100644 crates/core/src/test_utils/factories/actions.rs create mode 100644 crates/core/src/test_utils/factories/data.rs create mode 100644 crates/core/src/test_utils/factories/mod.rs create mode 100644 crates/core/src/test_utils/mod.rs create mode 100644 crates/hdfs/Cargo.toml create mode 100644 crates/hdfs/src/lib.rs create mode 100644 crates/hdfs/tests/context.rs create mode 100644 crates/hdfs/tests/integration.rs create mode 100644 
crates/test/tests/data/delta-checkpoint-stats-optional/_delta_log/00000000000000000000.json create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/_delta_log/00000000000000000001.json create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/_delta_log/00000000000000000002.checkpoint.parquet create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/_delta_log/00000000000000000002.json create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/_delta_log/_last_checkpoint create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/part-00000-28925d3a-bdf2-411e-bca9-b067444cbcb0-c000.snappy.parquet create mode 100644 crates/test/tests/data/delta-checkpoint-stats-optional/part-00000-7a509247-4f58-4453-9202-51d75dee59af-c000.snappy.parquet create mode 100755 dev/publish.sh create mode 100644 docs/Makefile create mode 100644 docs/integrations/object-storage/adls.md create mode 100644 docs/integrations/object-storage/gcs.md create mode 100644 docs/integrations/object-storage/hdfs.md create mode 100644 docs/integrations/object-storage/s3-like.md create mode 100644 docs/integrations/object-storage/s3.md create mode 100644 python/src/features.rs create mode 100644 python/src/merge.rs create mode 100644 python/stubs/pyarrow/parquet.pyi diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b99809d1f6..736703c551 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1,4 @@ -crates/ @wjones127 @roeap @rtyler +crates/ @wjones127 @roeap @rtyler @hntd187 @ion-elgreco delta-inspect/ @wjones127 @rtyler proofs/ @houqp python/ @wjones127 @fvaleye @roeap @ion-elgreco diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml new file mode 100644 index 0000000000..7875107ddd --- /dev/null +++ b/.github/actions/setup-env/action.yml @@ -0,0 +1,34 @@ +name: "Setup Python and Rust Environment" +description: "Set up Python, virtual environment, and Rust toolchain" + +inputs: + + python-version: + description: "The Python version to set up" + required: true + default: "3.10" + + rust-toolchain: + description: "The Rust toolchain to set up" + required: true + default: "stable" + +runs: + using: "composite" + + steps: + + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: default + toolchain: ${{ inputs.rust-toolchain }} + override: true + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 \ No newline at end of file diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 0000000000..dd93c3b7cf --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,17 @@ + +coverage: + status: + project: + default: + # allow some leniency on the deviation of pull requests + threshold: '1%' + informational: true + patch: + default: + informational: true + + +ignore: + - "delta-inspect/" + - "proofs/" + - "**/*.toml" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bdacb4c00c..1e5b6b27a4 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -10,6 +10,5 @@ updates: ignore: # arrow and datafusion are bumped manually - dependency-name: "arrow*" - update-types: ["version-update:semver-major"] - dependency-name: "datafusion*" - update-types: ["version-update:semver-major"] + - dependency-name: "parquet" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 
8b80dc0a9f..a807184c47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,6 +5,10 @@ on: branches: [main, "rust-v*"] pull_request: branches: [main, "rust-v*"] + merge_group: + +env: + DEFAULT_FEATURES: "azure,datafusion,s3,gcs,glue,hdfs " jobs: format: @@ -16,7 +20,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: stable + toolchain: '1.80' override: true - name: Format @@ -28,7 +32,6 @@ jobs: matrix: os: - ubuntu-latest - - macos-11 - windows-latest runs-on: ${{ matrix.os }} @@ -39,17 +42,17 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: stable + toolchain: '1.80' override: true - name: build and lint with clippy - run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests + run: cargo clippy --features ${{ env.DEFAULT_FEATURES }} --tests - name: Spot-check build for native-tls features run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue --tests - name: Check docs - run: cargo doc --features azure,datafusion,s3,gcs,glue + run: cargo doc --features ${{ env.DEFAULT_FEATURES }} - name: Check no default features (except rustls) run: cargo check --no-default-features --features rustls @@ -60,7 +63,6 @@ jobs: matrix: os: - ubuntu-latest - - macos-11 - windows-latest runs-on: ${{ matrix.os }} env: @@ -77,11 +79,11 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "stable" + toolchain: '1.80' override: true - name: Run tests - run: cargo test --verbose --features datafusion,azure + run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }} integration_test: name: Integration Tests @@ -94,6 +96,7 @@ jobs: # https://github.com/rust-lang/cargo/issues/10280 CARGO_NET_GIT_FETCH_WITH_CLI: "true" RUST_BACKTRACE: "1" + RUST_LOG: debug AWS_DEFAULT_REGION: "us-east-1" AWS_ACCESS_KEY_ID: deltalake AWS_SECRET_ACCESS_KEY: weloverust @@ -111,15 +114,27 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: stable + toolchain: '1.80' override: true + # Install Java and Hadoop for HDFS integration tests + - uses: actions/setup-java@v4 + with: + distribution: "temurin" + java-version: "17" + + - name: Download Hadoop + run: | + wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz + tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE + echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH + - name: Start emulated services - run: docker-compose up -d + run: docker compose up -d - name: Run tests with rustls (default) run: | - cargo test --features integration_test,azure,s3,gcs,datafusion + cargo test --features integration_test,${{ env.DEFAULT_FEATURES }} - name: Run tests with native-tls run: | diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100644 index 0000000000..a8d9beabcd --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,36 @@ +name: coverage + +on: + push: + branches: [main, "rust-v*"] + pull_request: + branches: [main, "rust-v*"] + +env: + DEFAULT_FEATURES: "azure,datafusion,s3,gcs,glue,hdfs " + +jobs: + coverage: + runs-on: ubuntu-latest + env: + CARGO_TERM_COLOR: always + steps: + - uses: actions/checkout@v4 + - name: Install rust + uses: actions-rs/toolchain@v1 + with: + profile: default + toolchain: '1.80' + override: true + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + - uses: Swatinem/rust-cache@v2 + - name: Generate code coverage + run: cargo llvm-cov --features ${DEFAULT_FEATURES} --workspace --codecov 
--output-path codecov.json -- --skip read_table_version_hdfs + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: codecov.json + fail_ci_if_error: true + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 6b3d5a7ddb..121e0b8882 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -2,6 +2,7 @@ name: dev_pr # Trigger whenever a PR is changed (title as well as new / changed commits) on: + merge_group: pull_request_target: types: - opened diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 079cd66fcc..5729b87624 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,7 @@ name: Build (and maybe release) the documentation on: + merge_group: pull_request: paths: - python/** @@ -31,9 +32,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: psf/black@stable - with: - src: docs/src/python + - run: | + cd docs + make check build-deploy: needs: @@ -47,25 +48,13 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.10' + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Build and install deltalake run: | cd python - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make ${{ env.BUILD_ARGS }} diff --git a/.github/workflows/python_benchmark.yml b/.github/workflows/python_benchmark.yml new file mode 100644 index 0000000000..896c5cc412 --- /dev/null +++ b/.github/workflows/python_benchmark.yml @@ -0,0 +1,54 @@ +name: python_benchmark + + +# This is separate from the python_build so that it doesn't need to run on the merge group +on: + push: + branches: [main] + pull_request: + branches: [main] + +defaults: + run: + working-directory: ./python + +jobs: + benchmark: + name: Python Benchmark + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-C debuginfo=line-tables-only" + CARGO_INCREMENTAL: 0 + + steps: + - uses: actions/checkout@v2 + + - name: Setup Environment + uses: ./.github/actions/setup-env + + - name: Build deltalake in release mode + run: | + python -m venv venv + source venv/bin/activate + MATURIN_EXTRA_ARGS=--release make develop + + # Download previous benchmark result from cache (if exists) + - name: Download previous benchmark data + uses: actions/cache@v2 + with: + path: ./cache + key: ${{ runner.os }}-benchmark + + - name: Run benchmark + run: | + source venv/bin/activate + pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: "pytest" + output-file-path: python/output.json + external-data-json-path: ./cache/benchmark-data.json + fail-on-alert: true + diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index bc2f20cc9a..dc5483e091 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -1,6 +1,7 @@ name: python_build on: + merge_group: push: branches: [main] pull_request: @@ -15,28 +16,22 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.8 + + - name: Setup Environment + uses: 
./.github/actions/setup-env - name: Check Python run: | - pip install ruff black mypy types-dataclasses typing-extensions + python -m venv venv + source venv/bin/activate + pip install ruff==0.5.2 mypy==1.10.1 types-dataclasses typing-extensions make check-python - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - name: Check Rust run: make check-rust test-minimal: - name: Python Build (Python 3.8 PyArrow 8.0.0) + name: Python Build (Python 3.8 PyArrow 16.0.0) runs-on: ubuntu-latest env: RUSTFLAGS: "-C debuginfo=line-tables-only" @@ -45,28 +40,18 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v2 + - name: Setup Environment + uses: ./.github/actions/setup-env with: python-version: 3.8 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make setup # Install minimum PyArrow version - pip install -e .[pandas,devel] pyarrow==8.0.0 + pip install -e .[pandas,devel] pyarrow==16.0.0 env: RUSTFLAGS: "-C debuginfo=line-tables-only" @@ -75,10 +60,6 @@ jobs: source venv/bin/activate make unit-test - # - name: Run Integration tests - # run: | - # py.test --cov tests -m integration - test: name: Python Build (Python 3.10 PyArrow latest) runs-on: ubuntu-latest @@ -89,26 +70,15 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v3 - with: - python-version: "3.10" + - name: Setup Environment + uses: ./.github/actions/setup-env - name: Start emulated services - run: docker-compose up -d + run: docker compose up -d - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make develop @@ -127,56 +97,6 @@ jobs: python -m pytest -m "not pandas and not integration and not benchmark" pip install pandas - benchmark: - name: Python Benchmark - runs-on: ubuntu-latest - env: - RUSTFLAGS: "-C debuginfo=line-tables-only" - CARGO_INCREMENTAL: 0 - - steps: - - uses: actions/checkout@v2 - - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - - name: Build deltalake in release mode - run: | - pip install virtualenv - virtualenv venv - source venv/bin/activate - MATURIN_EXTRA_ARGS=--release make develop - - # Download previous benchmark result from cache (if exists) - - name: Download previous benchmark data - uses: actions/cache@v2 - with: - path: ./cache - key: ${{ runner.os }}-benchmark - - - name: Run benchmark - run: | - source venv/bin/activate - pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json - - - name: Store benchmark result - uses: benchmark-action/github-action-benchmark@v1 - with: - tool: "pytest" - output-file-path: python/output.json - external-data-json-path: ./cache/benchmark-data.json - fail-on-alert: true - test-pyspark: name: PySpark Integration Tests runs-on: ubuntu-latest @@ -187,18 +107,8 @@ jobs: steps: - uses: 
actions/checkout@v3 - - name: Install latest nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - - uses: Swatinem/rust-cache@v2 - - - uses: actions/setup-python@v3 - with: - python-version: "3.10" + - name: Setup Environment + uses: ./.github/actions/setup-env - uses: actions/setup-java@v2 with: @@ -207,8 +117,7 @@ jobs: - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make develop-pyspark @@ -231,15 +140,14 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - name: Setup Environment + uses: ./.github/actions/setup-env with: python-version: ${{ matrix.python-version }} - name: Build and install deltalake run: | - pip install virtualenv - virtualenv venv + python -m venv venv source venv/bin/activate make setup maturin develop diff --git a/.github/workflows/python_release.yml b/.github/workflows/python_release.yml index 48611bacb4..cf462f2070 100644 --- a/.github/workflows/python_release.yml +++ b/.github/workflows/python_release.yml @@ -35,7 +35,7 @@ jobs: fail-fast: false matrix: target: [x86_64-apple-darwin, aarch64-apple-darwin] - runs-on: macos-12 + runs-on: macos-14 steps: - uses: actions/checkout@v3 @@ -76,6 +76,7 @@ jobs: env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} with: + maturin-version: v1.6.0 # https://github.com/PyO3/maturin/issues/2154 target: x86_64-unknown-linux-gnu command: publish args: --skip-existing -m python/Cargo.toml ${{ env.FEATURES_FLAG }} diff --git a/.gitignore b/.gitignore index 84fc17c5f2..18dcc39f69 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ tlaplus/*.toolbox/*/[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*-[0-9]*/ .vscode .env .venv +venv **/.DS_Store **/.python-version .coverage @@ -21,6 +22,7 @@ __blobstorage__ .githubchangeloggenerator.cache.log .githubchangeloggenerator.cache/ .githubchangeloggenerator* +data # Add all Cargo.lock files except for those in binary crates Cargo.lock @@ -30,4 +32,4 @@ Cargo.lock justfile site -__pycache__ +__pycache__ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index e68641da21..7c0c5099c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,301 @@ # Changelog +## [rust-v0.19.0](https://github.com/delta-io/delta-rs/tree/rust-v0.19.0) (2024-08-14) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.18.2...rust-v0.19.0) + +**Implemented enhancements:** + +- Only allow squash merge [\#2542](https://github.com/delta-io/delta-rs/issues/2542) + +**Fixed bugs:** + +- Write also insert change types in writer CDC [\#2750](https://github.com/delta-io/delta-rs/issues/2750) +- Regression in Python multiprocessing support [\#2744](https://github.com/delta-io/delta-rs/issues/2744) +- SchemaError occurs during table optimisation after upgrade to v0.18.1 [\#2731](https://github.com/delta-io/delta-rs/issues/2731) +- AWS WebIdentityToken exposure in log files [\#2719](https://github.com/delta-io/delta-rs/issues/2719) +- Write performance degrades with multiple writers [\#2683](https://github.com/delta-io/delta-rs/issues/2683) +- Write monotonic sequence, but read is non monotonic [\#2659](https://github.com/delta-io/delta-rs/issues/2659) +- Python `write_deltalake` with `schema_mode="merge"` casts types [\#2642](https://github.com/delta-io/delta-rs/issues/2642) +- Newest docs \(potentially\) not released 
[\#2587](https://github.com/delta-io/delta-rs/issues/2587) +- CDC is not generated for Structs and Lists [\#2568](https://github.com/delta-io/delta-rs/issues/2568) + +**Closed issues:** + +- delete\_dir bug [\#2713](https://github.com/delta-io/delta-rs/issues/2713) + +**Merged pull requests:** + +- chore: fix a bunch of clippy lints and re-enable tests [\#2773](https://github.com/delta-io/delta-rs/pull/2773) ([rtyler](https://github.com/rtyler)) +- feat: more economic data skipping with datafusion [\#2772](https://github.com/delta-io/delta-rs/pull/2772) ([roeap](https://github.com/roeap)) +- chore: prepare the next notable release of 0.19.0 [\#2768](https://github.com/delta-io/delta-rs/pull/2768) ([rtyler](https://github.com/rtyler)) +- feat: restore the TryFrom for DeltaTablePartition [\#2767](https://github.com/delta-io/delta-rs/pull/2767) ([rtyler](https://github.com/rtyler)) +- feat: fail fast on forked process [\#2765](https://github.com/delta-io/delta-rs/pull/2765) ([Tom-Newton](https://github.com/Tom-Newton)) +- perf: early stop if all values in arr are null [\#2764](https://github.com/delta-io/delta-rs/pull/2764) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): don't flatten fields during cdf read [\#2763](https://github.com/delta-io/delta-rs/pull/2763) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: upgrade to datafusion 41 [\#2761](https://github.com/delta-io/delta-rs/pull/2761) ([rtyler](https://github.com/rtyler)) +- fix\(python, rust\): cdc in writer not creating inserts [\#2751](https://github.com/delta-io/delta-rs/pull/2751) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: improved test fixtures [\#2749](https://github.com/delta-io/delta-rs/pull/2749) ([roeap](https://github.com/roeap)) +- feat: introduce CDC generation for merge operations [\#2747](https://github.com/delta-io/delta-rs/pull/2747) ([rtyler](https://github.com/rtyler)) +- docs: fix broken link in docs [\#2746](https://github.com/delta-io/delta-rs/pull/2746) ([astrojuanlu](https://github.com/astrojuanlu)) +- chore: update delta\_kernel to 0.3.0 [\#2742](https://github.com/delta-io/delta-rs/pull/2742) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- chore: add to code\_owner crates [\#2741](https://github.com/delta-io/delta-rs/pull/2741) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: update changelog and versions for next release [\#2740](https://github.com/delta-io/delta-rs/pull/2740) ([rtyler](https://github.com/rtyler)) +- feat\(python, rust\): arrow large/view types passthrough, rust default engine [\#2738](https://github.com/delta-io/delta-rs/pull/2738) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: column parsing to include nested columns and enclosing char [\#2737](https://github.com/delta-io/delta-rs/pull/2737) ([gtrawinski](https://github.com/gtrawinski)) + +## [rust-v0.18.2](https://github.com/delta-io/delta-rs/tree/rust-v0.18.2) (2024-08-07) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.18.1...rust-v0.18.2) + +**Implemented enhancements:** + +- Choose which columns to store min/max values for [\#2709](https://github.com/delta-io/delta-rs/issues/2709) +- Projection pushdown for load\_cdf [\#2681](https://github.com/delta-io/delta-rs/issues/2681) +- Way to check if Delta table exists at specified path [\#2662](https://github.com/delta-io/delta-rs/issues/2662) +- Support HDFS via hdfs-native package [\#2611](https://github.com/delta-io/delta-rs/issues/2611) +- Deletion `_change_type` does not appear in 
change data feed [\#2579](https://github.com/delta-io/delta-rs/issues/2579) +- Could you please explain in the README what "Deltalake" is for the uninitiated? [\#2523](https://github.com/delta-io/delta-rs/issues/2523) +- Discuss: Allow protocol change during write actions [\#2444](https://github.com/delta-io/delta-rs/issues/2444) +- Support for Arrow PyCapsule interface [\#2376](https://github.com/delta-io/delta-rs/issues/2376) + +**Fixed bugs:** + +- Slow add\_actions.to\_pydict for tables with large number of columns, impacting read performance [\#2733](https://github.com/delta-io/delta-rs/issues/2733) +- append is deleting records [\#2716](https://github.com/delta-io/delta-rs/issues/2716) +- segmentation fault - Python 3.10 on Mac M3 [\#2706](https://github.com/delta-io/delta-rs/issues/2706) +- Failure to delete dir and files [\#2703](https://github.com/delta-io/delta-rs/issues/2703) +- DeltaTable.from\_data\_catalog not working [\#2699](https://github.com/delta-io/delta-rs/issues/2699) +- Project should use the same version of `ruff` in the `lint` stage of `python_build.yml` as in `pyproject.toml` [\#2678](https://github.com/delta-io/delta-rs/issues/2678) +- un-tracked columns are giving json error when pyarrow schema have feild with nullable=False and create\_checkpoint is trigged [\#2675](https://github.com/delta-io/delta-rs/issues/2675) +- \[BUG\]write\_delta\({'custom\_metadata':str}\) cannot be converted. str to pyDict error \(0.18.2\_DeltaPython/Windows10\) [\#2697](https://github.com/delta-io/delta-rs/issues/2697) +- Pyarrow engine not supporting schema overwrite with Append mode [\#2654](https://github.com/delta-io/delta-rs/issues/2654) +- `deltalake-core` version re-exported by `deltalake` different than versions used by `deltalake-azure` and `deltalake-gcp` [\#2647](https://github.com/delta-io/delta-rs/issues/2647) +- i32 limit in JSON stats [\#2646](https://github.com/delta-io/delta-rs/issues/2646) +- Rust writer not encoding correct URL for partitions in delta table [\#2634](https://github.com/delta-io/delta-rs/issues/2634) +- Large Types breaks merge predicate pruning [\#2632](https://github.com/delta-io/delta-rs/issues/2632) +- Getting error when converting a partitioned parquet table to delta table [\#2626](https://github.com/delta-io/delta-rs/issues/2626) +- Arrow: Parquet does not support writing empty structs when creating checkpoint [\#2622](https://github.com/delta-io/delta-rs/issues/2622) +- InvalidTableLocation\("Unknown scheme: gs"\) on 0.18.0 [\#2610](https://github.com/delta-io/delta-rs/issues/2610) +- Unable to read delta table created using Uniform [\#2578](https://github.com/delta-io/delta-rs/issues/2578) +- schema merging doesn't work when overwriting with a predicate [\#2567](https://github.com/delta-io/delta-rs/issues/2567) +- Not working in AWS Lambda \(0.16.2 - 0.17.4\) OSError: Generic S3 error [\#2511](https://github.com/delta-io/delta-rs/issues/2511) +- DataFusion filter on partition column doesn't work. \(when the phsical schema ordering is different to logical one\) [\#2494](https://github.com/delta-io/delta-rs/issues/2494) +- Creating checkpoints for tables with missing column stats results in Err [\#2493](https://github.com/delta-io/delta-rs/issues/2493) +- Cannot merge to a table with a timestamp column after upgrading delta-rs [\#2478](https://github.com/delta-io/delta-rs/issues/2478) +- Azure AD Auth fails on ARM64 [\#2475](https://github.com/delta-io/delta-rs/issues/2475) +- Generic S3 error: Error after 0 retries ... 
Broken pipe \(os error 32\) [\#2403](https://github.com/delta-io/delta-rs/issues/2403) +- write\_deltalake identifies large\_string as datatype even though string is set in schema [\#2374](https://github.com/delta-io/delta-rs/issues/2374) +- Inconsistent arrow timestamp type breaks datafusion query [\#2341](https://github.com/delta-io/delta-rs/issues/2341) + +**Closed issues:** + +- Unable to write new partitions with type timestamp on tables created with delta-rs 0.10.0 [\#2631](https://github.com/delta-io/delta-rs/issues/2631) + +**Merged pull requests:** + +- fix: schema adapter doesn't map partial batches correctly [\#2735](https://github.com/delta-io/delta-rs/pull/2735) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- perf: grab file size in rust [\#2734](https://github.com/delta-io/delta-rs/pull/2734) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: use logical plan in update, refactor/simplify CDCTracker [\#2727](https://github.com/delta-io/delta-rs/pull/2727) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: use logical plan in delete, delta planner refactoring [\#2725](https://github.com/delta-io/delta-rs/pull/2725) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: try an alternative docke compose invocation syntax [\#2724](https://github.com/delta-io/delta-rs/pull/2724) ([rtyler](https://github.com/rtyler)) +- fix\(python, rust\): use input schema to get correct schema in cdf reads [\#2723](https://github.com/delta-io/delta-rs/pull/2723) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): cdc write-support for `overwrite` and `replacewhere` writes [\#2722](https://github.com/delta-io/delta-rs/pull/2722) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): cdc write-support for `delete` operation [\#2721](https://github.com/delta-io/delta-rs/pull/2721) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: enabling actions for merge groups [\#2718](https://github.com/delta-io/delta-rs/pull/2718) ([rtyler](https://github.com/rtyler)) +- perf: apply projection when reading checkpoint parquet [\#2717](https://github.com/delta-io/delta-rs/pull/2717) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- feat\(python\): add DeltaTable.is\_deltatable static method \(\#2662\) [\#2715](https://github.com/delta-io/delta-rs/pull/2715) ([omkar-foss](https://github.com/omkar-foss)) +- chore: prepare python release 0.18.3 [\#2707](https://github.com/delta-io/delta-rs/pull/2707) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): use url encoder when encoding partition values [\#2705](https://github.com/delta-io/delta-rs/pull/2705) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(python, rust\): add projection in CDF reads [\#2704](https://github.com/delta-io/delta-rs/pull/2704) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: ensure DataFusion SessionState Parquet options are applied to DeltaScan [\#2702](https://github.com/delta-io/delta-rs/pull/2702) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- chore: refactor `write_deltalake` in `writer.py` [\#2695](https://github.com/delta-io/delta-rs/pull/2695) ([fpgmaas](https://github.com/fpgmaas)) +- fix\(python\): empty dataset fix for "pyarrow" engine [\#2689](https://github.com/delta-io/delta-rs/pull/2689) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: add test coverage command to `Makefile` [\#2688](https://github.com/delta-io/delta-rs/pull/2688) ([fpgmaas](https://github.com/fpgmaas)) +- 
chore: create separate action to setup python and rust in the cicd pipeline [\#2687](https://github.com/delta-io/delta-rs/pull/2687) ([fpgmaas](https://github.com/fpgmaas)) +- fix: update delta kernel version [\#2685](https://github.com/delta-io/delta-rs/pull/2685) ([jeppe742](https://github.com/jeppe742)) +- chore: update README.md [\#2684](https://github.com/delta-io/delta-rs/pull/2684) ([veronewra](https://github.com/veronewra)) +- fix\(rust,python\): checkpoint with column nullable false [\#2680](https://github.com/delta-io/delta-rs/pull/2680) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: pin `ruff` and `mypy` versions in the `lint` stage in the CI pipeline [\#2679](https://github.com/delta-io/delta-rs/pull/2679) ([fpgmaas](https://github.com/fpgmaas)) +- chore: enable `RUF` ruleset for `ruff` [\#2677](https://github.com/delta-io/delta-rs/pull/2677) ([fpgmaas](https://github.com/fpgmaas)) +- chore: remove stale code for conditional import of `Literal` [\#2676](https://github.com/delta-io/delta-rs/pull/2676) ([fpgmaas](https://github.com/fpgmaas)) +- chore: remove references to black from the project [\#2674](https://github.com/delta-io/delta-rs/pull/2674) ([fpgmaas](https://github.com/fpgmaas)) +- chore: bump ruff to 0.5.2 [\#2673](https://github.com/delta-io/delta-rs/pull/2673) ([fpgmaas](https://github.com/fpgmaas)) +- chore: improve contributing.md [\#2672](https://github.com/delta-io/delta-rs/pull/2672) ([fpgmaas](https://github.com/fpgmaas)) +- feat: support userMetadata in CommitInfo [\#2670](https://github.com/delta-io/delta-rs/pull/2670) ([jkylling](https://github.com/jkylling)) +- chore: upgrade to datafusion 40 [\#2661](https://github.com/delta-io/delta-rs/pull/2661) ([rtyler](https://github.com/rtyler)) +- docs: improve navigation fixes [\#2660](https://github.com/delta-io/delta-rs/pull/2660) ([avriiil](https://github.com/avriiil)) +- docs: add integration docs for s3 backend [\#2658](https://github.com/delta-io/delta-rs/pull/2658) ([avriiil](https://github.com/avriiil)) +- docs: fix bullets on hdfs docs [\#2653](https://github.com/delta-io/delta-rs/pull/2653) ([Kimahriman](https://github.com/Kimahriman)) +- ci: update CODEOWNERS [\#2650](https://github.com/delta-io/delta-rs/pull/2650) ([hntd187](https://github.com/hntd187)) +- feat\(rust\): fix size\_in\_bytes in last\_checkpoint\_ to i64 [\#2649](https://github.com/delta-io/delta-rs/pull/2649) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: increase subcrate versions [\#2648](https://github.com/delta-io/delta-rs/pull/2648) ([rtyler](https://github.com/rtyler)) +- chore: missed one macos runner reference in actions [\#2645](https://github.com/delta-io/delta-rs/pull/2645) ([rtyler](https://github.com/rtyler)) +- chore: add a reproduction case for merge failures with struct\ [\#2644](https://github.com/delta-io/delta-rs/pull/2644) ([rtyler](https://github.com/rtyler)) +- chore: remove macos builders from pull request flow [\#2638](https://github.com/delta-io/delta-rs/pull/2638) ([rtyler](https://github.com/rtyler)) +- fix: enable parquet pushdown for DeltaScan via TableProvider impl for DeltaTable \(rebase\) [\#2637](https://github.com/delta-io/delta-rs/pull/2637) ([rtyler](https://github.com/rtyler)) +- chore: fix documentation generation with a pin of griffe [\#2636](https://github.com/delta-io/delta-rs/pull/2636) ([rtyler](https://github.com/rtyler)) +- fix\(python\): fixed large\_dtype to schema convert [\#2635](https://github.com/delta-io/delta-rs/pull/2635) 
([sherlockbeard](https://github.com/sherlockbeard)) +- fix\(rust, python\): fix writing empty structs when creating checkpoint [\#2627](https://github.com/delta-io/delta-rs/pull/2627) ([sherlockbeard](https://github.com/sherlockbeard)) +- fix\(rust, python\): fix merge schema with overwrite [\#2623](https://github.com/delta-io/delta-rs/pull/2623) ([sherlockbeard](https://github.com/sherlockbeard)) +- chore: bump python 0.18.2 [\#2621](https://github.com/delta-io/delta-rs/pull/2621) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: report DataFusion metrics for DeltaScan [\#2617](https://github.com/delta-io/delta-rs/pull/2617) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- feat\(rust,python\): cast each parquet file to delta schema [\#2615](https://github.com/delta-io/delta-rs/pull/2615) ([HawaiianSpork](https://github.com/HawaiianSpork)) +- fix\(rust\): inconsistent order of partitioning columns \(\#2494\) [\#2614](https://github.com/delta-io/delta-rs/pull/2614) ([aditanase](https://github.com/aditanase)) +- docs: add Daft writer [\#2594](https://github.com/delta-io/delta-rs/pull/2594) ([avriiil](https://github.com/avriiil)) +- feat\(python, rust\): `add column` operation [\#2562](https://github.com/delta-io/delta-rs/pull/2562) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: change arrow map root name to follow with parquet root name [\#2538](https://github.com/delta-io/delta-rs/pull/2538) ([sclmn](https://github.com/sclmn)) +- feat\(python\): handle PyCapsule interface objects in write\_deltalake [\#2534](https://github.com/delta-io/delta-rs/pull/2534) ([kylebarron](https://github.com/kylebarron)) +- feat: improve merge performance by using predicate non-partition columns min/max for prefiltering [\#2513](https://github.com/delta-io/delta-rs/pull/2513) ([JonasDev1](https://github.com/JonasDev1)) +- feat\(python, rust\): cleanup expired logs post-commit hook [\#2459](https://github.com/delta-io/delta-rs/pull/2459) ([ion-elgreco](https://github.com/ion-elgreco)) + +## [rust-v0.18.0](https://github.com/delta-io/delta-rs/tree/rust-v0.18.0) (2024-06-12) + +[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.3...rust-v0.18.0) + +**Implemented enhancements:** + +- documentation: concurrent writes for non-S3 backends [\#2556](https://github.com/delta-io/delta-rs/issues/2556) +- pyarrow options for `write_delta` [\#2515](https://github.com/delta-io/delta-rs/issues/2515) +- \[deltalake\_aws\] Allow configuring separate endpoints for S3 and DynamoDB clients. 
[\#2498](https://github.com/delta-io/delta-rs/issues/2498) +- Include file stats when converting a parquet directory to a Delta table [\#2490](https://github.com/delta-io/delta-rs/issues/2490) +- Adopt the delta kernel types [\#2489](https://github.com/delta-io/delta-rs/issues/2489) + +**Fixed bugs:** + +- `raise_if_not_exists` for properties not configurable on CreateBuilder [\#2564](https://github.com/delta-io/delta-rs/issues/2564) +- write\_deltalake with rust engine fails when mode is append and overwrite schema is enabled [\#2553](https://github.com/delta-io/delta-rs/issues/2553) +- Running the basic\_operations examples fails with `Error: Transaction { source: WriterFeaturesRequired(TimestampWithoutTimezone) `} [\#2552](https://github.com/delta-io/delta-rs/issues/2552) +- invalid peer certificate: BadSignature when connecting to s3 from arm64/aarch64 [\#2551](https://github.com/delta-io/delta-rs/issues/2551) +- load\_cdf\(\) issue : Generic S3 error: request or response body error: operation timed out [\#2549](https://github.com/delta-io/delta-rs/issues/2549) +- write\_deltalake fails on Databricks volume [\#2540](https://github.com/delta-io/delta-rs/issues/2540) +- Getting "Microsoft Azure Error: Operation timed out" when trying to retrieve big files [\#2537](https://github.com/delta-io/delta-rs/issues/2537) +- Impossible to append to a DeltaTable with float data type on RHEL [\#2520](https://github.com/delta-io/delta-rs/issues/2520) +- Creating DeltaTable object slow [\#2518](https://github.com/delta-io/delta-rs/issues/2518) +- `write_deltalake` throws parser error when using `rust` engine and big decimals [\#2510](https://github.com/delta-io/delta-rs/issues/2510) +- TypeError: Object of type int64 is not JSON serializable when writing using a Pandas dataframe [\#2501](https://github.com/delta-io/delta-rs/issues/2501) +- unable to read delta table when table contains both null and non-null add stats [\#2477](https://github.com/delta-io/delta-rs/issues/2477) +- Commits on WriteMode::MergeSchema cause table metadata corruption [\#2468](https://github.com/delta-io/delta-rs/issues/2468) +- S3 object store always returns IMDS warnings [\#2460](https://github.com/delta-io/delta-rs/issues/2460) +- File skipping according to documentation [\#2427](https://github.com/delta-io/delta-rs/issues/2427) +- LockClientError [\#2379](https://github.com/delta-io/delta-rs/issues/2379) +- get\_app\_transaction\_version\(\) returns wrong result [\#2340](https://github.com/delta-io/delta-rs/issues/2340) +- Property setting in `create` is not handled correctly [\#2247](https://github.com/delta-io/delta-rs/issues/2247) +- Handling of decimals in scientific notation [\#2221](https://github.com/delta-io/delta-rs/issues/2221) +- Unable to append to delta table without datafusion feature [\#2204](https://github.com/delta-io/delta-rs/issues/2204) +- Decimal Column with Value 0 Causes Failure in Python Binding [\#2193](https://github.com/delta-io/delta-rs/issues/2193) + +**Merged pull requests:** + +- docs: improve S3 access docs [\#2589](https://github.com/delta-io/delta-rs/pull/2589) ([avriiil](https://github.com/avriiil)) +- chore: bump macOS runners, maybe resolve import error [\#2588](https://github.com/delta-io/delta-rs/pull/2588) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump to datafusion 39, arrow 52, pyo3 0.21 [\#2581](https://github.com/delta-io/delta-rs/pull/2581) ([abhiaagarwal](https://github.com/abhiaagarwal)) +- feat: add custom dynamodb endpoint configuration 
[\#2575](https://github.com/delta-io/delta-rs/pull/2575) ([hnaoto](https://github.com/hnaoto)) +- fix: consistently use raise\_if\_key\_not\_exists in CreateBuilder [\#2569](https://github.com/delta-io/delta-rs/pull/2569) ([vegarsti](https://github.com/vegarsti)) +- fix: add raise\_if\_key\_not\_exists to CreateBuilder [\#2565](https://github.com/delta-io/delta-rs/pull/2565) ([vegarsti](https://github.com/vegarsti)) +- docs: dt.delete add context + api docs link [\#2560](https://github.com/delta-io/delta-rs/pull/2560) ([avriiil](https://github.com/avriiil)) +- fix: update deltalake crate examples for crate layout and TimestampNtz [\#2559](https://github.com/delta-io/delta-rs/pull/2559) ([jhoekx](https://github.com/jhoekx)) +- docs: clarify locking mechanism requirement for S3 [\#2558](https://github.com/delta-io/delta-rs/pull/2558) ([inigohidalgo](https://github.com/inigohidalgo)) +- fix: remove deprecated overwrite\_schema configuration which has incorrect behavior [\#2554](https://github.com/delta-io/delta-rs/pull/2554) ([rtyler](https://github.com/rtyler)) +- fix: clippy warnings [\#2548](https://github.com/delta-io/delta-rs/pull/2548) ([imor](https://github.com/imor)) +- docs: dask write syntax fix [\#2543](https://github.com/delta-io/delta-rs/pull/2543) ([avriiil](https://github.com/avriiil)) +- fix: cast support fields nested in lists and maps [\#2541](https://github.com/delta-io/delta-rs/pull/2541) ([HawaiianSpork](https://github.com/HawaiianSpork)) +- feat: implement transaction identifiers - continued [\#2539](https://github.com/delta-io/delta-rs/pull/2539) ([roeap](https://github.com/roeap)) +- docs: pull delta from conda not pip [\#2535](https://github.com/delta-io/delta-rs/pull/2535) ([avriiil](https://github.com/avriiil)) +- chore: expose `files_by_partition` to public api [\#2533](https://github.com/delta-io/delta-rs/pull/2533) ([edmondop](https://github.com/edmondop)) +- chore: bump python 0.17.5 [\#2531](https://github.com/delta-io/delta-rs/pull/2531) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(rust\): make PartitionWriter public [\#2525](https://github.com/delta-io/delta-rs/pull/2525) ([adriangb](https://github.com/adriangb)) +- fix: msrv in workspace [\#2524](https://github.com/delta-io/delta-rs/pull/2524) ([roeap](https://github.com/roeap)) +- chore: fixing some clips [\#2521](https://github.com/delta-io/delta-rs/pull/2521) ([rtyler](https://github.com/rtyler)) +- fix: enable field\_with\_name to support nested fields with '.' 
delimiter [\#2519](https://github.com/delta-io/delta-rs/pull/2519) ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- chore: tidying up builds without datafusion feature and clippy [\#2516](https://github.com/delta-io/delta-rs/pull/2516) ([rtyler](https://github.com/rtyler)) +- fix\(python\): release GIL on most operations [\#2512](https://github.com/delta-io/delta-rs/pull/2512) ([adriangb](https://github.com/adriangb)) +- docs: fix typo [\#2508](https://github.com/delta-io/delta-rs/pull/2508) ([avriiil](https://github.com/avriiil)) +- fix\(rust, python\): fixed differences in storage options between log and object stores [\#2500](https://github.com/delta-io/delta-rs/pull/2500) ([mightyshazam](https://github.com/mightyshazam)) +- docs: improve daft integration docs [\#2496](https://github.com/delta-io/delta-rs/pull/2496) ([avriiil](https://github.com/avriiil)) +- feat: adopt kernel schema types [\#2495](https://github.com/delta-io/delta-rs/pull/2495) ([roeap](https://github.com/roeap)) +- feat: add stats to convert-to-delta operation [\#2491](https://github.com/delta-io/delta-rs/pull/2491) ([gruuya](https://github.com/gruuya)) +- fix\(python, rust\): region lookup wasn't working correctly for dynamo [\#2488](https://github.com/delta-io/delta-rs/pull/2488) ([mightyshazam](https://github.com/mightyshazam)) +- feat: introduce CDC write-side support for the Update operations [\#2486](https://github.com/delta-io/delta-rs/pull/2486) ([rtyler](https://github.com/rtyler)) +- fix\(python\): reuse state in `to_pyarrow_dataset` [\#2485](https://github.com/delta-io/delta-rs/pull/2485) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: check to see if the file exists before attempting to rename [\#2482](https://github.com/delta-io/delta-rs/pull/2482) ([rtyler](https://github.com/rtyler)) +- fix\(python, rust\): use new schema for stats parsing instead of old [\#2480](https://github.com/delta-io/delta-rs/pull/2480) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): unable to read delta table when table contains both null and non-null add stats [\#2476](https://github.com/delta-io/delta-rs/pull/2476) ([yjshen](https://github.com/yjshen)) +- chore: update the changelog to include rust-v0.17.3 [\#2473](https://github.com/delta-io/delta-rs/pull/2473) ([rtyler](https://github.com/rtyler)) +- chore: a bunch of tweaks to get releases out the door [\#2472](https://github.com/delta-io/delta-rs/pull/2472) ([rtyler](https://github.com/rtyler)) +- chore: bump the core crate for its next release [\#2470](https://github.com/delta-io/delta-rs/pull/2470) ([rtyler](https://github.com/rtyler)) +- fix: return unsupported error for merging schemas in the presence of partition columns [\#2469](https://github.com/delta-io/delta-rs/pull/2469) ([emcake](https://github.com/emcake)) +- feat\(python\): add parameter to DeltaTable.to\_pyarrow\_dataset\(\) [\#2465](https://github.com/delta-io/delta-rs/pull/2465) ([adriangb](https://github.com/adriangb)) +- feat\(python, rust\): add OBJECT\_STORE\_CONCURRENCY\_LIMIT setting for ObjectStoreFactory [\#2458](https://github.com/delta-io/delta-rs/pull/2458) ([vigimite](https://github.com/vigimite)) +- fix\(rust\): handle 429 from GCS [\#2454](https://github.com/delta-io/delta-rs/pull/2454) ([adriangb](https://github.com/adriangb)) +- fix\(python\): reuse table state in write engine [\#2453](https://github.com/delta-io/delta-rs/pull/2453) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): implement abort commit for S3DynamoDBLogStore 
[\#2452](https://github.com/delta-io/delta-rs/pull/2452) ([PeterKeDer](https://github.com/PeterKeDer)) +- fix\(python, rust\): check timestamp\_ntz in nested fields, add check\_can\_write in pyarrow writer [\#2443](https://github.com/delta-io/delta-rs/pull/2443) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): remove imds calls from profile auth and region [\#2442](https://github.com/delta-io/delta-rs/pull/2442) ([mightyshazam](https://github.com/mightyshazam)) +- fix\(python, rust\): use from\_name during column projection creation [\#2441](https://github.com/delta-io/delta-rs/pull/2441) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump python for 0.17 release [\#2439](https://github.com/delta-io/delta-rs/pull/2439) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python,rust\): missing remove actions during `create_or_replace` [\#2437](https://github.com/delta-io/delta-rs/pull/2437) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: introduce the Operation trait to enforce consistency between operations [\#2435](https://github.com/delta-io/delta-rs/pull/2435) ([rtyler](https://github.com/rtyler)) +- fix\(python\): load\_as\_version with datetime object with no timezone specified [\#2429](https://github.com/delta-io/delta-rs/pull/2429) ([t1g0rz](https://github.com/t1g0rz)) +- feat\(python, rust\): respect column stats collection configurations [\#2428](https://github.com/delta-io/delta-rs/pull/2428) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: lazy static runtime in python [\#2424](https://github.com/delta-io/delta-rs/pull/2424) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat: implement repartitioned for DeltaScan [\#2421](https://github.com/delta-io/delta-rs/pull/2421) ([jkylling](https://github.com/jkylling)) +- fix: return error when checkpoints and metadata get out of sync [\#2406](https://github.com/delta-io/delta-rs/pull/2406) ([esarili](https://github.com/esarili)) +- fix\(rust\): stats\_parsed has different number of records with stats [\#2405](https://github.com/delta-io/delta-rs/pull/2405) ([yjshen](https://github.com/yjshen)) +- docs: add Daft integration [\#2402](https://github.com/delta-io/delta-rs/pull/2402) ([avriiil](https://github.com/avriiil)) +- feat\(rust\): advance state in post commit [\#2396](https://github.com/delta-io/delta-rs/pull/2396) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore\(rust\): bump arrow v51 and datafusion v37.1 [\#2395](https://github.com/delta-io/delta-rs/pull/2395) ([lasantosr](https://github.com/lasantosr)) +- docs: document required aws permissions [\#2393](https://github.com/delta-io/delta-rs/pull/2393) ([ale-rinaldi](https://github.com/ale-rinaldi)) +- feat\(rust\): post commit hook \(v2\), create checkpoint hook [\#2391](https://github.com/delta-io/delta-rs/pull/2391) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: time travel when checkpointed and logs removed [\#2389](https://github.com/delta-io/delta-rs/pull/2389) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): remove flush after writing every batch [\#2387](https://github.com/delta-io/delta-rs/pull/2387) ([PeterKeDer](https://github.com/PeterKeDer)) +- feat: added configuration variables to handle EC2 metadata service [\#2385](https://github.com/delta-io/delta-rs/pull/2385) ([mightyshazam](https://github.com/mightyshazam)) +- fix\(rust\): timestamp deserialization format, missing type [\#2383](https://github.com/delta-io/delta-rs/pull/2383) 
([ion-elgreco](https://github.com/ion-elgreco)) +- chore: bump chrono [\#2372](https://github.com/delta-io/delta-rs/pull/2372) ([universalmind303](https://github.com/universalmind303)) +- chore: bump python 0.16.4 [\#2371](https://github.com/delta-io/delta-rs/pull/2371) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add snappy compression on checkpoint files [\#2365](https://github.com/delta-io/delta-rs/pull/2365) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add config for parquet pushdown on delta scan [\#2364](https://github.com/delta-io/delta-rs/pull/2364) ([Blajda](https://github.com/Blajda)) +- fix\(python,rust\): optimize compact on schema evolved table [\#2358](https://github.com/delta-io/delta-rs/pull/2358) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python, rust\): expr parsing date/timestamp [\#2357](https://github.com/delta-io/delta-rs/pull/2357) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: remove tmp files in cleanup\_metadata [\#2356](https://github.com/delta-io/delta-rs/pull/2356) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: make struct fields nullable in stats schema [\#2346](https://github.com/delta-io/delta-rs/pull/2346) ([qinix](https://github.com/qinix)) +- fix\(rust\): adhere to protocol for Decimal [\#2332](https://github.com/delta-io/delta-rs/pull/2332) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): raise schema mismatch when decimal is not subset [\#2330](https://github.com/delta-io/delta-rs/pull/2330) ([ion-elgreco](https://github.com/ion-elgreco)) +- feat\(rust\): derive Copy on some public enums [\#2329](https://github.com/delta-io/delta-rs/pull/2329) ([lasantosr](https://github.com/lasantosr)) +- fix: merge pushdown handling [\#2326](https://github.com/delta-io/delta-rs/pull/2326) ([Blajda](https://github.com/Blajda)) +- fix: merge concurrency control [\#2324](https://github.com/delta-io/delta-rs/pull/2324) ([ion-elgreco](https://github.com/ion-elgreco)) +- Revert 2291 merge predicate fix [\#2323](https://github.com/delta-io/delta-rs/pull/2323) ([Blajda](https://github.com/Blajda)) +- fix: try to fix timeouts [\#2318](https://github.com/delta-io/delta-rs/pull/2318) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): serialize MetricDetails from compaction runs to a string [\#2317](https://github.com/delta-io/delta-rs/pull/2317) ([liamphmurphy](https://github.com/liamphmurphy)) +- docs: add example in to\_pyarrow\_dataset [\#2315](https://github.com/delta-io/delta-rs/pull/2315) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python\): wrong batch size [\#2314](https://github.com/delta-io/delta-rs/pull/2314) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore: object store 0.9.1 [\#2311](https://github.com/delta-io/delta-rs/pull/2311) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: checkpoint features format below v3,7 [\#2307](https://github.com/delta-io/delta-rs/pull/2307) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: schema evolution not coercing with large arrow types [\#2305](https://github.com/delta-io/delta-rs/pull/2305) ([aersam](https://github.com/aersam)) +- fix: clean up some non-datafusion builds [\#2303](https://github.com/delta-io/delta-rs/pull/2303) ([rtyler](https://github.com/rtyler)) +- docs: fix typo [\#2300](https://github.com/delta-io/delta-rs/pull/2300) ([LauH1987](https://github.com/LauH1987)) +- docs: make replaceWhere example compile [\#2299](https://github.com/delta-io/delta-rs/pull/2299) 
([LauH1987](https://github.com/LauH1987)) +- fix\(rust\): add missing chrono-tz feature [\#2295](https://github.com/delta-io/delta-rs/pull/2295) ([ion-elgreco](https://github.com/ion-elgreco)) +- chore\(python\): bump to v0.16.1 [\#2294](https://github.com/delta-io/delta-rs/pull/2294) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): features not maintained in protocol after checkpoint [\#2293](https://github.com/delta-io/delta-rs/pull/2293) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: merge predicate for concurrent writes [\#2291](https://github.com/delta-io/delta-rs/pull/2291) ([JonasDev1](https://github.com/JonasDev1)) +- fix: replace assert and AssertionError with appropriate exceptions [\#2286](https://github.com/delta-io/delta-rs/pull/2286) ([joe-sharman](https://github.com/joe-sharman)) +- docs: fix typo in delta-lake-polars.md [\#2285](https://github.com/delta-io/delta-rs/pull/2285) ([vladdoster](https://github.com/vladdoster)) +- fix\(python, rust\): prevent table scan returning large arrow dtypes [\#2274](https://github.com/delta-io/delta-rs/pull/2274) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(python\): always encapsulate column names in backticks in \_all functions [\#2271](https://github.com/delta-io/delta-rs/pull/2271) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix\(rust\): read only checkpoints that match \_last\_checkpoint version [\#2270](https://github.com/delta-io/delta-rs/pull/2270) ([ion-elgreco](https://github.com/ion-elgreco)) +- fix: add .venv to .gitignore [\#2268](https://github.com/delta-io/delta-rs/pull/2268) ([gacharya](https://github.com/gacharya)) +- feat\(python, rust\): add `set table properties` operation [\#2264](https://github.com/delta-io/delta-rs/pull/2264) ([ion-elgreco](https://github.com/ion-elgreco)) +- docs: use dagster deltalake polars library [\#2263](https://github.com/delta-io/delta-rs/pull/2263) ([avriiil](https://github.com/avriiil)) +- docs: update comment about r2 requiring locks [\#2261](https://github.com/delta-io/delta-rs/pull/2261) ([cmackenzie1](https://github.com/cmackenzie1)) +- fix\(\#2256\): use consistent units of time [\#2260](https://github.com/delta-io/delta-rs/pull/2260) ([cmackenzie1](https://github.com/cmackenzie1)) +- chore: update the changelog for rust-v0.17.1 [\#2259](https://github.com/delta-io/delta-rs/pull/2259) ([rtyler](https://github.com/rtyler)) +- feat\(python\): release GIL in the write\_deltalake function [\#2257](https://github.com/delta-io/delta-rs/pull/2257) ([franz101](https://github.com/franz101)) +- chore\(rust\): bump datafusion to 36 [\#2249](https://github.com/delta-io/delta-rs/pull/2249) ([universalmind303](https://github.com/universalmind303)) +- chore!: replace rusoto with AWS SDK [\#2243](https://github.com/delta-io/delta-rs/pull/2243) ([mightyshazam](https://github.com/mightyshazam)) +- fix: handle conflict checking in optimize correctly [\#2208](https://github.com/delta-io/delta-rs/pull/2208) ([emcake](https://github.com/emcake)) +- feat: logical Node for find files [\#2194](https://github.com/delta-io/delta-rs/pull/2194) ([hntd187](https://github.com/hntd187)) + ## [rust-v0.17.3](https://github.com/delta-io/delta-rs/tree/rust-v0.17.3) (2024-05-01) [Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.17.1...rust-v0.17.3) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4472a3640a..f681aa3948 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,34 +17,40 @@ If you want to claim an issue to work on, you can write the word 
`take` as a com - Install Rust, e.g. as described [here](https://doc.rust-lang.org/cargo/getting-started/installation.html) - Have a compatible Python version installed (check `python/pyproject.toml` for current requirement) - Create a Python virtual environment (required for development builds), e.g. as described [here](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/) + ```sh + python -m venv .venv + ``` + - Build the project for development (this requires an active virtual environment and will also install `deltalake` in that virtual environment) -``` -cd python -make develop -``` + ```sh + cd python + make develop + ``` - Run some Python code, e.g. to run a specific test -``` -python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema" -``` + ```sh + python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema" + ``` - Run some Rust code, e.g. run an example -``` -cd crates/deltalake -cargo run --example basic_operations --features="datafusion" -``` + ```sh + cd crates/deltalake + cargo run --example basic_operations --features="datafusion" + ``` ## Run the docs locally -*This serves your local contens of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstings* -``` +*This serves your local contents of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstings* + +```sh (cd python; make develop) pip install -r docs/requirements.txt mkdocs serve ``` ## To make a pull request (PR) -- Make sure all the following steps run/pass locally before submitting a PR -``` +Make sure all the following steps run/pass locally before submitting a PR + +```sh cargo fmt -- --check cd python make check-rust @@ -62,7 +68,7 @@ make build-docs - For debugging Rust code, install [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb). The extension should even create Debug launch configurations for the project if you allow it, an easy way to get started. Just set a breakpoint and run the relevant configuration. - For debugging from Python into Rust, follow this procedure: 1. 
Add this to `.vscode/launch.json` -``` +```json { "type": "lldb", "request": "attach", diff --git a/Cargo.toml b/Cargo.toml index 6168a500fd..ccbb766e0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ resolver = "2" [workspace.package] authors = ["Qingping Hou "] -rust-version = "1.75" +rust-version = "1.80" keywords = ["deltalake", "delta", "datalake"] readme = "README.md" edition = "2021" @@ -26,30 +26,34 @@ debug = true debug = "line-tables-only" [workspace.dependencies] +delta_kernel = { version = "=0.3.0" } +# delta_kernel = { path = "../delta-kernel-rs/kernel", version = "0.3.0" } + # arrow -arrow = { version = "51" } -arrow-arith = { version = "51" } -arrow-array = { version = "51", features = ["chrono-tz"] } -arrow-buffer = { version = "51" } -arrow-cast = { version = "51" } -arrow-ipc = { version = "51" } -arrow-json = { version = "51" } -arrow-ord = { version = "51" } -arrow-row = { version = "51" } -arrow-schema = { version = "51" } -arrow-select = { version = "51" } -object_store = { version = "0.9" } -parquet = { version = "51" } +arrow = { version = "52" } +arrow-arith = { version = "52" } +arrow-array = { version = "52", features = ["chrono-tz"] } +arrow-buffer = { version = "52" } +arrow-cast = { version = "52" } +arrow-ipc = { version = "52" } +arrow-json = { version = "52" } +arrow-ord = { version = "52" } +arrow-row = { version = "52" } +arrow-schema = { version = "52" } +arrow-select = { version = "52" } +object_store = { version = "0.10.1" } +parquet = { version = "52" } # datafusion -datafusion = { version = "37.1" } -datafusion-expr = { version = "37.1" } -datafusion-common = { version = "37.1" } -datafusion-proto = { version = "37.1" } -datafusion-sql = { version = "37.1" } -datafusion-physical-expr = { version = "37.1" } -datafusion-functions = { version = "37.1" } -datafusion-functions-array = { version = "37.1" } +datafusion = { version = "41" } +datafusion-expr = { version = "41" } +datafusion-common = { version = "41" } +datafusion-proto = { version = "41" } +datafusion-sql = { version = "41" } +datafusion-physical-expr = { version = "41" } +datafusion-physical-plan = { version = "41" } +datafusion-functions = { version = "41" } +datafusion-functions-aggregate = { version = "41" } # serde serde = { version = "1.0.194", features = ["derive"] } @@ -62,6 +66,7 @@ tracing = { version = "0.1", features = ["log"] } regex = { version = "1" } thiserror = { version = "1" } url = { version = "2" } +urlencoding = "2.1.3" uuid = { version = "1" } # runtime / async diff --git a/README.md b/README.md index ec9a7d2d59..b7a26b8a42 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ #delta-rs in the Delta Lake Slack workspace

+Delta Lake is an open-source storage format that runs on top of existing data lakes. Delta Lake is compatible with processing engines like Apache Spark and provides benefits such as ACID transaction guarantees, schema enforcement, and scalable data handling. The Delta Lake project aims to unlock the power of the Deltalake for as many users and projects as possible by providing native low-level APIs aimed at developers and integrators, as well as a high-level operations @@ -135,12 +136,13 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc | -------------------- | :-----: | :-----: | ---------------------------------------------------------------- | | Local | ![done] | ![done] | | | S3 - AWS | ![done] | ![done] | requires lock for concurrent writes | -| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes | -| S3 - R2 | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::CopyIfNotExists` | +| S3 - MinIO | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::ConditionalPut` with `storage_options = {"conditional_put":"etag"}` | +| S3 - R2 | ![done] | ![done] | No lock required when using `AmazonS3ConfigKey::ConditionalPut` with `storage_options = {"conditional_put":"etag"}` | | Azure Blob | ![done] | ![done] | | | Azure ADLS Gen2 | ![done] | ![done] | | | Microsoft OneLake | ![done] | ![done] | | | Google Cloud Storage | ![done] | ![done] | | +| HDFS | ![done] | ![done] | | ### Supported Operations diff --git a/crates/aws/Cargo.toml b/crates/aws/Cargo.toml index e6913a2162..992a32c93e 100644 --- a/crates/aws/Cargo.toml +++ b/crates/aws/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-aws" -version = "0.1.2" +version = "0.3.0" authors.workspace = true keywords.workspace = true readme.workspace = true @@ -12,19 +12,20 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } -aws-smithy-runtime-api = { version="1.1.7" } -aws-smithy-runtime = { version="1.1.7", optional = true} -aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]} -aws-config = { version = "1.1.6", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] } -aws-sdk-dynamodb = {version = "1.15.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } -aws-sdk-sts = {version = "1.1.6", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } +deltalake-core = { version = "0.20.0", path = "../core" } +aws-smithy-runtime-api = { version="1.7" } +aws-smithy-runtime = { version="1.7", optional = true} +aws-credential-types = { version="1.2", features = ["hardcoded-credentials"]} +aws-config = { version = "1.5", default-features = false, features = ["behavior-version-latest","rt-tokio", "credentials-process", "sso"] } +aws-sdk-dynamodb = {version = "1.45", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } +aws-sdk-sts = {version = "1.42", default-features = false, features = ["behavior-version-latest", "rt-tokio"] } lazy_static = "1" maplit = "1" # workspace dependencies async-trait = { workspace = true } bytes = { workspace = true } +chrono = { workspace = true } futures = { workspace = true } tracing = { workspace = true } object_store = { workspace = true, features = ["aws"]} @@ -33,7 +34,7 @@ tokio = { workspace = true } regex = { workspace = true } uuid = { workspace = true, features = ["serde", "v4"] } url = { 
workspace = true } -backoff = { version = "0.4", features = [ "tokio" ] } +backon = { version = "1",default-features = false, features = [ "tokio-sleep" ] } hyper-tls = { version = "0.5", optional = true } [dev-dependencies] diff --git a/crates/aws/src/constants.rs b/crates/aws/src/constants.rs new file mode 100644 index 0000000000..90c23ff572 --- /dev/null +++ b/crates/aws/src/constants.rs @@ -0,0 +1,141 @@ +//! Constants used for modifying and configuring various AWS S3 (or similar) connections with +//! delta-rs +//! + +use lazy_static::lazy_static; +use std::time::Duration; + +/// Custom S3 endpoint. +pub const AWS_ENDPOINT_URL: &str = "AWS_ENDPOINT_URL"; +/// Custom DynamoDB endpoint. +/// If DynamoDB endpoint is not supplied, will use S3 endpoint (AWS_ENDPOINT_URL) +/// If it is supplied, this endpoint takes precedence over the global endpoint set in AWS_ENDPOINT_URL for DynamoDB +pub const AWS_ENDPOINT_URL_DYNAMODB: &str = "AWS_ENDPOINT_URL_DYNAMODB"; +/// The AWS region. +pub const AWS_REGION: &str = "AWS_REGION"; +/// The AWS profile. +pub const AWS_PROFILE: &str = "AWS_PROFILE"; +/// The AWS_ACCESS_KEY_ID to use for S3. +pub const AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID"; +/// The AWS_SECRET_ACCESS_KEY to use for S3. +pub const AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY"; +/// The AWS_SESSION_TOKEN to use for S3. +pub const AWS_SESSION_TOKEN: &str = "AWS_SESSION_TOKEN"; +/// Uses either "path" (the default) or "virtual", which turns on +/// [virtual host addressing](http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). +pub const AWS_S3_ADDRESSING_STYLE: &str = "AWS_S3_ADDRESSING_STYLE"; +/// Locking provider to use for safe atomic rename. +/// `dynamodb` is currently the only supported locking provider. +/// If not set, safe atomic rename is not available. +pub const AWS_S3_LOCKING_PROVIDER: &str = "AWS_S3_LOCKING_PROVIDER"; +/// The role to assume for S3 writes. +pub const AWS_IAM_ROLE_ARN: &str = "AWS_IAM_ROLE_ARN"; +/// The role to assume. Please use [AWS_IAM_ROLE_ARN] instead +#[deprecated(since = "0.20.0", note = "Please use AWS_IAM_ROLE_ARN instead")] +pub const AWS_S3_ASSUME_ROLE_ARN: &str = "AWS_S3_ASSUME_ROLE_ARN"; +/// The role session name to use when a role is assumed. If not provided a random session name is generated. +pub const AWS_IAM_ROLE_SESSION_NAME: &str = "AWS_IAM_ROLE_SESSION_NAME"; +/// The role session name to use when a role is assumed. If not provided a random session name is generated. +#[deprecated( + since = "0.20.0", + note = "Please use AWS_IAM_ROLE_SESSION_NAME instead" +)] +pub const AWS_S3_ROLE_SESSION_NAME: &str = "AWS_S3_ROLE_SESSION_NAME"; +/// The `pool_idle_timeout` option of aws http client. Has to be lower than 20 seconds, which is +/// default S3 server timeout . +/// However, since rusoto uses hyper as a client, its default timeout is 90 seconds +/// . +/// Hence, the `connection closed before message completed` could occur. +/// To avoid that, the default value of this setting is 15 seconds if it's not set otherwise. +pub const AWS_S3_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_S3_POOL_IDLE_TIMEOUT_SECONDS"; +/// The `pool_idle_timeout` for the as3_constants sts client. See +/// the reasoning in `AWS_S3_POOL_IDLE_TIMEOUT_SECONDS`. +pub const AWS_STS_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_STS_POOL_IDLE_TIMEOUT_SECONDS"; +/// The number of retries for S3 GET requests failed with 500 Internal Server Error. 
+pub const AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES: &str = + "AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES"; +/// The web identity token file to use when using a web identity provider. +/// NOTE: web identity related options are set in the environment when +/// creating an instance of [crate::storage::s3::S3StorageOptions]. +/// See also . +pub const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE"; +/// The role name to use for web identity. +/// NOTE: web identity related options are set in the environment when +/// creating an instance of [crate::storage::s3::S3StorageOptions]. +/// See also . +pub const AWS_ROLE_ARN: &str = "AWS_ROLE_ARN"; +/// The role session name to use for web identity. +/// NOTE: web identity related options are set in the environment when +/// creating an instance of [crate::storage::s3::S3StorageOptions]. +/// See also . +pub const AWS_ROLE_SESSION_NAME: &str = "AWS_ROLE_SESSION_NAME"; +/// Allow http connections - mainly useful for integration tests +pub const AWS_ALLOW_HTTP: &str = "AWS_ALLOW_HTTP"; + +/// If set to "true", allows creating commits without concurrent writer protection. +/// Only safe if there is one writer to a given table. +pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME"; + +/// If set to "true", disables the imds client +/// Defaults to "true" +pub const AWS_EC2_METADATA_DISABLED: &str = "AWS_EC2_METADATA_DISABLED"; + +/// The timeout in milliseconds for the EC2 metadata endpoint +/// Defaults to 100 +pub const AWS_EC2_METADATA_TIMEOUT: &str = "AWS_EC2_METADATA_TIMEOUT"; + +/// Force the delta-rs to attempt to load AWS credentials +pub const AWS_FORCE_CREDENTIAL_LOAD: &str = "AWS_FORCE_CREDENTIAL_LOAD"; + +/// The list of option keys owned by the S3 module. +/// Option keys not contained in this list will be added to the `extra_opts` +/// field of [crate::storage::s3::S3StorageOptions]. +pub const S3_OPTS: &[&str] = &[ + AWS_ENDPOINT_URL, + AWS_ENDPOINT_URL_DYNAMODB, + AWS_REGION, + AWS_PROFILE, + AWS_ACCESS_KEY_ID, + AWS_SECRET_ACCESS_KEY, + AWS_SESSION_TOKEN, + AWS_S3_LOCKING_PROVIDER, + AWS_S3_ASSUME_ROLE_ARN, + AWS_S3_ROLE_SESSION_NAME, + AWS_WEB_IDENTITY_TOKEN_FILE, + AWS_ROLE_ARN, + AWS_ROLE_SESSION_NAME, + AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, + AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, + AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, + AWS_EC2_METADATA_DISABLED, + AWS_EC2_METADATA_TIMEOUT, +]; + +pub const DEFAULT_LOCK_TABLE_NAME: &str = "delta_log"; +pub const LOCK_TABLE_KEY_NAME: &str = "DELTA_DYNAMO_TABLE_NAME"; +pub const BILLING_MODE_KEY_NAME: &str = "DELTA_DYNAMO_BILLING_MODE"; +pub const MAX_ELAPSED_REQUEST_TIME_KEY_NAME: &str = "DELTA_DYNAMO_MAX_ELAPSED_REQUEST_TIME"; + +pub const ATTR_TABLE_PATH: &str = "tablePath"; +pub const ATTR_FILE_NAME: &str = "fileName"; +pub const ATTR_TEMP_PATH: &str = "tempPath"; +pub const ATTR_COMPLETE: &str = "complete"; +pub const ATTR_EXPIRE_TIME: &str = "expireTime"; + +pub const STRING_TYPE: &str = "S"; + +pub const KEY_TYPE_HASH: &str = "HASH"; +pub const KEY_TYPE_RANGE: &str = "RANGE"; + +lazy_static! 
{ + pub static ref CONDITION_EXPR_CREATE: String = format!( + "attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME})" + ); + + pub static ref CONDITION_DELETE_INCOMPLETE: String = format!( + "(complete = :f) or (attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME}))" + ); +} + +pub const CONDITION_UPDATE_INCOMPLETE: &str = "complete = :f"; +pub const DEFAULT_COMMIT_ENTRY_EXPIRATION_DELAY: Duration = Duration::from_secs(86_400); diff --git a/crates/aws/src/credentials.rs b/crates/aws/src/credentials.rs index 9ddf19b74c..71441bf05e 100644 --- a/crates/aws/src/credentials.rs +++ b/crates/aws/src/credentials.rs @@ -1,118 +1,259 @@ -use std::{sync::Arc, time::Duration}; - -use aws_config::{ - ecs::EcsCredentialsProvider, - environment::{EnvironmentVariableCredentialsProvider, EnvironmentVariableRegionProvider}, - imds::credentials::ImdsCredentialsProvider, - meta::{credentials::CredentialsProviderChain, region::RegionProviderChain}, - profile::ProfileFileCredentialsProvider, - provider_config::ProviderConfig, - web_identity_token::WebIdentityTokenCredentialsProvider, -}; -use aws_credential_types::provider::{self, ProvideCredentials}; -use tracing::Instrument; +//! Custom AWS credential providers used by delta-rs +//! -const IMDS_PROVIDER_NAME: &str = "Ec2InstanceMetadata"; +use std::sync::Arc; -#[derive(Debug)] -pub struct ConfiguredCredentialChain { - provider_chain: CredentialsProviderChain, -} +use aws_config::default_provider::credentials::DefaultCredentialsChain; +use aws_config::meta::credentials::CredentialsProviderChain; +use aws_config::sts::AssumeRoleProvider; +use aws_config::SdkConfig; +use aws_credential_types::provider::error::CredentialsError; +use aws_credential_types::provider::{future, ProvideCredentials}; +use aws_credential_types::Credentials; -#[derive(Debug)] -pub struct NoOpCredentials {} +use deltalake_core::storage::object_store::aws::{AmazonS3ConfigKey, AwsCredential}; +use deltalake_core::storage::object_store::{ + CredentialProvider, Error as ObjectStoreError, Result as ObjectStoreResult, +}; +use deltalake_core::storage::StorageOptions; +use deltalake_core::DeltaResult; +use tracing::log::*; -pub fn new_region_provider(disable_imds: bool, imds_timeout: u64) -> RegionProviderChain { - let env_provider = EnvironmentVariableRegionProvider::new(); - let profile_file = aws_config::profile::region::ProfileFileRegionProvider::default(); - if disable_imds { - return RegionProviderChain::first_try(env_provider).or_else(profile_file); - } +use crate::constants::{self, AWS_ENDPOINT_URL}; - RegionProviderChain::first_try(env_provider) - .or_else(profile_file) - .or_else( - aws_config::imds::region::Builder::default() - .imds_client( - aws_config::imds::Client::builder() - .connect_timeout(Duration::from_millis(imds_timeout)) - .read_timeout(Duration::from_millis(imds_timeout)) - .build(), - ) - .build(), - ) +/// An [object_store::CredentialProvider] which handles converting a populated [SdkConfig] +/// into a necessary [AwsCredential] type for configuring [object_store::aws::AmazonS3] +#[derive(Clone, Debug)] +pub(crate) struct AWSForObjectStore { + sdk_config: SdkConfig, } -impl ConfiguredCredentialChain { - pub fn new(disable_imds: bool, imds_timeout: u64, conf: &ProviderConfig) -> Self { - let imds_provider = Self::build_imds_provider(conf, disable_imds, imds_timeout); - let env_provider = EnvironmentVariableCredentialsProvider::default(); - let profile_provider = ProfileFileCredentialsProvider::builder() - 
.configure(conf) - .with_custom_provider(IMDS_PROVIDER_NAME, imds_provider.clone()) - .build(); - let web_identity_token_provider = WebIdentityTokenCredentialsProvider::builder() - .configure(conf) - .build(); - - let ecs_provider = EcsCredentialsProvider::builder().configure(conf).build(); - - let provider_chain = CredentialsProviderChain::first_try("Environment", env_provider) - .or_else("Profile", profile_provider) - .or_else("WebIdentityToken", web_identity_token_provider) - .or_else("EcsContainer", ecs_provider) - .or_else(IMDS_PROVIDER_NAME, imds_provider); - - Self { provider_chain } +impl AWSForObjectStore { + pub(crate) fn new(sdk_config: SdkConfig) -> Self { + Self { sdk_config } } +} - async fn credentials(&self) -> provider::Result { - self.provider_chain - .provide_credentials() - .instrument(tracing::debug_span!("provide_credentials", provider = %"default_chain")) - .await +#[async_trait::async_trait] +impl CredentialProvider for AWSForObjectStore { + type Credential = AwsCredential; + + /// Provide the necessary configured credentials from the AWS SDK for use by + /// [object_store::aws::AmazonS3] + async fn get_credential(&self) -> ObjectStoreResult> { + let provider = self + .sdk_config + .credentials_provider() + .ok_or(ObjectStoreError::NotImplemented)?; + let credentials = + provider + .provide_credentials() + .await + .map_err(|e| ObjectStoreError::NotSupported { + source: Box::new(e), + })?; + + debug!( + "CredentialProvider for Object Store using access key: {}", + credentials.access_key_id() + ); + + Ok(Arc::new(Self::Credential { + key_id: credentials.access_key_id().into(), + secret_key: credentials.secret_access_key().into(), + token: credentials.session_token().map(|o| o.to_string()), + })) } +} - fn build_imds_provider( - conf: &ProviderConfig, - disable_imds: bool, - imds_timeout: u64, - ) -> Arc { - if disable_imds { - return Arc::new(NoOpCredentials {}); - } +/// Name of the [OptionsCredentialsProvider] for AWS SDK use +const OPTS_PROVIDER: &str = "DeltaStorageOptionsProvider"; - let imds_provider = ImdsCredentialsProvider::builder() - .configure(conf) - .imds_client( - aws_config::imds::Client::builder() - .connect_timeout(Duration::from_millis(imds_timeout)) - .read_timeout(Duration::from_millis(imds_timeout)) - .build(), - ) - .build(); - Arc::new(imds_provider) +/// The [OptionsCredentialsProvider] helps users plug specific AWS credentials into their +/// [StorageOptions] in such a way that the AWS SDK code will be properly +/// loaded with those credentials before following the +/// [aws_config::default_provider::credentials::DefaultCredentialsChain] +#[derive(Clone, Debug)] +pub(crate) struct OptionsCredentialsProvider { + options: StorageOptions, +} + +impl OptionsCredentialsProvider { + /// Look at the options configured on the provider and return an appropriate + /// [Credentials] instance for AWS SDK credential resolution + fn credentials(&self) -> aws_credential_types::provider::Result { + debug!("Attempting to pull credentials from `StorageOptions`"); + let access_key = self.options.0.get(constants::AWS_ACCESS_KEY_ID).ok_or( + CredentialsError::not_loaded("access key not in StorageOptions"), + )?; + let secret_key = self.options.0.get(constants::AWS_SECRET_ACCESS_KEY).ok_or( + CredentialsError::not_loaded("secret key not in StorageOptions"), + )?; + let session_token = self.options.0.get(constants::AWS_SESSION_TOKEN).cloned(); + + Ok(Credentials::new( + access_key, + secret_key, + session_token, + None, + OPTS_PROVIDER, + )) } } -impl 
ProvideCredentials for ConfiguredCredentialChain { - fn provide_credentials<'a>( - &'a self, - ) -> aws_credential_types::provider::future::ProvideCredentials<'a> +impl ProvideCredentials for OptionsCredentialsProvider { + fn provide_credentials<'a>(&'a self) -> future::ProvideCredentials<'a> where Self: 'a, { - aws_credential_types::provider::future::ProvideCredentials::new(self.credentials()) + future::ProvideCredentials::ready(self.credentials()) } } -impl ProvideCredentials for NoOpCredentials { - fn provide_credentials<'a>(&'a self) -> provider::future::ProvideCredentials<'a> - where - Self: 'a, - { - aws_credential_types::provider::future::ProvideCredentials::new(std::future::ready(Err( - provider::error::CredentialsError::not_loaded_no_source(), - ))) +/// Generate a random session name for assuming IAM roles +fn assume_role_sessio_name() -> String { + let now = chrono::Utc::now(); + + format!("delta-rs_{}", now.timestamp_millis()) +} + +/// Return the configured IAM role ARN or whatever is defined in the environment +fn assume_role_arn(options: &StorageOptions) -> Option { + options + .0 + .get(constants::AWS_IAM_ROLE_ARN) + .or(options.0.get(constants::AWS_S3_ASSUME_ROLE_ARN)) + .or(std::env::var_os(constants::AWS_IAM_ROLE_ARN) + .map(|o| { + o.into_string() + .expect("Failed to unwrap AWS_IAM_ROLE_ARN which may have invalid data") + }) + .as_ref()) + .or(std::env::var_os(constants::AWS_S3_ASSUME_ROLE_ARN) + .map(|o| { + o.into_string() + .expect("Failed to unwrap AWS_S3_ASSUME_ROLE_ARN which may have invalid data") + }) + .as_ref()) + .cloned() +} + +/// Return the configured IAM assume role session name or provide a unique one +fn assume_session_name(options: &StorageOptions) -> String { + let assume_session = options + .0 + .get(constants::AWS_IAM_ROLE_SESSION_NAME) + .or(options.0.get(constants::AWS_S3_ROLE_SESSION_NAME)) + .cloned(); + + match assume_session { + Some(s) => s, + None => assume_role_sessio_name(), + } +} + +/// Take a set of [StorageOptions] and produce an appropriate AWS SDK [SdkConfig] +/// for use with various AWS SDK APIs, such as in our [crate::logstore::S3DynamoDbLogStore] +pub async fn resolve_credentials(options: StorageOptions) -> DeltaResult { + let default_provider = DefaultCredentialsChain::builder().build().await; + + let credentials_provider = match assume_role_arn(&options) { + Some(arn) => { + debug!("Configuring AssumeRoleProvider with role arn: {arn}"); + CredentialsProviderChain::first_try( + "AssumeRoleProvider", + AssumeRoleProvider::builder(arn) + .session_name(assume_session_name(&options)) + .build() + .await, + ) + .or_else( + "StorageOptions", + OptionsCredentialsProvider { + options: options.clone(), + }, + ) + .or_else("DefaultChain", default_provider) + } + None => CredentialsProviderChain::first_try( + "StorageOptions", + OptionsCredentialsProvider { + options: options.clone(), + }, + ) + .or_else("DefaultChain", default_provider), + }; + + Ok(aws_config::from_env() + .credentials_provider(credentials_provider) + .load() + .await) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::constants; + use maplit::hashmap; + use serial_test::serial; + + #[tokio::test] + #[serial] + async fn test_options_credentials_provider() { + let options = StorageOptions(hashmap! 
{ + constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + }); + + let config = resolve_credentials(options).await; + assert!(config.is_ok(), "{config:?}"); + let config = config.unwrap(); + + if let Some(provider) = &config.credentials_provider() { + let credentials = provider + .provide_credentials() + .await + .expect("Failed to provide credentials"); + assert_eq!( + "test_id", + credentials.access_key_id(), + "The access key should come from our options! {credentials:?}" + ); + assert_eq!( + "test_secret", + credentials.secret_access_key(), + "The secret should come from our options! {credentials:?}" + ); + } else { + panic!("Could not retrieve credentials from the SdkConfig: {config:?}"); + } + } + + #[tokio::test] + #[serial] + async fn test_options_credentials_provider_session_token() { + let options = StorageOptions(hashmap! { + constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + constants::AWS_SESSION_TOKEN.to_string() => "test_token".to_string(), + }); + + let config = resolve_credentials(options) + .await + .expect("Failed to resolve_credentials"); + + if let Some(provider) = &config.credentials_provider() { + let credentials = provider + .provide_credentials() + .await + .expect("Failed to provide credentials"); + assert_eq!( + Some("test_token"), + credentials.session_token(), + "The session token should come from our options! {credentials:?}" + ); + } else { + panic!("Could not retrieve credentials from the SdkConfig: {config:?}"); + } } } diff --git a/crates/aws/src/lib.rs b/crates/aws/src/lib.rs index d179c37e68..ddb768bdd9 100644 --- a/crates/aws/src/lib.rs +++ b/crates/aws/src/lib.rs @@ -1,5 +1,9 @@ -//! Lock client implementation based on DynamoDb. +//! AWS S3 and similar tooling for delta-rs +//! +//! This module also contains the [S3DynamoDbLogStore] implemtnation for concurrent writer support +//! with AWS S3 specifically. 
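+//!
+//! Illustrative sketch only: a rough idea of how a client crate might point this
+//! backend at a table, either with the DynamoDB locking provider or with S3
+//! conditional puts (both configurations are described elsewhere in this patch).
+//! The bucket and table path are placeholders, and the `DeltaTableBuilder` entry
+//! point is assumed to come from `deltalake_core`.
+//!
+//! ```rust,no_run
+//! use std::collections::HashMap;
+//! use deltalake_core::DeltaTableBuilder;
+//!
+//! # async fn example() -> deltalake_core::DeltaResult<()> {
+//! // Make the s3:// URL handlers of this crate known to deltalake_core.
+//! deltalake_aws::register_handlers(None);
+//!
+//! // Option 1: use DynamoDB as the locking provider for safe concurrent writes.
+//! let mut storage_options = HashMap::new();
+//! storage_options.insert("AWS_S3_LOCKING_PROVIDER".to_string(), "dynamodb".to_string());
+//! storage_options.insert("DELTA_DYNAMO_TABLE_NAME".to_string(), "delta_log".to_string());
+//!
+//! // Option 2: skip the lock table entirely by relying on S3 conditional puts.
+//! // storage_options.insert("conditional_put".to_string(), "etag".to_string());
+//!
+//! let table = DeltaTableBuilder::from_uri("s3://my-bucket/my-table")
+//!     .with_storage_options(storage_options)
+//!     .load()
+//!     .await?;
+//! # Ok(())
+//! # }
+//! ```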
+pub mod constants; mod credentials; pub mod errors; pub mod logstore; @@ -7,6 +11,7 @@ pub mod logstore; mod native; pub mod storage; use aws_config::SdkConfig; +use aws_sdk_dynamodb::error::SdkError; use aws_sdk_dynamodb::{ operation::{ create_table::CreateTableError, delete_item::DeleteItemError, get_item::GetItemError, @@ -29,7 +34,7 @@ use std::{ }; use tracing::debug; -use deltalake_core::logstore::{logstores, LogStore, LogStoreFactory}; +use deltalake_core::logstore::{default_logstore, logstores, LogStore, LogStoreFactory}; use deltalake_core::storage::{factories, url_prefix_handler, ObjectStoreRef, StorageOptions}; use deltalake_core::{DeltaResult, Path}; use url::Url; @@ -49,23 +54,36 @@ impl LogStoreFactory for S3LogStoreFactory { ) -> DeltaResult> { let store = url_prefix_handler(store, Path::parse(location.path())?); - if options - .0 - .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) - { + // With conditional put in S3-like API we can use the deltalake default logstore which use PutIfAbsent + if options.0.keys().any(|key| { + let key = key.to_ascii_lowercase(); + vec![ + AmazonS3ConfigKey::ConditionalPut.as_ref(), + "conditional_put", + ] + .contains(&key.as_str()) + }) { + debug!("S3LogStoreFactory has been asked to create a default LogStore where the underlying store has Conditonal Put enabled - no locking provider required"); + return Ok(default_logstore(store, location, options)); + } + + if options.0.keys().any(|key| { + let key = key.to_ascii_lowercase(); + vec![ + AmazonS3ConfigKey::CopyIfNotExists.as_ref(), + "copy_if_not_exists", + ] + .contains(&key.as_str()) + }) { debug!("S3LogStoreFactory has been asked to create a LogStore where the underlying store has copy-if-not-exists enabled - no locking provider required"); - return Ok(deltalake_core::logstore::default_logstore( - store, location, options, - )); + return Ok(logstore::default_s3_logstore(store, location, options)); } let s3_options = S3StorageOptions::from_map(&options.0)?; if s3_options.locking_provider.as_deref() != Some("dynamodb") { debug!("S3LogStoreFactory has been asked to create a LogStore without the dynamodb locking provider"); - return Ok(deltalake_core::logstore::default_logstore( - store, location, options, - )); + return Ok(logstore::default_s3_logstore(store, location, options)); } Ok(Arc::new(logstore::S3DynamoDbLogStore::try_new( @@ -141,8 +159,12 @@ impl DynamoDbLockClient { lock_table_name: Option, billing_mode: Option, max_elapsed_request_time: Option, + dynamodb_override_endpoint: Option, ) -> Result { - let dynamodb_client = aws_sdk_dynamodb::Client::new(sdk_config); + let dynamodb_sdk_config = + Self::create_dynamodb_sdk_config(sdk_config, dynamodb_override_endpoint); + + let dynamodb_client = aws_sdk_dynamodb::Client::new(&dynamodb_sdk_config); let lock_table_name = lock_table_name .or_else(|| std::env::var(constants::LOCK_TABLE_KEY_NAME).ok()) @@ -177,6 +199,24 @@ impl DynamoDbLockClient { config, }) } + fn create_dynamodb_sdk_config( + sdk_config: &SdkConfig, + dynamodb_override_endpoint: Option, + ) -> SdkConfig { + /* + if dynamodb_override_endpoint exists/AWS_ENDPOINT_URL_DYNAMODB is specified by user + use dynamodb_override_endpoint to create dynamodb client + */ + + match dynamodb_override_endpoint { + Some(dynamodb_endpoint_url) => sdk_config + .to_owned() + .to_builder() + .endpoint_url(dynamodb_endpoint_url) + .build(), + None => sdk_config.to_owned(), + } + } /// Create the lock table where DynamoDb stores the commit information for all delta tables. 
/// @@ -256,28 +296,28 @@ impl DynamoDbLockClient { version: i64, ) -> Result, LockClientError> { let item = self - .retry(|| async { - match self - .dynamodb_client - .get_item() - .consistent_read(true) - .table_name(&self.config.lock_table_name) - .set_key(Some(self.get_primary_key(version, table_path))) - .send() - .await - { - Ok(x) => Ok(x), - Err(sdk_err) => match sdk_err.as_service_error() { - Some(GetItemError::ProvisionedThroughputExceededException(_)) => { - Err(backoff::Error::transient( - LockClientError::ProvisionedThroughputExceeded, - )) - } - _ => Err(backoff::Error::permanent(sdk_err.into())), - }, + .retry( + || async { + self.dynamodb_client + .get_item() + .consistent_read(true) + .table_name(&self.config.lock_table_name) + .set_key(Some(self.get_primary_key(version, table_path))) + .send() + .await + }, + |err| match err.as_service_error() { + Some(GetItemError::ProvisionedThroughputExceededException(_)) => true, + _ => false, + }, + ) + .await + .map_err(|err| match err.as_service_error() { + Some(GetItemError::ProvisionedThroughputExceededException(_)) => { + LockClientError::ProvisionedThroughputExceeded } - }) - .await?; + _ => err.into(), + })?; item.item.as_ref().map(CommitEntry::try_from).transpose() } @@ -287,36 +327,38 @@ impl DynamoDbLockClient { table_path: &str, entry: &CommitEntry, ) -> Result<(), LockClientError> { - self.retry(|| async { - let item = create_value_map(entry, table_path); - match self - .dynamodb_client - .put_item() - .condition_expression(constants::CONDITION_EXPR_CREATE.as_str()) - .table_name(self.get_lock_table_name()) - .set_item(Some(item)) - .send() - .await - { - Ok(_) => Ok(()), - Err(err) => match err.as_service_error() { - Some(PutItemError::ProvisionedThroughputExceededException(_)) => Err( - backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), - ), - Some(PutItemError::ConditionalCheckFailedException(_)) => Err( - backoff::Error::permanent(LockClientError::VersionAlreadyExists { - table_path: table_path.to_owned(), - version: entry.version, - }), - ), - Some(PutItemError::ResourceNotFoundException(_)) => Err( - backoff::Error::permanent(LockClientError::LockTableNotFound), - ), - _ => Err(backoff::Error::permanent(err.into())), - }, + self.retry( + || async { + let item = create_value_map(entry, table_path); + let _ = self + .dynamodb_client + .put_item() + .condition_expression(constants::CONDITION_EXPR_CREATE.as_str()) + .table_name(self.get_lock_table_name()) + .set_item(Some(item)) + .send() + .await?; + Ok(()) + }, + |err: &SdkError<_, _>| match err.as_service_error() { + Some(PutItemError::ProvisionedThroughputExceededException(_)) => true, + _ => false, + }, + ) + .await + .map_err(|err| match err.as_service_error() { + Some(PutItemError::ProvisionedThroughputExceededException(_)) => { + LockClientError::ProvisionedThroughputExceeded } + Some(PutItemError::ConditionalCheckFailedException(_)) => { + LockClientError::VersionAlreadyExists { + table_path: table_path.to_owned(), + version: entry.version, + } + } + Some(PutItemError::ResourceNotFoundException(_)) => LockClientError::LockTableNotFound, + _ => err.into(), }) - .await } /// Get the latest entry (entry with highest version). 
@@ -338,33 +380,33 @@ impl DynamoDbLockClient { limit: i64, ) -> Result, LockClientError> { let query_result = self - .retry(|| async { - match self - .dynamodb_client - .query() - .table_name(self.get_lock_table_name()) - .consistent_read(true) - .limit(limit.try_into().unwrap_or(i32::MAX)) - .scan_index_forward(false) - .key_condition_expression(format!("{} = :tn", constants::ATTR_TABLE_PATH)) - .set_expression_attribute_values(Some( - maplit::hashmap!(":tn".into() => string_attr(table_path)), - )) - .send() - .await - { - Ok(result) => Ok(result), - Err(sdk_err) => match sdk_err.as_service_error() { - Some(QueryError::ProvisionedThroughputExceededException(_)) => { - Err(backoff::Error::transient( - LockClientError::ProvisionedThroughputExceeded, - )) - } - _ => Err(backoff::Error::permanent(sdk_err.into())), - }, + .retry( + || async { + self.dynamodb_client + .query() + .table_name(self.get_lock_table_name()) + .consistent_read(true) + .limit(limit.try_into().unwrap_or(i32::MAX)) + .scan_index_forward(false) + .key_condition_expression(format!("{} = :tn", constants::ATTR_TABLE_PATH)) + .set_expression_attribute_values(Some( + maplit::hashmap!(":tn".into() => string_attr(table_path)), + )) + .send() + .await + }, + |err: &SdkError<_, _>| match err.as_service_error() { + Some(QueryError::ProvisionedThroughputExceededException(_)) => true, + _ => false, + }, + ) + .await + .map_err(|err| match err.as_service_error() { + Some(QueryError::ProvisionedThroughputExceededException(_)) => { + LockClientError::ProvisionedThroughputExceeded } - }) - .await?; + _ => err.into(), + })?; query_result .items @@ -385,35 +427,44 @@ impl DynamoDbLockClient { .duration_since(SystemTime::UNIX_EPOCH) .unwrap() .as_secs(); - self.retry(|| async { - match self - .dynamodb_client - .update_item() - .table_name(self.get_lock_table_name()) - .set_key(Some(self.get_primary_key(version, table_path))) - .update_expression("SET complete = :c, expireTime = :e".to_owned()) - .set_expression_attribute_values(Some(maplit::hashmap! { - ":c".to_owned() => string_attr("true"), - ":e".to_owned() => num_attr(seconds_since_epoch), - ":f".into() => string_attr("false"), - })) - .condition_expression(constants::CONDITION_UPDATE_INCOMPLETE) - .send() - .await - { - Ok(_) => Ok(UpdateLogEntryResult::UpdatePerformed), - Err(err) => match err.as_service_error() { - Some(UpdateItemError::ProvisionedThroughputExceededException(_)) => Err( - backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), - ), - Some(UpdateItemError::ConditionalCheckFailedException(_)) => { - Ok(UpdateLogEntryResult::AlreadyCompleted) - } - _ => Err(backoff::Error::permanent(err.into())), + let res = self + .retry( + || async { + let _ = self + .dynamodb_client + .update_item() + .table_name(self.get_lock_table_name()) + .set_key(Some(self.get_primary_key(version, table_path))) + .update_expression("SET complete = :c, expireTime = :e".to_owned()) + .set_expression_attribute_values(Some(maplit::hashmap! 
{ + ":c".to_owned() => string_attr("true"), + ":e".to_owned() => num_attr(seconds_since_epoch), + ":f".into() => string_attr("false"), + })) + .condition_expression(constants::CONDITION_UPDATE_INCOMPLETE) + .send() + .await?; + Ok(()) }, - } - }) - .await + |err: &SdkError<_, _>| match err.as_service_error() { + Some(UpdateItemError::ProvisionedThroughputExceededException(_)) => true, + _ => false, + }, + ) + .await; + + match res { + Ok(()) => Ok(UpdateLogEntryResult::UpdatePerformed), + Err(err) => match err.as_service_error() { + Some(UpdateItemError::ProvisionedThroughputExceededException(_)) => { + Err(LockClientError::ProvisionedThroughputExceeded) + } + Some(UpdateItemError::ConditionalCheckFailedException(_)) => { + Ok(UpdateLogEntryResult::AlreadyCompleted) + } + _ => Err(err.into()), + }, + } } /// Delete existing log entry if it is not already complete @@ -422,48 +473,52 @@ impl DynamoDbLockClient { version: i64, table_path: &str, ) -> Result<(), LockClientError> { - self.retry(|| async { - match self - .dynamodb_client - .delete_item() - .table_name(self.get_lock_table_name()) - .set_key(Some(self.get_primary_key(version, table_path))) - .set_expression_attribute_values(Some(maplit::hashmap! { - ":f".into() => string_attr("false"), - })) - .condition_expression(constants::CONDITION_DELETE_INCOMPLETE.as_str()) - .send() - .await - { - Ok(_) => Ok(()), - Err(err) => match err.as_service_error() { - Some(DeleteItemError::ProvisionedThroughputExceededException(_)) => Err( - backoff::Error::transient(LockClientError::ProvisionedThroughputExceeded), - ), - Some(DeleteItemError::ConditionalCheckFailedException(_)) => Err( - backoff::Error::permanent(LockClientError::VersionAlreadyCompleted { - table_path: table_path.to_owned(), - version, - }), - ), - _ => Err(backoff::Error::permanent(err.into())), - }, + self.retry( + || async { + let _ = self + .dynamodb_client + .delete_item() + .table_name(self.get_lock_table_name()) + .set_key(Some(self.get_primary_key(version, table_path))) + .set_expression_attribute_values(Some(maplit::hashmap! { + ":f".into() => string_attr("false"), + })) + .condition_expression(constants::CONDITION_DELETE_INCOMPLETE.as_str()) + .send() + .await?; + Ok(()) + }, + |err: &SdkError<_, _>| match err.as_service_error() { + Some(DeleteItemError::ProvisionedThroughputExceededException(_)) => true, + _ => false, + }, + ) + .await + .map_err(|err| match err.as_service_error() { + Some(DeleteItemError::ProvisionedThroughputExceededException(_)) => { + LockClientError::ProvisionedThroughputExceeded } + Some(DeleteItemError::ConditionalCheckFailedException(_)) => { + LockClientError::VersionAlreadyCompleted { + table_path: table_path.to_owned(), + version, + } + } + _ => err.into(), }) - .await } - async fn retry(&self, operation: Fn) -> Result + async fn retry(&self, operation: F, when: Wn) -> Result where - Fn: FnMut() -> Fut, - Fut: std::future::Future>>, + F: FnMut() -> Fut, + Fut: std::future::Future>, + Wn: Fn(&E) -> bool, { - let backoff = backoff::ExponentialBackoffBuilder::new() - .with_multiplier(2.) - .with_max_interval(Duration::from_secs(15)) - .with_max_elapsed_time(Some(self.config.max_elapsed_request_time)) - .build(); - backoff::future::retry(backoff, operation).await + use backon::Retryable; + let backoff = backon::ExponentialBuilder::default() + .with_factor(2.) 
+ .with_max_delay(self.config.max_elapsed_request_time); + operation.retry(backoff).when(when).await } } @@ -565,42 +620,6 @@ pub enum CreateLockTableResult { TableAlreadyExists, } -pub mod constants { - use std::time::Duration; - - use lazy_static::lazy_static; - - pub const DEFAULT_LOCK_TABLE_NAME: &str = "delta_log"; - pub const LOCK_TABLE_KEY_NAME: &str = "DELTA_DYNAMO_TABLE_NAME"; - pub const BILLING_MODE_KEY_NAME: &str = "DELTA_DYNAMO_BILLING_MODE"; - pub const MAX_ELAPSED_REQUEST_TIME_KEY_NAME: &str = "DELTA_DYNAMO_MAX_ELAPSED_REQUEST_TIME"; - - pub const ATTR_TABLE_PATH: &str = "tablePath"; - pub const ATTR_FILE_NAME: &str = "fileName"; - pub const ATTR_TEMP_PATH: &str = "tempPath"; - pub const ATTR_COMPLETE: &str = "complete"; - pub const ATTR_EXPIRE_TIME: &str = "expireTime"; - - pub const STRING_TYPE: &str = "S"; - - pub const KEY_TYPE_HASH: &str = "HASH"; - pub const KEY_TYPE_RANGE: &str = "RANGE"; - - lazy_static! { - pub static ref CONDITION_EXPR_CREATE: String = format!( - "attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME})" - ); - - pub static ref CONDITION_DELETE_INCOMPLETE: String = format!( - "(complete = :f) or (attribute_not_exists({ATTR_TABLE_PATH}) and attribute_not_exists({ATTR_FILE_NAME}))" - ); - } - - pub const CONDITION_UPDATE_INCOMPLETE: &str = "complete = :f"; - - pub const DEFAULT_COMMIT_ENTRY_EXPIRATION_DELAY: Duration = Duration::from_secs(86_400); -} - /// Extract a field from an item's attribute value map, producing a descriptive error /// of the various failure cases. fn extract_required_string_field<'a>( @@ -663,6 +682,7 @@ fn extract_version_from_filename(name: &str) -> Option { #[cfg(test)] mod tests { use super::*; + use aws_config::Region; use object_store::memory::InMemory; use serial_test::serial; @@ -705,10 +725,37 @@ mod tests { let factory = S3LogStoreFactory::default(); let store = InMemory::new(); let url = Url::parse("s3://test-bucket").unwrap(); - std::env::remove_var(storage::s3_constants::AWS_S3_LOCKING_PROVIDER); + std::env::remove_var(crate::constants::AWS_S3_LOCKING_PROVIDER); let logstore = factory .with_options(Arc::new(store), &url, &StorageOptions::from(HashMap::new())) .unwrap(); - assert_eq!(logstore.name(), "DefaultLogStore"); + assert_eq!(logstore.name(), "S3LogStore"); + } + + #[test] + #[serial] + fn test_create_dynamodb_sdk_config() { + let sdk_config = SdkConfig::builder() + .region(Region::from_static("eu-west-1")) + .endpoint_url("http://localhost:1234") + .build(); + let dynamodb_sdk_config = DynamoDbLockClient::create_dynamodb_sdk_config( + &sdk_config, + Some("http://localhost:2345".to_string()), + ); + assert_eq!( + dynamodb_sdk_config.endpoint_url(), + Some("http://localhost:2345"), + ); + assert_eq!( + dynamodb_sdk_config.region().unwrap().to_string(), + "eu-west-1".to_string(), + ); + let dynamodb_sdk_no_override_config = + DynamoDbLockClient::create_dynamodb_sdk_config(&sdk_config, None); + assert_eq!( + dynamodb_sdk_no_override_config.endpoint_url(), + Some("http://localhost:1234"), + ); } } diff --git a/crates/aws/src/logstore/default_logstore.rs b/crates/aws/src/logstore/default_logstore.rs new file mode 100644 index 0000000000..a5688141c2 --- /dev/null +++ b/crates/aws/src/logstore/default_logstore.rs @@ -0,0 +1,113 @@ +//! 
Default implementation of [`LogStore`] for S3 storage backends + +use std::sync::Arc; + +use bytes::Bytes; +use deltalake_core::{ + logstore::{ + abort_commit_entry, get_latest_version, read_commit_entry, write_commit_entry, + CommitOrBytes, LogStore, LogStoreConfig, + }, + operations::transaction::TransactionError, + storage::{ObjectStoreRef, StorageOptions}, + DeltaResult, +}; +use object_store::{Error as ObjectStoreError, ObjectStore}; +use url::Url; + +/// Return the [S3LogStore] implementation with the provided configuration options +pub fn default_s3_logstore( + store: ObjectStoreRef, + location: &Url, + options: &StorageOptions, +) -> Arc { + Arc::new(S3LogStore::new( + store, + LogStoreConfig { + location: location.clone(), + options: options.clone(), + }, + )) +} + +/// Default [`LogStore`] implementation +#[derive(Debug, Clone)] +pub struct S3LogStore { + pub(crate) storage: Arc, + config: LogStoreConfig, +} + +impl S3LogStore { + /// Create a new instance of [`S3LogStore`] + /// + /// # Arguments + /// + /// * `storage` - A shared reference to an [`object_store::ObjectStore`] with "/" pointing at delta table root (i.e. where `_delta_log` is located). + /// * `location` - A url corresponding to the storage location of `storage`. + pub fn new(storage: ObjectStoreRef, config: LogStoreConfig) -> Self { + Self { storage, config } + } +} + +#[async_trait::async_trait] +impl LogStore for S3LogStore { + fn name(&self) -> String { + "S3LogStore".into() + } + + async fn read_commit_entry(&self, version: i64) -> DeltaResult> { + read_commit_entry(self.storage.as_ref(), version).await + } + + /// Tries to commit a prepared commit file. Returns [`TransactionError`] + /// if the given `version` already exists. The caller should handle the retry logic itself. + /// This is low-level transaction API. If user does not want to maintain the commit loop then + /// the `DeltaTransaction.commit` is desired to be used as it handles `try_commit_transaction` + /// with retry logic. + async fn write_commit_entry( + &self, + version: i64, + commit_or_bytes: CommitOrBytes, + ) -> Result<(), TransactionError> { + match commit_or_bytes { + CommitOrBytes::TmpCommit(tmp_commit) => { + Ok(write_commit_entry(&self.object_store(), version, &tmp_commit).await?) + } + _ => unreachable!(), // S3 Log Store should never receive bytes + } + .map_err(|err| -> TransactionError { + match err { + ObjectStoreError::AlreadyExists { .. 
} => { + TransactionError::VersionAlreadyExists(version) + } + _ => TransactionError::from(err), + } + })?; + Ok(()) + } + + async fn abort_commit_entry( + &self, + version: i64, + commit_or_bytes: CommitOrBytes, + ) -> Result<(), TransactionError> { + match &commit_or_bytes { + CommitOrBytes::TmpCommit(tmp_commit) => { + abort_commit_entry(self.storage.as_ref(), version, tmp_commit).await + } + _ => unreachable!(), // S3 Log Store should never receive bytes + } + } + + async fn get_latest_version(&self, current_version: i64) -> DeltaResult { + get_latest_version(self, current_version).await + } + + fn object_store(&self) -> Arc { + self.storage.clone() + } + + fn config(&self) -> &LogStoreConfig { + &self.config + } +} diff --git a/crates/aws/src/logstore.rs b/crates/aws/src/logstore/dynamodb_logstore.rs similarity index 94% rename from crates/aws/src/logstore.rs rename to crates/aws/src/logstore/dynamodb_logstore.rs index 9eba66cb93..202df1709e 100644 --- a/crates/aws/src/logstore.rs +++ b/crates/aws/src/logstore/dynamodb_logstore.rs @@ -45,7 +45,7 @@ impl S3DynamoDbLogStore { object_store: ObjectStoreRef, ) -> DeltaResult { let lock_client = DynamoDbLockClient::try_new( - &s3_options.sdk_config, + &s3_options.sdk_config.clone().unwrap(), s3_options .extra_opts .get(constants::LOCK_TABLE_KEY_NAME) @@ -58,6 +58,7 @@ impl S3DynamoDbLogStore { .extra_opts .get(constants::MAX_ELAPSED_REQUEST_TIME_KEY_NAME) .cloned(), + s3_options.dynamodb_endpoint.clone(), ) .map_err(|err| DeltaTableError::ObjectStore { source: ObjectStoreError::Generic { @@ -198,8 +199,12 @@ impl LogStore for S3DynamoDbLogStore { async fn write_commit_entry( &self, version: i64, - tmp_commit: &Path, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError> { + let tmp_commit = match commit_or_bytes { + CommitOrBytes::TmpCommit(tmp_commit) => tmp_commit, + _ => unreachable!(), // S3DynamoDBLogstore should never get Bytes + }; let entry = CommitEntry::new(version, tmp_commit.clone()); debug!("Writing commit entry for {self:?}: {entry:?}"); // create log entry in dynamo db: complete = false, no expireTime @@ -243,8 +248,12 @@ impl LogStore for S3DynamoDbLogStore { async fn abort_commit_entry( &self, version: i64, - tmp_commit: &Path, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError> { + let tmp_commit = match commit_or_bytes { + CommitOrBytes::TmpCommit(tmp_commit) => tmp_commit, + _ => unreachable!(), // S3DynamoDBLogstore should never get Bytes + }; self.lock_client .delete_commit_entry(version, &self.table_path) .await @@ -265,7 +274,7 @@ impl LogStore for S3DynamoDbLogStore { }, })?; - abort_commit_entry(&self.storage, version, tmp_commit).await?; + abort_commit_entry(&self.storage, version, &tmp_commit).await?; Ok(()) } @@ -308,13 +317,3 @@ pub enum RepairLogEntryResult { /// Both parts of the repair process where already carried. AlreadyCompleted, } - -/// Represents the possible, positive outcomes of calling `DynamoDbClient::try_create_lock_table()` -#[derive(Debug, PartialEq)] -pub enum CreateLockTableResult { - /// Table created successfully. - TableCreated, - /// Table was not created because it already exists. - /// Does not imply that the table has the correct schema. - TableAlreadyExists, -} diff --git a/crates/aws/src/logstore/mod.rs b/crates/aws/src/logstore/mod.rs new file mode 100644 index 0000000000..e5d7f87aec --- /dev/null +++ b/crates/aws/src/logstore/mod.rs @@ -0,0 +1,11 @@ +//! Contains the different logstore implementations for S3. +//! 
- S3LogStore (used when copy-if-not-exists or unsafe_rename is passed) +//! - S3DynamoDBLogStore (used when DynamoDB is the locking client) + +mod default_logstore; +mod dynamodb_logstore; + +pub use default_logstore::default_s3_logstore; +pub use default_logstore::S3LogStore; +pub use dynamodb_logstore::RepairLogEntryResult; +pub use dynamodb_logstore::S3DynamoDbLogStore; diff --git a/crates/aws/src/storage.rs b/crates/aws/src/storage.rs index e7b4d71109..a6735b1c0f 100644 --- a/crates/aws/src/storage.rs +++ b/crates/aws/src/storage.rs @@ -1,28 +1,30 @@ //! AWS S3 storage backend. -use aws_config::meta::region::ProvideRegion; -use aws_config::provider_config::ProviderConfig; use aws_config::{Region, SdkConfig}; use bytes::Bytes; +use deltalake_core::storage::object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey}; use deltalake_core::storage::object_store::{ - aws::AmazonS3ConfigKey, parse_url_opts, GetOptions, GetResult, ListResult, MultipartId, - ObjectMeta, ObjectStore, PutOptions, PutResult, Result as ObjectStoreResult, + parse_url_opts, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + ObjectStoreScheme, PutMultipartOpts, PutOptions, PutPayload, PutResult, + Result as ObjectStoreResult, }; use deltalake_core::storage::{ limit_store_handler, str_is_truthy, ObjectStoreFactory, ObjectStoreRef, StorageOptions, }; -use deltalake_core::{DeltaResult, ObjectStoreError, Path}; +use deltalake_core::{DeltaResult, DeltaTableError, ObjectStoreError, Path}; use futures::stream::BoxStream; use futures::Future; +use object_store::aws::S3CopyIfNotExists; use std::collections::HashMap; use std::fmt::Debug; use std::ops::Range; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use tokio::io::AsyncWrite; +use tracing::log::*; use url::Url; +use crate::constants; use crate::errors::DynamoDbConfigError; #[cfg(feature = "native-tls")] use crate::native; @@ -71,36 +73,86 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { storage_options: &StorageOptions, ) -> DeltaResult<(ObjectStoreRef, Path)> { let options = self.with_env_s3(storage_options); - let (inner, prefix) = parse_url_opts( - url, - options.0.iter().filter_map(|(key, value)| { - let s3_key = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()).ok()?; - Some((s3_key, value.clone())) - }), - )?; - - let store = limit_store_handler(inner, &options); - // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. 
- if options - .0 - .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) - { - Ok((store, prefix)) - } else { - let s3_options = S3StorageOptions::from_map(&storage_options.0)?; + // All S3-likes should start their builder the same way + let mut builder = AmazonS3Builder::new().with_url(url.to_string()); - let store = S3StorageBackend::try_new( - store, - Some("dynamodb") == s3_options.locking_provider.as_deref() - || s3_options.allow_unsafe_rename, - )?; + for (key, value) in options.0.iter() { + if let Ok(key) = AmazonS3ConfigKey::from_str(&key.to_ascii_lowercase()) { + builder = builder.with_config(key, value.clone()); + } + } - Ok((Arc::new(store), prefix)) + let (_scheme, path) = + ObjectStoreScheme::parse(url).map_err(|e| DeltaTableError::GenericError { + source: Box::new(e), + })?; + let prefix = Path::parse(path)?; + + if is_aws(storage_options) { + debug!("Detected AWS S3, resolving credentials"); + let sdk_config = execute_sdk_future(crate::credentials::resolve_credentials( + storage_options.clone(), + ))??; + builder = builder.with_credentials(Arc::new( + crate::credentials::AWSForObjectStore::new(sdk_config), + )); } + + let inner = builder.build()?; + + let store = aws_storage_handler(limit_store_handler(inner, &options), &options)?; + debug!("Initialized the object store: {store:?}"); + + Ok((store, prefix)) } } +fn aws_storage_handler( + store: ObjectStoreRef, + options: &StorageOptions, +) -> DeltaResult { + // If the copy-if-not-exists env var is set or ConditionalPut is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. + if options + .0 + .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) + || options + .0 + .contains_key(AmazonS3ConfigKey::ConditionalPut.as_ref()) + { + Ok(store) + } else { + let s3_options = S3StorageOptions::from_map(&options.0)?; + + let store = S3StorageBackend::try_new( + store, + Some("dynamodb") == s3_options.locking_provider.as_deref() + || s3_options.allow_unsafe_rename, + )?; + Ok(Arc::new(store)) + } +} + +// Determine whether this crate is being configured for use with native AWS S3 or an S3-alike +// +// This function will rteturn true in the default case since it's most likely that the absence of +// options will mean default/S3 configuration +fn is_aws(options: &StorageOptions) -> bool { + if options + .0 + .contains_key(crate::constants::AWS_FORCE_CREDENTIAL_LOAD) + { + return true; + } + if options + .0 + .contains_key(crate::constants::AWS_S3_LOCKING_PROVIDER) + { + return true; + } + !options.0.contains_key(crate::constants::AWS_ENDPOINT_URL) +} + /// Options used to configure the [S3StorageBackend]. /// /// Available options are described in [s3_constants]. 
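Editor's note: the new `aws_storage_handler` and `is_aws` helpers above decide whether a table URL is backed by AWS S3 proper or an S3-compatible endpoint, and therefore whether SDK credential resolution and the locking-provider check are needed: forcing credential load or configuring a locking provider always means native AWS, while any custom `AWS_ENDPOINT_URL` (MinIO, LocalStack, and similar) means an S3-alike. The sketch below reimplements just that decision order for illustration; the string keys are assumptions standing in for `deltalake_aws::constants::*`, and the function name is illustrative rather than part of the crate's API.

```rust
use std::collections::HashMap;

/// Illustrative reimplementation of the detection order used by `is_aws`.
/// The literal keys stand in for `deltalake_aws::constants::*`.
fn targets_native_aws(options: &HashMap<String, String>) -> bool {
    // Explicitly forcing credential resolution always means native AWS.
    if options.contains_key("AWS_FORCE_CREDENTIAL_LOAD") {
        return true;
    }
    // A locking provider (DynamoDB) is only meaningful on native AWS.
    if options.contains_key("AWS_S3_LOCKING_PROVIDER") {
        return true;
    }
    // Otherwise a custom endpoint means an S3-compatible store; no endpoint
    // falls back to the default assumption of real AWS S3.
    !options.contains_key("AWS_ENDPOINT_URL")
}

fn main() {
    let minio: HashMap<String, String> = HashMap::from([(
        "AWS_ENDPOINT_URL".to_string(),
        "http://minio:9000".to_string(),
    )]);
    assert!(!targets_native_aws(&minio));

    let mut localstack = minio.clone();
    localstack.insert("AWS_FORCE_CREDENTIAL_LOAD".to_string(), "true".to_string());
    assert!(targets_native_aws(&localstack));

    // With no options at all, the default is to assume native AWS S3.
    assert!(targets_native_aws(&HashMap::new()));
}
```

When the real `is_aws` returns false, `create_with_options` skips AWS SDK credential resolution entirely and builds the store from the passed options alone, which is the behaviour the `test_is_aws` test later in this patch exercises.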
@@ -109,12 +161,13 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { pub struct S3StorageOptions { pub virtual_hosted_style_request: bool, pub locking_provider: Option, + pub dynamodb_endpoint: Option, pub s3_pool_idle_timeout: Duration, pub sts_pool_idle_timeout: Duration, pub s3_get_internal_server_error_retries: usize, pub allow_unsafe_rename: bool, pub extra_opts: HashMap, - pub sdk_config: SdkConfig, + pub sdk_config: Option, } impl Eq for S3StorageOptions {} @@ -122,43 +175,42 @@ impl PartialEq for S3StorageOptions { fn eq(&self, other: &Self) -> bool { self.virtual_hosted_style_request == other.virtual_hosted_style_request && self.locking_provider == other.locking_provider + && self.dynamodb_endpoint == other.dynamodb_endpoint && self.s3_pool_idle_timeout == other.s3_pool_idle_timeout && self.sts_pool_idle_timeout == other.sts_pool_idle_timeout && self.s3_get_internal_server_error_retries == other.s3_get_internal_server_error_retries && self.allow_unsafe_rename == other.allow_unsafe_rename && self.extra_opts == other.extra_opts - && self.sdk_config.endpoint_url() == other.sdk_config.endpoint_url() - && self.sdk_config.region() == other.sdk_config.region() } } impl S3StorageOptions { /// Creates an instance of S3StorageOptions from the given HashMap. pub fn from_map(options: &HashMap) -> DeltaResult { - let extra_opts = options + let extra_opts: HashMap = options .iter() .filter(|(k, _)| !s3_constants::S3_OPTS.contains(&k.as_str())) .map(|(k, v)| (k.to_owned(), v.to_owned())) .collect(); // Copy web identity values provided in options but not the environment into the environment // to get picked up by the `from_k8s_env` call in `get_web_identity_provider`. - Self::ensure_env_var(options, s3_constants::AWS_REGION); - Self::ensure_env_var(options, s3_constants::AWS_PROFILE); - Self::ensure_env_var(options, s3_constants::AWS_ACCESS_KEY_ID); - Self::ensure_env_var(options, s3_constants::AWS_SECRET_ACCESS_KEY); - Self::ensure_env_var(options, s3_constants::AWS_SESSION_TOKEN); - Self::ensure_env_var(options, s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE); - Self::ensure_env_var(options, s3_constants::AWS_ROLE_ARN); - Self::ensure_env_var(options, s3_constants::AWS_ROLE_SESSION_NAME); + Self::ensure_env_var(options, constants::AWS_REGION); + Self::ensure_env_var(options, constants::AWS_PROFILE); + Self::ensure_env_var(options, constants::AWS_ACCESS_KEY_ID); + Self::ensure_env_var(options, constants::AWS_SECRET_ACCESS_KEY); + Self::ensure_env_var(options, constants::AWS_SESSION_TOKEN); + Self::ensure_env_var(options, constants::AWS_WEB_IDENTITY_TOKEN_FILE); + Self::ensure_env_var(options, constants::AWS_ROLE_ARN); + Self::ensure_env_var(options, constants::AWS_ROLE_SESSION_NAME); let s3_pool_idle_timeout = - Self::u64_or_default(options, s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, 15); + Self::u64_or_default(options, constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, 15); let sts_pool_idle_timeout = - Self::u64_or_default(options, s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, 10); + Self::u64_or_default(options, constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, 10); let s3_get_internal_server_error_retries = Self::u64_or_default( options, - s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, + constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, 10, ) as usize; @@ -167,58 +219,26 @@ impl S3StorageOptions { .map(|addressing_style| addressing_style == "virtual") .unwrap_or(false); - let allow_unsafe_rename = str_option(options, s3_constants::AWS_S3_ALLOW_UNSAFE_RENAME) + let 
allow_unsafe_rename = str_option(options, constants::AWS_S3_ALLOW_UNSAFE_RENAME) .map(|val| str_is_truthy(&val)) .unwrap_or(false); - let disable_imds = str_option(options, s3_constants::AWS_EC2_METADATA_DISABLED) - .map(|val| str_is_truthy(&val)) - .unwrap_or(true); - let imds_timeout = - Self::u64_or_default(options, s3_constants::AWS_EC2_METADATA_TIMEOUT, 100); - let (loader, provider_config) = - if let Some(endpoint_url) = str_option(options, s3_constants::AWS_ENDPOINT_URL) { - let (region_provider, provider_config) = Self::create_provider_config( - str_option(options, s3_constants::AWS_REGION) - .or_else(|| std::env::var("AWS_DEFAULT_REGION").ok()) - .map_or(Region::from_static("custom"), Region::new), - )?; - let loader = aws_config::from_env() - .endpoint_url(endpoint_url) - .region(region_provider); - (loader, provider_config) - } else { - let (region_provider, provider_config) = Self::create_provider_config( - crate::credentials::new_region_provider(disable_imds, imds_timeout), - )?; - ( - aws_config::from_env().region(region_provider), - provider_config, - ) - }; - let credentials_provider = crate::credentials::ConfiguredCredentialChain::new( - disable_imds, - imds_timeout, - &provider_config, - ); - #[cfg(feature = "native-tls")] - let sdk_config = execute_sdk_future( - loader - .http_client(native::use_native_tls_client( - str_option(options, s3_constants::AWS_ALLOW_HTTP) - .map(|val| str_is_truthy(&val)) - .unwrap_or(false), - )) - .credentials_provider(credentials_provider) - .load(), - )?; - #[cfg(feature = "rustls")] - let sdk_config = - execute_sdk_future(loader.credentials_provider(credentials_provider).load())?; + let storage_options = StorageOptions(options.clone()); + + let sdk_config = match is_aws(&storage_options) { + false => None, + true => { + debug!("Detected AWS S3, resolving credentials"); + Some(execute_sdk_future( + crate::credentials::resolve_credentials(storage_options.clone()), + )??) 
+ } + }; Ok(Self { virtual_hosted_style_request, - locking_provider: str_option(options, s3_constants::AWS_S3_LOCKING_PROVIDER), + locking_provider: str_option(options, constants::AWS_S3_LOCKING_PROVIDER), + dynamodb_endpoint: str_option(options, constants::AWS_ENDPOINT_URL_DYNAMODB), s3_pool_idle_timeout: Duration::from_secs(s3_pool_idle_timeout), sts_pool_idle_timeout: Duration::from_secs(sts_pool_idle_timeout), s3_get_internal_server_error_retries, @@ -228,22 +248,14 @@ impl S3StorageOptions { }) } + /// Return the configured endpoint URL for S3 operations pub fn endpoint_url(&self) -> Option<&str> { - self.sdk_config.endpoint_url() + self.sdk_config.as_ref().map(|v| v.endpoint_url()).flatten() } + /// Return the configured region used for S3 operations pub fn region(&self) -> Option<&Region> { - self.sdk_config.region() - } - - fn create_provider_config( - region_provider: T, - ) -> DeltaResult<(T, ProviderConfig)> { - let region = execute_sdk_future(region_provider.region())?; - Ok(( - region_provider, - ProviderConfig::default().with_region(region), - )) + self.sdk_config.as_ref().map(|v| v.region()).flatten() } fn u64_or_default(map: &HashMap, key: &str, default: u64) -> u64 { @@ -254,7 +266,9 @@ impl S3StorageOptions { fn ensure_env_var(map: &HashMap, key: &str) { if let Some(val) = str_option(map, key) { - std::env::set_var(key, val); + unsafe { + std::env::set_var(key, val); + } } } @@ -280,7 +294,7 @@ where cfg = Some(handle.block_on(future)); }); }); - cfg.ok_or(deltalake_core::DeltaTableError::ObjectStore { + cfg.ok_or(DeltaTableError::ObjectStore { source: ObjectStoreError::Generic { store: STORE_NAME, source: Box::new(DynamoDbConfigError::InitializationError), @@ -307,7 +321,11 @@ pub struct S3StorageBackend { impl std::fmt::Display for S3StorageBackend { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "S3StorageBackend") + write!( + f, + "S3StorageBackend {{ allow_unsafe_rename: {}, inner: {} }}", + self.allow_unsafe_rename, self.inner + ) } } @@ -325,20 +343,24 @@ impl S3StorageBackend { impl std::fmt::Debug for S3StorageBackend { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(fmt, "S3StorageBackend") + write!( + fmt, + "S3StorageBackend {{ allow_unsafe_rename: {}, inner: {:?} }}", + self.allow_unsafe_rename, self.inner + ) } } #[async_trait::async_trait] impl ObjectStore for S3StorageBackend { - async fn put(&self, location: &Path, bytes: Bytes) -> ObjectStoreResult { + async fn put(&self, location: &Path, bytes: PutPayload) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -399,114 +421,28 @@ impl ObjectStore for S3StorageBackend { } } - async fn put_multipart( - &self, - location: &Path, - ) -> ObjectStoreResult<(MultipartId, Box)> { + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &Path, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } /// Storage option keys to use when creating [crate::storage::s3::S3StorageOptions]. 
/// The same key should be used whether passing a key in the hashmap or setting it as an environment variable. /// Provided keys may include configuration for the S3 backend and also the optional DynamoDb lock used for atomic rename. +#[deprecated( + since = "0.20.0", + note = "s3_constants has moved up to deltalake_aws::constants::*" +)] pub mod s3_constants { - /// Custom S3 endpoint. - pub const AWS_ENDPOINT_URL: &str = "AWS_ENDPOINT_URL"; - /// The AWS region. - pub const AWS_REGION: &str = "AWS_REGION"; - /// The AWS profile. - pub const AWS_PROFILE: &str = "AWS_PROFILE"; - /// The AWS_ACCESS_KEY_ID to use for S3. - pub const AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID"; - /// The AWS_SECRET_ACCESS_KEY to use for S3. - pub const AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY"; - /// The AWS_SESSION_TOKEN to use for S3. - pub const AWS_SESSION_TOKEN: &str = "AWS_SESSION_TOKEN"; - /// Uses either "path" (the default) or "virtual", which turns on - /// [virtual host addressing](http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). - pub const AWS_S3_ADDRESSING_STYLE: &str = "AWS_S3_ADDRESSING_STYLE"; - /// Locking provider to use for safe atomic rename. - /// `dynamodb` is currently the only supported locking provider. - /// If not set, safe atomic rename is not available. - pub const AWS_S3_LOCKING_PROVIDER: &str = "AWS_S3_LOCKING_PROVIDER"; - /// The role to assume for S3 writes. - pub const AWS_S3_ASSUME_ROLE_ARN: &str = "AWS_S3_ASSUME_ROLE_ARN"; - /// The role session name to use when a role is assumed. If not provided a random session name is generated. - pub const AWS_S3_ROLE_SESSION_NAME: &str = "AWS_S3_ROLE_SESSION_NAME"; - /// The `pool_idle_timeout` option of aws http client. Has to be lower than 20 seconds, which is - /// default S3 server timeout . - /// However, since rusoto uses hyper as a client, its default timeout is 90 seconds - /// . - /// Hence, the `connection closed before message completed` could occur. - /// To avoid that, the default value of this setting is 15 seconds if it's not set otherwise. - pub const AWS_S3_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_S3_POOL_IDLE_TIMEOUT_SECONDS"; - /// The `pool_idle_timeout` for the as3_constants sts client. See - /// the reasoning in `AWS_S3_POOL_IDLE_TIMEOUT_SECONDS`. - pub const AWS_STS_POOL_IDLE_TIMEOUT_SECONDS: &str = "AWS_STS_POOL_IDLE_TIMEOUT_SECONDS"; - /// The number of retries for S3 GET requests failed with 500 Internal Server Error. - pub const AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES: &str = - "AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES"; - /// The web identity token file to use when using a web identity provider. - /// NOTE: web identity related options are set in the environment when - /// creating an instance of [crate::storage::s3::S3StorageOptions]. - /// See also . - pub const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE"; - /// The role name to use for web identity. - /// NOTE: web identity related options are set in the environment when - /// creating an instance of [crate::storage::s3::S3StorageOptions]. - /// See also . - pub const AWS_ROLE_ARN: &str = "AWS_ROLE_ARN"; - /// The role session name to use for web identity. - /// NOTE: web identity related options are set in the environment when - /// creating an instance of [crate::storage::s3::S3StorageOptions]. - /// See also . 
- pub const AWS_ROLE_SESSION_NAME: &str = "AWS_ROLE_SESSION_NAME"; - /// Allow http connections - mainly useful for integration tests - pub const AWS_ALLOW_HTTP: &str = "AWS_ALLOW_HTTP"; - - /// If set to "true", allows creating commits without concurrent writer protection. - /// Only safe if there is one writer to a given table. - pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME"; - - /// If set to "true", disables the imds client - /// Defaults to "true" - pub const AWS_EC2_METADATA_DISABLED: &str = "AWS_EC2_METADATA_DISABLED"; - - /// The timeout in milliseconds for the EC2 metadata endpoint - /// Defaults to 100 - pub const AWS_EC2_METADATA_TIMEOUT: &str = "AWS_EC2_METADATA_TIMEOUT"; - - /// The list of option keys owned by the S3 module. - /// Option keys not contained in this list will be added to the `extra_opts` - /// field of [crate::storage::s3::S3StorageOptions]. - pub const S3_OPTS: &[&str] = &[ - AWS_ENDPOINT_URL, - AWS_REGION, - AWS_PROFILE, - AWS_ACCESS_KEY_ID, - AWS_SECRET_ACCESS_KEY, - AWS_SESSION_TOKEN, - AWS_S3_LOCKING_PROVIDER, - AWS_S3_ASSUME_ROLE_ARN, - AWS_S3_ROLE_SESSION_NAME, - AWS_WEB_IDENTITY_TOKEN_FILE, - AWS_ROLE_ARN, - AWS_ROLE_SESSION_NAME, - AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, - AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, - AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, - AWS_EC2_METADATA_DISABLED, - AWS_EC2_METADATA_TIMEOUT, - ]; + pub use crate::constants::*; } pub(crate) fn str_option(map: &HashMap, key: &str) -> Option { @@ -523,11 +459,9 @@ pub(crate) fn str_option(map: &HashMap, key: &str) -> Option "http://localhost:1234".to_string(), - s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), - s3_constants::AWS_PROFILE.to_string() => "default".to_string(), - s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), - s3_constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), - s3_constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), - s3_constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), - s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), - s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), - s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), - s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), - s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), - s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + constants::AWS_ENDPOINT_URL.to_string() => "http://localhost:1234".to_string(), + constants::AWS_REGION.to_string() => "us-west-2".to_string(), + constants::AWS_PROFILE.to_string() => "default".to_string(), + constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), + constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), + constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), + constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), + constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), + constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), + constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), + constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), + 
constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), }).unwrap(); assert_eq!( - S3StorageOptions { - sdk_config: SdkConfig::builder() - .endpoint_url("http://localhost:1234".to_string()) - .region(Region::from_static("us-west-2")) - .build(), - virtual_hosted_style_request: true, - locking_provider: Some("another_locking_provider".to_string()), - s3_pool_idle_timeout: Duration::from_secs(1), - sts_pool_idle_timeout: Duration::from_secs(2), - s3_get_internal_server_error_retries: 3, - extra_opts: hashmap! { - s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() - }, - allow_unsafe_rename: false, + Some("another_locking_provider"), + options.locking_provider.as_deref() + ); + assert_eq!(Duration::from_secs(1), options.s3_pool_idle_timeout); + assert_eq!(Duration::from_secs(2), options.sts_pool_idle_timeout); + assert_eq!(3, options.s3_get_internal_server_error_retries); + assert!(options.virtual_hosted_style_request); + assert!(!options.allow_unsafe_rename); + assert_eq!( + hashmap! { + constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() }, - options + options.extra_opts + ); + }); + } + + #[test] + #[serial] + fn storage_options_from_map_with_dynamodb_endpoint_test() { + ScopedEnv::run(|| { + clear_env_of_aws_keys(); + let options = S3StorageOptions::from_map(&hashmap! { + constants::AWS_ENDPOINT_URL.to_string() => "http://localhost:1234".to_string(), + constants::AWS_ENDPOINT_URL_DYNAMODB.to_string() => "http://localhost:2345".to_string(), + constants::AWS_REGION.to_string() => "us-west-2".to_string(), + constants::AWS_PROFILE.to_string() => "default".to_string(), + constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string(), + constants::AWS_S3_LOCKING_PROVIDER.to_string() => "another_locking_provider".to_string(), + constants::AWS_S3_ASSUME_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/another_role".to_string(), + constants::AWS_S3_ROLE_SESSION_NAME.to_string() => "another_session_name".to_string(), + constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "another_token_file".to_string(), + constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "1".to_string(), + constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS.to_string() => "2".to_string(), + constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES.to_string() => "3".to_string(), + constants::AWS_ACCESS_KEY_ID.to_string() => "test_id".to_string(), + constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret".to_string(), + }).unwrap(); + + assert_eq!( + Some("http://localhost:2345"), + options.dynamodb_endpoint.as_deref() ); }); } @@ -692,38 +663,45 @@ mod tests { fn storage_options_mixed_test() { ScopedEnv::run(|| { clear_env_of_aws_keys(); - std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "http://localhost"); - std::env::set_var(s3_constants::AWS_REGION, "us-west-1"); - std::env::set_var(s3_constants::AWS_PROFILE, "default"); - std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "wrong_key_id"); - std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "wrong_secret_key"); - std::env::set_var(s3_constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); + std::env::set_var(constants::AWS_ENDPOINT_URL, "http://localhost"); std::env::set_var( - s3_constants::AWS_S3_ASSUME_ROLE_ARN, + constants::AWS_ENDPOINT_URL_DYNAMODB, + "http://localhost:dynamodb", + ); + std::env::set_var(constants::AWS_REGION, "us-west-1"); + std::env::set_var(constants::AWS_PROFILE, "default"); + 
std::env::set_var(constants::AWS_ACCESS_KEY_ID, "wrong_key_id"); + std::env::set_var(constants::AWS_SECRET_ACCESS_KEY, "wrong_secret_key"); + std::env::set_var(constants::AWS_S3_LOCKING_PROVIDER, "dynamodb"); + std::env::set_var( + constants::AWS_S3_ASSUME_ROLE_ARN, "arn:aws:iam::123456789012:role/some_role", ); - std::env::set_var(s3_constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); - std::env::set_var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); + std::env::set_var(constants::AWS_S3_ROLE_SESSION_NAME, "session_name"); + std::env::set_var(constants::AWS_WEB_IDENTITY_TOKEN_FILE, "token_file"); - std::env::set_var(s3_constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, "1"); - std::env::set_var(s3_constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, "2"); - std::env::set_var(s3_constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, "3"); + std::env::set_var(constants::AWS_S3_POOL_IDLE_TIMEOUT_SECONDS, "1"); + std::env::set_var(constants::AWS_STS_POOL_IDLE_TIMEOUT_SECONDS, "2"); + std::env::set_var(constants::AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES, "3"); let options = S3StorageOptions::from_map(&hashmap! { - s3_constants::AWS_ACCESS_KEY_ID.to_string() => "test_id_mixed".to_string(), - s3_constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret_mixed".to_string(), - s3_constants::AWS_REGION.to_string() => "us-west-2".to_string(), + constants::AWS_ACCESS_KEY_ID.to_string() => "test_id_mixed".to_string(), + constants::AWS_SECRET_ACCESS_KEY.to_string() => "test_secret_mixed".to_string(), + constants::AWS_REGION.to_string() => "us-west-2".to_string(), "AWS_S3_GET_INTERNAL_SERVER_ERROR_RETRIES".to_string() => "3".to_string(), }) .unwrap(); assert_eq!( S3StorageOptions { - sdk_config: SdkConfig::builder() - .endpoint_url("http://localhost".to_string()) - .region(Region::from_static("us-west-2")) - .build(), + sdk_config: Some( + SdkConfig::builder() + .endpoint_url("http://localhost".to_string()) + .region(Region::from_static("us-west-2")) + .build() + ), virtual_hosted_style_request: false, locking_provider: Some("dynamodb".to_string()), + dynamodb_endpoint: Some("http://localhost:dynamodb".to_string()), s3_pool_idle_timeout: Duration::from_secs(1), sts_pool_idle_timeout: Duration::from_secs(2), s3_get_internal_server_error_retries: 3, @@ -741,30 +719,27 @@ mod tests { ScopedEnv::run(|| { clear_env_of_aws_keys(); let _options = S3StorageOptions::from_map(&hashmap! 
{ - s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), - s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "web_identity_token_file".to_string(), - s3_constants::AWS_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/web_identity_role".to_string(), - s3_constants::AWS_ROLE_SESSION_NAME.to_string() => "web_identity_session_name".to_string(), + constants::AWS_REGION.to_string() => "eu-west-1".to_string(), + constants::AWS_WEB_IDENTITY_TOKEN_FILE.to_string() => "web_identity_token_file".to_string(), + constants::AWS_ROLE_ARN.to_string() => "arn:aws:iam::123456789012:role/web_identity_role".to_string(), + constants::AWS_ROLE_SESSION_NAME.to_string() => "web_identity_session_name".to_string(), }).unwrap(); - assert_eq!( - "eu-west-1", - std::env::var(s3_constants::AWS_REGION).unwrap() - ); + assert_eq!("eu-west-1", std::env::var(constants::AWS_REGION).unwrap()); assert_eq!( "web_identity_token_file", - std::env::var(s3_constants::AWS_WEB_IDENTITY_TOKEN_FILE).unwrap() + std::env::var(constants::AWS_WEB_IDENTITY_TOKEN_FILE).unwrap() ); assert_eq!( "arn:aws:iam::123456789012:role/web_identity_role", - std::env::var(s3_constants::AWS_ROLE_ARN).unwrap() + std::env::var(constants::AWS_ROLE_ARN).unwrap() ); assert_eq!( "web_identity_session_name", - std::env::var(s3_constants::AWS_ROLE_SESSION_NAME).unwrap() + std::env::var(constants::AWS_ROLE_SESSION_NAME).unwrap() ); }); } @@ -776,10 +751,10 @@ mod tests { clear_env_of_aws_keys(); let raw_options = hashmap! {}; - std::env::set_var(s3_constants::AWS_ACCESS_KEY_ID, "env_key"); - std::env::set_var(s3_constants::AWS_ENDPOINT_URL, "env_key"); - std::env::set_var(s3_constants::AWS_SECRET_ACCESS_KEY, "env_key"); - std::env::set_var(s3_constants::AWS_REGION, "env_key"); + std::env::set_var(constants::AWS_ACCESS_KEY_ID, "env_key"); + std::env::set_var(constants::AWS_ENDPOINT_URL, "env_key"); + std::env::set_var(constants::AWS_SECRET_ACCESS_KEY, "env_key"); + std::env::set_var(constants::AWS_REGION, "env_key"); let combined_options = S3ObjectStoreFactory {}.with_env_s3(&StorageOptions(raw_options)); @@ -818,47 +793,22 @@ mod tests { }); } - #[tokio::test] - #[serial] - async fn storage_options_toggle_imds() { - ScopedEnv::run_async(async { - clear_env_of_aws_keys(); - let disabled_time = storage_options_configure_imds(Some("true")).await; - let enabled_time = storage_options_configure_imds(Some("false")).await; - let default_time = storage_options_configure_imds(None).await; - println!( - "enabled_time: {}, disabled_time: {}, default_time: {}", - enabled_time.as_micros(), - disabled_time.as_micros(), - default_time.as_micros(), - ); - assert!(disabled_time < enabled_time); - assert!(default_time < enabled_time); - }) - .await; - } + #[test] + fn test_is_aws() { + let options = StorageOptions::default(); + assert!(is_aws(&options)); - async fn storage_options_configure_imds(value: Option<&str>) -> Duration { - let _options = match value { - Some(value) => S3StorageOptions::from_map(&hashmap! { - s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), - s3_constants::AWS_EC2_METADATA_DISABLED.to_string() => value.to_string(), - }) - .unwrap(), - None => S3StorageOptions::from_map(&hashmap! { - s3_constants::AWS_REGION.to_string() => "eu-west-1".to_string(), - }) - .unwrap(), + let minio: HashMap = hashmap! 
{ + crate::constants::AWS_ENDPOINT_URL.to_string() => "http://minio:8080".to_string(), }; + let options = StorageOptions::from(minio); + assert!(!is_aws(&options)); - assert_eq!( - "eu-west-1", - std::env::var(s3_constants::AWS_REGION).unwrap() - ); - - let provider = _options.sdk_config.credentials_provider().unwrap(); - let now = SystemTime::now(); - _ = provider.provide_credentials().await; - now.elapsed().unwrap() + let localstack: HashMap = hashmap! { + crate::constants::AWS_FORCE_CREDENTIAL_LOAD.to_string() => "true".to_string(), + crate::constants::AWS_ENDPOINT_URL.to_string() => "http://minio:8080".to_string(), + }; + let options = StorageOptions::from(localstack); + assert!(is_aws(&options)); } } diff --git a/crates/aws/tests/common.rs b/crates/aws/tests/common.rs index 01aa505b1b..dfa2a9cd51 100644 --- a/crates/aws/tests/common.rs +++ b/crates/aws/tests/common.rs @@ -87,7 +87,7 @@ impl S3Integration { "dynamodb", "create-table", "--table-name", - &table_name, + table_name, "--provisioned-throughput", "ReadCapacityUnits=1,WriteCapacityUnits=1", "--attribute-definitions", @@ -112,7 +112,7 @@ impl S3Integration { } fn wait_for_table(table_name: &str) -> std::io::Result<()> { - let args = ["dynamodb", "describe-table", "--table-name", &table_name]; + let args = ["dynamodb", "describe-table", "--table-name", table_name]; loop { let output = Command::new("aws") .args(args) @@ -145,7 +145,7 @@ impl S3Integration { fn delete_dynamodb_table(table_name: &str) -> std::io::Result { let mut child = Command::new("aws") - .args(["dynamodb", "delete-table", "--table-name", &table_name]) + .args(["dynamodb", "delete-table", "--table-name", table_name]) .stdout(Stdio::null()) .spawn() .expect("aws command is installed"); diff --git a/crates/aws/tests/integration_s3_dynamodb.rs b/crates/aws/tests/integration_s3_dynamodb.rs index eb674c4235..da0b0e06c8 100644 --- a/crates/aws/tests/integration_s3_dynamodb.rs +++ b/crates/aws/tests/integration_s3_dynamodb.rs @@ -10,7 +10,8 @@ use deltalake_aws::logstore::{RepairLogEntryResult, S3DynamoDbLogStore}; use deltalake_aws::storage::S3StorageOptions; use deltalake_aws::{CommitEntry, DynamoDbConfig, DynamoDbLockClient}; use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; -use deltalake_core::logstore::LogStore; +use deltalake_core::logstore::{logstore_for, CommitOrBytes, LogStore}; +use deltalake_core::operations::create::CreateBuilder; use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::commit_uri_from_version; @@ -22,6 +23,11 @@ use lazy_static::lazy_static; use object_store::path::Path; use serde_json::Value; use serial_test::serial; +use tracing::log::*; + +use maplit::hashmap; +use object_store::{PutOptions, PutPayload}; +use url::Url; mod common; use common::*; @@ -38,7 +44,8 @@ lazy_static! 
{ fn make_client() -> TestResult { let options: S3StorageOptions = S3StorageOptions::try_default().unwrap(); Ok(DynamoDbLockClient::try_new( - &options.sdk_config, + &options.sdk_config.unwrap(), + None, None, None, None, @@ -68,7 +75,7 @@ fn client_configs_via_env_variables() -> TestResult<()> { billing_mode: BillingMode::PayPerRequest, lock_table_name: "some_table".to_owned(), max_elapsed_request_time: Duration::from_secs(64), - sdk_config: options.sdk_config, + sdk_config: options.sdk_config.unwrap(), }, *config, ); @@ -78,6 +85,48 @@ fn client_configs_via_env_variables() -> TestResult<()> { Ok(()) } +#[tokio::test] +#[serial] +async fn test_create_s3_table() -> TestResult<()> { + let _ = pretty_env_logger::try_init(); + let context = IntegrationContext::new(Box::new(S3Integration::default()))?; + let _client = make_client()?; + let table_name = format!("{}_{}", "create_test", uuid::Uuid::new_v4()); + let table_uri = context.uri_for_table(TestTables::Custom(table_name.to_owned())); + + let schema = StructType::new(vec![StructField::new( + "id".to_string(), + DataType::Primitive(PrimitiveType::Integer), + true, + )]); + let storage_options: HashMap = hashmap! { + deltalake_aws::constants::AWS_ALLOW_HTTP.into() => "true".into(), + // Despite not being in AWS, we should force credential resolution + deltalake_aws::constants::AWS_FORCE_CREDENTIAL_LOAD.into() => "true".into(), + deltalake_aws::constants::AWS_ENDPOINT_URL.into() => "http://localhost:4566".into(), + }; + let log_store = logstore_for(Url::parse(&table_uri)?, storage_options, None)?; + + let payload = PutPayload::from_static(b"test-drivin"); + let _put = log_store + .object_store() + .put_opts( + &Path::from("_delta_log/_commit_failed.tmp"), + payload, + PutOptions::default(), + ) + .await?; + + debug!("creating a CreateBuilder"); + let _created = CreateBuilder::new() + .with_log_store(log_store) + .with_partition_columns(vec!["id"]) + .with_columns(schema.fields().cloned()) + .with_save_mode(SaveMode::Ignore) + .await?; + Ok(()) +} + #[tokio::test] #[serial] async fn get_missing_item() -> TestResult<()> { @@ -197,7 +246,10 @@ async fn test_abort_commit_entry() -> TestResult<()> { let entry = create_incomplete_commit_entry(&table, 1, "unfinished_commit").await?; log_store - .abort_commit_entry(entry.version, &entry.temp_path) + .abort_commit_entry( + entry.version, + CommitOrBytes::TmpCommit(entry.temp_path.clone()), + ) .await?; // The entry should have been aborted - the latest entry should be one version lower @@ -212,7 +264,7 @@ async fn test_abort_commit_entry() -> TestResult<()> { // Test abort commit is idempotent - still works if already aborted log_store - .abort_commit_entry(entry.version, &entry.temp_path) + .abort_commit_entry(entry.version, CommitOrBytes::TmpCommit(entry.temp_path)) .await?; Ok(()) @@ -243,7 +295,10 @@ async fn test_abort_commit_entry_fail_to_delete_entry() -> TestResult<()> { // Abort will fail since we marked the entry as complete assert!(matches!( log_store - .abort_commit_entry(entry.version, &entry.temp_path) + .abort_commit_entry( + entry.version, + CommitOrBytes::TmpCommit(entry.temp_path.clone()) + ) .await, Err(_), )); @@ -345,7 +400,12 @@ async fn create_incomplete_commit_entry( .into_prepared_commit_future() .await?; - let commit_entry = CommitEntry::new(version, prepared.path().to_owned()); + let tmp_commit = match prepared.commit_or_bytes() { + CommitOrBytes::TmpCommit(tmp_commit) => tmp_commit, + _ => unreachable!(), + }; + + let commit_entry = CommitEntry::new(version, 
tmp_commit.to_owned()); make_client()? .put_commit_entry(&table.table_uri(), &commit_entry) .await?; @@ -390,7 +450,7 @@ async fn prepare_table(context: &IntegrationContext, table_name: &str) -> TestRe // create delta table let table = DeltaOps(table) .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await?; println!("table created: {table:?}"); Ok(table) diff --git a/crates/aws/tests/repair_s3_rename_test.rs b/crates/aws/tests/repair_s3_rename_test.rs index 68d8727ebe..d9e19de7b7 100644 --- a/crates/aws/tests/repair_s3_rename_test.rs +++ b/crates/aws/tests/repair_s3_rename_test.rs @@ -9,6 +9,7 @@ use deltalake_core::storage::object_store::{ use deltalake_core::{DeltaTableBuilder, ObjectStore, Path}; use deltalake_test::utils::IntegrationContext; use futures::stream::BoxStream; +use object_store::{MultipartUpload, PutMultipartOpts, PutPayload}; use serial_test::serial; use std::ops::Range; use std::sync::{Arc, Mutex}; @@ -60,8 +61,8 @@ async fn run_repair_test_case(path: &str, pause_copy: bool) -> Result<(), Object }; let (s3_2, _) = create_s3_backend(&context, "w2", None, None); - s3_1.put(&src1, Bytes::from("test1")).await.unwrap(); - s3_2.put(&src2, Bytes::from("test2")).await.unwrap(); + s3_1.put(&src1, Bytes::from("test1").into()).await.unwrap(); + s3_2.put(&src2, Bytes::from("test2").into()).await.unwrap(); let rename1 = rename(s3_1, &src1, &dst1); // to ensure that first one is started actually first @@ -166,14 +167,14 @@ impl ObjectStore for DelayedObjectStore { self.delete(from).await } - async fn put(&self, location: &Path, bytes: Bytes) -> ObjectStoreResult { + async fn put(&self, location: &Path, bytes: PutPayload) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -227,19 +228,16 @@ impl ObjectStore for DelayedObjectStore { self.inner.rename_if_not_exists(from, to).await } - async fn put_multipart( - &self, - location: &Path, - ) -> ObjectStoreResult<(MultipartId, Box)> { + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &Path, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } diff --git a/crates/azure/Cargo.toml b/crates/azure/Cargo.toml index cbe55a1b83..87a744d608 100644 --- a/crates/azure/Cargo.toml +++ b/crates/azure/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-azure" -version = "0.1.2" +version = "0.3.0" authors.workspace = true keywords.workspace = true readme.workspace = true @@ -12,7 +12,7 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } +deltalake-core = { version = "0.20.0", path = "../core" } lazy_static = "1" # workspace depenndecies diff --git a/crates/azure/tests/integration.rs b/crates/azure/tests/integration.rs index 5230462c92..3ffaa00cc5 100644 --- a/crates/azure/tests/integration.rs +++ b/crates/azure/tests/integration.rs @@ -75,7 +75,10 @@ async fn read_write_test_onelake(context: &IntegrationContext, path: &Path) -> T let expected = Bytes::from_static(b"test world from delta-rs on friday"); - 
delta_store.put(path, expected.clone()).await.unwrap(); + delta_store + .put(path, expected.clone().into()) + .await + .unwrap(); let fetched = delta_store.get(path).await.unwrap().bytes().await.unwrap(); assert_eq!(expected, fetched); diff --git a/crates/benchmarks/src/bin/merge.rs b/crates/benchmarks/src/bin/merge.rs index bb178a192d..2465e23d94 100644 --- a/crates/benchmarks/src/bin/merge.rs +++ b/crates/benchmarks/src/bin/merge.rs @@ -7,9 +7,10 @@ use arrow::datatypes::Schema as ArrowSchema; use arrow_array::{RecordBatch, StringArray, UInt32Array}; use chrono::Duration; use clap::{command, Args, Parser, Subcommand}; +use datafusion::functions::expr_fn::random; use datafusion::{datasource::MemTable, prelude::DataFrame}; use datafusion_common::DataFusionError; -use datafusion_expr::{cast, col, lit, random}; +use datafusion_expr::{cast, col, lit}; use deltalake_core::protocol::SaveMode; use deltalake_core::{ arrow::{ diff --git a/crates/catalog-glue/Cargo.toml b/crates/catalog-glue/Cargo.toml index c757563c1b..549b3a11c8 100644 --- a/crates/catalog-glue/Cargo.toml +++ b/crates/catalog-glue/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-catalog-glue" -version = "0.1.0" +version = "0.4.0" authors.workspace = true keywords.workspace = true readme.workspace = true @@ -15,9 +15,7 @@ rust-version.workspace = true async-trait = { workspace = true } aws-config = "1" aws-sdk-glue = "1" -deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } -# This can depend on a lowest common denominator of core once that's released -# deltalake_core = { version = "0.17.0" } +deltalake-core = { version = "0.20.0", path = "../core" } thiserror = { workspace = true } [dev-dependencies] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 8056c85f29..52df035c71 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-core" -version = "0.18.0" +version = "0.20.0" authors.workspace = true keywords.workspace = true readme.workspace = true @@ -15,6 +15,8 @@ rust-version.workspace = true features = ["datafusion", "json", "unity-experimental"] [dependencies] +delta_kernel.workspace = true + # arrow arrow = { workspace = true } arrow-arith = { workspace = true } @@ -40,8 +42,9 @@ datafusion-common = { workspace = true, optional = true } datafusion-proto = { workspace = true, optional = true } datafusion-sql = { workspace = true, optional = true } datafusion-physical-expr = { workspace = true, optional = true } +datafusion-physical-plan = { workspace = true, optional = true } datafusion-functions = { workspace = true, optional = true } -datafusion-functions-array = { workspace = true, optional = true } +datafusion-functions-aggregate = { workspace = true, optional = true } # serde serde = { workspace = true, features = ["derive"] } @@ -55,6 +58,7 @@ regex = { workspace = true } thiserror = { workspace = true } uuid = { workspace = true, features = ["serde", "v4"] } url = { workspace = true } +urlencoding = { workspace = true} # runtime async-trait = { workspace = true } @@ -62,8 +66,10 @@ futures = { workspace = true } num_cpus = { workspace = true } tokio = { workspace = true, features = [ "macros", + "process", "rt", "rt-multi-thread", + "signal", "sync", "fs", "parking_lot", @@ -71,7 +77,7 @@ tokio = { workspace = true, features = [ # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" -dashmap = "5" +dashmap = "6" errno = "0.3" either = "1.8" fix-hidden-lifetime-bug = "0.2" @@ 
-91,19 +97,20 @@ tracing = { workspace = true } rand = "0.8" z85 = "3.0.5" maplit = "1" +sqlparser = { version = "0.51" } # Unity reqwest = { version = "0.11.18", default-features = false, features = [ "rustls-tls", "json", ], optional = true } -sqlparser = { version = "0.46", optional = true } [dev-dependencies] criterion = "0.5" ctor = "0" deltalake-test = { path = "../test", features = ["datafusion"] } dotenvy = "0" +fs_extra = "1.2.0" hyper = { version = "0.14", features = ["server"] } maplit = "1" pretty_assertions = "1.2.1" @@ -115,17 +122,18 @@ tokio = { version = "1", features = ["macros", "rt-multi-thread"] } utime = "0.3" [features] -default = [] +cdf = [] +default = ["cdf"] datafusion = [ "dep:datafusion", "datafusion-expr", "datafusion-common", "datafusion-proto", "datafusion-physical-expr", + "datafusion-physical-plan", "datafusion-sql", "datafusion-functions", - "datafusion-functions-array", - "sqlparser", + "datafusion-functions-aggregate", ] datafusion-ext = ["datafusion"] json = ["parquet/json"] diff --git a/crates/core/src/data_catalog/storage/mod.rs b/crates/core/src/data_catalog/storage/mod.rs index fc30f32144..7b0b779069 100644 --- a/crates/core/src/data_catalog/storage/mod.rs +++ b/crates/core/src/data_catalog/storage/mod.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use async_trait::async_trait; use dashmap::DashMap; -use datafusion::catalog::schema::SchemaProvider; +use datafusion::catalog::SchemaProvider; use datafusion::datasource::TableProvider; use datafusion_common::DataFusionError; use futures::TryStreamExt; @@ -147,7 +147,8 @@ impl SchemaProvider for ListingSchemaProvider { mod tests { use super::*; use datafusion::assert_batches_sorted_eq; - use datafusion::catalog::{CatalogProvider, MemoryCatalogProvider}; + use datafusion::catalog::CatalogProvider; + use datafusion::catalog_common::MemoryCatalogProvider; use datafusion::execution::context::SessionContext; #[test] diff --git a/crates/core/src/data_catalog/unity/datafusion.rs b/crates/core/src/data_catalog/unity/datafusion.rs index 6b6a4b4a63..44e7c9ca33 100644 --- a/crates/core/src/data_catalog/unity/datafusion.rs +++ b/crates/core/src/data_catalog/unity/datafusion.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use std::sync::Arc; use dashmap::DashMap; -use datafusion::catalog::schema::SchemaProvider; +use datafusion::catalog::SchemaProvider; use datafusion::catalog::{CatalogProvider, CatalogProviderList}; use datafusion::datasource::TableProvider; use datafusion_common::DataFusionError; diff --git a/crates/core/src/data_catalog/unity/models.rs b/crates/core/src/data_catalog/unity/models.rs index 265149b969..2066a4ee86 100644 --- a/crates/core/src/data_catalog/unity/models.rs +++ b/crates/core/src/data_catalog/unity/models.rs @@ -252,8 +252,8 @@ pub enum TableType { StreamingTable, } -/// #[derive(Deserialize)] +/// Summary of the table pub struct TableSummary { /// The full name of the table. pub full_name: String, diff --git a/crates/core/src/delta_datafusion/cdf/mod.rs b/crates/core/src/delta_datafusion/cdf/mod.rs index 02382aa725..e561fc2152 100644 --- a/crates/core/src/delta_datafusion/cdf/mod.rs +++ b/crates/core/src/delta_datafusion/cdf/mod.rs @@ -1,13 +1,13 @@ //! 
Logical operators and physical executions for CDF +use std::collections::HashMap; use arrow_schema::{DataType, Field, TimeUnit}; use lazy_static::lazy_static; -use std::collections::HashMap; -pub(crate) use scan::*; -pub(crate) use scan_utils::*; - -use crate::kernel::{Add, AddCDCFile}; +pub(crate) use self::scan::*; +pub(crate) use self::scan_utils::*; +use crate::kernel::{Add, AddCDCFile, Remove}; +use crate::DeltaResult; mod scan; mod scan_utils; @@ -59,37 +59,73 @@ impl CdcDataSpec { /// This trait defines a generic set of operations used by CDF Reader pub trait FileAction { /// Adds partition values - fn partition_values(&self) -> &HashMap>; + fn partition_values(&self) -> DeltaResult<&HashMap>>; /// Physical Path to the data fn path(&self) -> String; /// Byte size of the physical file - fn size(&self) -> usize; + fn size(&self) -> DeltaResult; } impl FileAction for Add { - fn partition_values(&self) -> &HashMap> { - &self.partition_values + fn partition_values(&self) -> DeltaResult<&HashMap>> { + Ok(&self.partition_values) } fn path(&self) -> String { self.path.clone() } - fn size(&self) -> usize { - self.size as usize + fn size(&self) -> DeltaResult { + Ok(self.size as usize) } } impl FileAction for AddCDCFile { - fn partition_values(&self) -> &HashMap> { - &self.partition_values + fn partition_values(&self) -> DeltaResult<&HashMap>> { + Ok(&self.partition_values) + } + + fn path(&self) -> String { + self.path.clone() + } + + fn size(&self) -> DeltaResult { + Ok(self.size as usize) + } +} + +impl FileAction for Remove { + fn partition_values(&self) -> DeltaResult<&HashMap>> { + // If extended_file_metadata is true, it should be required to have this filled in + if self.extended_file_metadata.unwrap_or_default() { + Ok(self.partition_values.as_ref().unwrap()) + } else { + match self.partition_values { + Some(ref part_map) => Ok(part_map), + _ => Err(crate::DeltaTableError::Protocol { + source: crate::protocol::ProtocolError::InvalidField( + "partition_values".to_string(), + ), + }), + } + } } fn path(&self) -> String { self.path.clone() } - fn size(&self) -> usize { - self.size as usize + fn size(&self) -> DeltaResult { + // If extended_file_metadata is true, it should be required to have this filled in + if self.extended_file_metadata.unwrap_or_default() { + Ok(self.size.unwrap() as usize) + } else { + match self.size { + Some(size) => Ok(size as usize), + _ => Err(crate::DeltaTableError::Protocol { + source: crate::protocol::ProtocolError::InvalidField("size".to_string()), + }), + } + } } } diff --git a/crates/core/src/delta_datafusion/cdf/scan.rs b/crates/core/src/delta_datafusion/cdf/scan.rs index 1f9c9f52b3..e5098bca72 100644 --- a/crates/core/src/delta_datafusion/cdf/scan.rs +++ b/crates/core/src/delta_datafusion/cdf/scan.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use arrow_schema::SchemaRef; use datafusion::execution::{SendableRecordBatchStream, TaskContext}; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; +use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; /// Physical execution of a scan #[derive(Debug, Clone)] @@ -26,6 +26,10 @@ impl DisplayAs for DeltaCdfScan { } impl ExecutionPlan for DeltaCdfScan { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn Any { self } @@ -38,7 +42,7 @@ impl ExecutionPlan for DeltaCdfScan { self.plan.properties() } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { vec![] } diff --git a/crates/core/src/delta_datafusion/cdf/scan_utils.rs 
b/crates/core/src/delta_datafusion/cdf/scan_utils.rs index 434afa4f74..27285179f6 100644 --- a/crates/core/src/delta_datafusion/cdf/scan_utils.rs +++ b/crates/core/src/delta_datafusion/cdf/scan_utils.rs @@ -18,9 +18,9 @@ pub fn map_action_to_scalar( action: &F, part: &str, schema: SchemaRef, -) -> ScalarValue { - action - .partition_values() +) -> DeltaResult { + Ok(action + .partition_values()? .get(part) .map(|val| { schema @@ -36,7 +36,7 @@ pub fn map_action_to_scalar( }) .unwrap_or(ScalarValue::Null) }) - .unwrap_or(ScalarValue::Null) + .unwrap_or(ScalarValue::Null)) } pub fn create_spec_partition_values( @@ -67,7 +67,7 @@ pub fn create_partition_values( let partition_values = table_partition_cols .iter() .map(|part| map_action_to_scalar(&action, part, schema.clone())) - .collect::>(); + .collect::>>()?; let mut new_part_values = spec_partition_values.clone(); new_part_values.extend(partition_values); @@ -75,7 +75,7 @@ pub fn create_partition_values( let part = PartitionedFile { object_meta: ObjectMeta { location: Path::parse(action.path().as_str())?, - size: action.size(), + size: action.size()?, e_tag: None, last_modified: chrono::Utc.timestamp_nanos(0), version: None, @@ -83,6 +83,7 @@ pub fn create_partition_values( partition_values: new_part_values.clone(), extensions: None, range: None, + statistics: None, }; file_groups.entry(new_part_values).or_default().push(part); @@ -91,9 +92,9 @@ pub fn create_partition_values( Ok(file_groups) } -pub fn create_cdc_schema(mut schema_fields: Vec, include_type: bool) -> SchemaRef { +pub fn create_cdc_schema(mut schema_fields: Vec>, include_type: bool) -> SchemaRef { if include_type { - schema_fields.push(Field::new(CHANGE_TYPE_COL, DataType::Utf8, true)); + schema_fields.push(Field::new(CHANGE_TYPE_COL, DataType::Utf8, true).into()); } Arc::new(Schema::new(schema_fields)) } diff --git a/crates/core/src/delta_datafusion/expr.rs b/crates/core/src/delta_datafusion/expr.rs index 868969c571..eb542d98dd 100644 --- a/crates/core/src/delta_datafusion/expr.rs +++ b/crates/core/src/delta_datafusion/expr.rs @@ -20,39 +20,67 @@ // Display functions and required macros were pulled from https://github.com/apache/arrow-datafusion/blob/ddb95497e2792015d5a5998eec79aac8d37df1eb/datafusion/expr/src/expr.rs //! 
Utility functions for Datafusion's Expressions - -use std::{ - fmt::{self, Display, Error, Formatter, Write}, - sync::Arc, -}; +use std::fmt::{self, Display, Error, Formatter, Write}; +use std::sync::Arc; use arrow_schema::DataType; use chrono::{DateTime, NaiveDate}; use datafusion::execution::context::SessionState; +use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::execution::FunctionRegistry; use datafusion_common::Result as DFResult; use datafusion_common::{config::ConfigOptions, DFSchema, Result, ScalarValue, TableReference}; -use datafusion_expr::{ - expr::InList, AggregateUDF, Between, BinaryExpr, Cast, Expr, GetIndexedField, Like, TableSource, -}; +use datafusion_expr::expr::InList; +use datafusion_expr::planner::ExprPlanner; +use datafusion_expr::{AggregateUDF, Between, BinaryExpr, Cast, Expr, Like, TableSource}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::sqlparser::ast::escape_quoted_string; use datafusion_sql::sqlparser::dialect::GenericDialect; use datafusion_sql::sqlparser::parser::Parser; use datafusion_sql::sqlparser::tokenizer::Tokenizer; -use crate::{DeltaResult, DeltaTableError}; - use super::DeltaParserOptions; +use crate::{DeltaResult, DeltaTableError}; pub(crate) struct DeltaContextProvider<'a> { - state: &'a SessionState, + state: SessionState, + /// Keeping this around just to make use of the 'a lifetime + _original: &'a SessionState, + planners: Vec>, +} + +impl<'a> DeltaContextProvider<'a> { + fn new(state: &'a SessionState) -> Self { + let planners = state.expr_planners(); + DeltaContextProvider { + planners, + // Creating a new session state with overridden scalar_functions since + // the get_field() UDF was dropped from the default scalar functions upstream in + // `36660fe10d9c0cdff62e0da0b94bee28422d3419` + state: SessionStateBuilder::new_from_existing(state.clone()) + .with_scalar_functions( + state + .scalar_functions() + .values() + .cloned() + .chain(std::iter::once(datafusion::functions::core::get_field())) + .collect(), + ) + .build(), + _original: state, + } + } } impl<'a> ContextProvider for DeltaContextProvider<'a> { - fn get_table_provider(&self, _name: TableReference) -> DFResult> { + fn get_table_source(&self, _name: TableReference) -> DFResult> { unimplemented!() } + fn get_expr_planners(&self) -> &[Arc] { + self.planners.as_slice() + } + fn get_function_meta(&self, name: &str) -> Option> { self.state.scalar_functions().get(name).cloned() } @@ -73,20 +101,16 @@ impl<'a> ContextProvider for DeltaContextProvider<'a> { self.state.window_functions().get(name).cloned() } - fn get_table_source(&self, _name: TableReference) -> DFResult> { - unimplemented!() - } - - fn udfs_names(&self) -> Vec { - unimplemented!() + fn udf_names(&self) -> Vec { + self.state.scalar_functions().keys().cloned().collect() } - fn udafs_names(&self) -> Vec { - unimplemented!() + fn udaf_names(&self) -> Vec { + self.state.aggregate_functions().keys().cloned().collect() } - fn udwfs_names(&self) -> Vec { - unimplemented!() + fn udwf_names(&self) -> Vec { + self.state.window_functions().keys().cloned().collect() } } @@ -110,7 +134,7 @@ pub(crate) fn parse_predicate_expression( source: Box::new(err), })?; - let context_provider = DeltaContextProvider { state: df_state }; + let context_provider = DeltaContextProvider::new(df_state); let sql_to_rel = SqlToRel::new_with_options(&context_provider, DeltaParserOptions::default().into()); @@ -198,7 +222,7 @@ impl<'a> Display for SqlFormat<'a> { Expr::IsNotFalse(expr) => 
write!(f, "{} IS NOT FALSE", SqlFormat { expr }), Expr::IsNotUnknown(expr) => write!(f, "{} IS NOT UNKNOWN", SqlFormat { expr }), Expr::BinaryExpr(expr) => write!(f, "{}", BinaryExprFormat { expr }), - Expr::ScalarFunction(func) => fmt_function(f, func.func_def.name(), false, &func.args), + Expr::ScalarFunction(func) => fmt_function(f, func.func.name(), false, &func.args), Expr::Cast(Cast { expr, data_type }) => { write!(f, "arrow_cast({}, '{}')", SqlFormat { expr }, data_type) } @@ -276,33 +300,6 @@ impl<'a> Display for SqlFormat<'a> { write!(f, "{expr} IN ({})", expr_vec_fmt!(list)) } } - Expr::GetIndexedField(GetIndexedField { expr, field }) => match field { - datafusion_expr::GetFieldAccess::NamedStructField { name } => { - write!( - f, - "{}[{}]", - SqlFormat { expr }, - ScalarValueFormat { scalar: name } - ) - } - datafusion_expr::GetFieldAccess::ListIndex { key } => { - write!(f, "{}[{}]", SqlFormat { expr }, SqlFormat { expr: key }) - } - datafusion_expr::GetFieldAccess::ListRange { - start, - stop, - stride, - } => { - write!( - f, - "{expr}[{start}:{stop}:{stride}]", - expr = SqlFormat { expr }, - start = SqlFormat { expr: start }, - stop = SqlFormat { expr: stop }, - stride = SqlFormat { expr: stride } - ) - } - }, _ => Err(fmt::Error), } } @@ -425,15 +422,16 @@ impl<'a> fmt::Display for ScalarValueFormat<'a> { #[cfg(test)] mod test { use arrow_schema::DataType as ArrowDataType; + use datafusion::functions_array::expr_fn::cardinality; + use datafusion::functions_nested::expr_ext::{IndexAccessor, SliceAccessor}; use datafusion::prelude::SessionContext; use datafusion_common::{Column, ScalarValue, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; - use datafusion_expr::{ - col, lit, substring, BinaryExpr, Cast, Expr, ExprSchemable, ScalarFunctionDefinition, - }; + use datafusion_expr::{col, lit, BinaryExpr, Cast, Expr, ExprSchemable}; use datafusion_functions::core::arrow_cast; + use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions::encoding::expr_fn::decode; - use datafusion_functions_array::expr_fn::cardinality; + use datafusion_functions::expr_fn::substring; use crate::delta_datafusion::{DataFusionMixins, DeltaSessionContext}; use crate::kernel::{ArrayType, DataType, PrimitiveType, StructField, StructType}; @@ -542,7 +540,7 @@ mod test { let table = DeltaOps::new_in_memory() .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -564,7 +562,7 @@ mod test { override_expected_expr: Some( datafusion_expr::Expr::ScalarFunction( ScalarFunction { - func_def: ScalarFunctionDefinition::UDF(arrow_cast()), + func: arrow_cast(), args: vec![ lit(ScalarValue::Int64(Some(1))), lit(ScalarValue::Utf8(Some("Int32".into()))) @@ -671,7 +669,7 @@ mod test { datafusion_expr::Expr::BinaryExpr(BinaryExpr { left: Box::new(datafusion_expr::Expr::ScalarFunction( ScalarFunction { - func_def: ScalarFunctionDefinition::UDF(arrow_cast()), + func: arrow_cast(), args: vec![ col("value"), lit(ScalarValue::Utf8(Some("Utf8".into()))) @@ -685,19 +683,19 @@ mod test { }, simple!( col("_struct").field("a").eq(lit(20_i64)), - "_struct['a'] = 20".to_string() + "get_field(_struct, 'a') = 20".to_string() ), simple!( col("_struct").field("nested").field("b").eq(lit(20_i64)), - "_struct['nested']['b'] = 20".to_string() + "get_field(get_field(_struct, 'nested'), 'b') = 20".to_string() ), simple!( col("_list").index(lit(1_i64)).eq(lit(20_i64)), - "_list[1] = 20".to_string() + "array_element(_list, 
1) = 20".to_string() ), simple!( cardinality(col("_list").range(col("value"), lit(10_i64))), - "cardinality(_list[value:10:1])".to_string() + "cardinality(array_slice(_list, value, 10))".to_string() ), ParseTest { expr: col("_timestamp_ntz").gt(lit(ScalarValue::TimestampMicrosecond(Some(1262304000000000), None))), @@ -705,7 +703,7 @@ mod test { override_expected_expr: Some(col("_timestamp_ntz").gt( datafusion_expr::Expr::ScalarFunction( ScalarFunction { - func_def: ScalarFunctionDefinition::UDF(arrow_cast()), + func: arrow_cast(), args: vec![ lit(ScalarValue::Utf8(Some("2010-01-01T00:00:00.000000".into()))), lit(ScalarValue::Utf8(Some("Timestamp(Microsecond, None)".into()))) @@ -723,7 +721,7 @@ mod test { override_expected_expr: Some(col("_timestamp").gt( datafusion_expr::Expr::ScalarFunction( ScalarFunction { - func_def: ScalarFunctionDefinition::UDF(arrow_cast()), + func: arrow_cast(), args: vec![ lit(ScalarValue::Utf8(Some("2010-01-01T00:00:00.000000".into()))), lit(ScalarValue::Utf8(Some("Timestamp(Microsecond, Some(\"UTC\"))".into()))) diff --git a/crates/core/src/delta_datafusion/find_files/logical.rs b/crates/core/src/delta_datafusion/find_files/logical.rs index 6234cbe5c2..4dd4a3b5da 100644 --- a/crates/core/src/delta_datafusion/find_files/logical.rs +++ b/crates/core/src/delta_datafusion/find_files/logical.rs @@ -92,7 +92,16 @@ impl UserDefinedLogicalNodeCore for FindFilesNode { ) } - fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { - self.clone() + fn from_template(&self, exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + _inputs: Vec, + ) -> datafusion_common::Result { + Ok(self.clone()) } } diff --git a/crates/core/src/delta_datafusion/find_files/mod.rs b/crates/core/src/delta_datafusion/find_files/mod.rs index 2e8d26dee3..0c235242c2 100644 --- a/crates/core/src/delta_datafusion/find_files/mod.rs +++ b/crates/core/src/delta_datafusion/find_files/mod.rs @@ -1,6 +1,6 @@ -use arrow_array::cast::AsArray; use std::sync::Arc; +use arrow_array::cast::AsArray; use arrow_array::types::UInt16Type; use arrow_array::RecordBatch; use arrow_schema::SchemaBuilder; @@ -10,13 +10,13 @@ use async_trait::async_trait; use datafusion::datasource::MemTable; use datafusion::execution::context::{QueryPlanner, SessionState}; use datafusion::execution::TaskContext; -use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::limit::LocalLimitExec; -use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; use datafusion::prelude::SessionContext; use datafusion_common::{DFSchemaRef, Result, ToDFSchema}; use datafusion_expr::{col, Expr, LogicalPlan, UserDefinedLogicalNode}; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::limit::LocalLimitExec; +use datafusion_physical_plan::ExecutionPlan; use lazy_static::lazy_static; use crate::delta_datafusion::find_files::logical::FindFilesNode; @@ -28,8 +28,6 @@ use crate::logstore::LogStoreRef; use crate::table::state::DeltaTableState; use crate::DeltaTableError; -use super::create_physical_expr_fix; - pub mod logical; pub mod physical; @@ -43,8 +41,10 @@ lazy_static! 
{ ONLY_FILES_SCHEMA.clone().to_dfschema_ref().unwrap(); } +#[derive(Default)] struct FindFilesPlannerExtension {} +#[derive(Default)] struct FindFilesPlanner {} #[async_trait] @@ -139,11 +139,11 @@ async fn scan_table_by_files( .with_file_column(true) .build(&snapshot)?; - let logical_schema = df_logical_schema(&snapshot, &scan_config)?; + let logical_schema = df_logical_schema(&snapshot, &scan_config.file_column_name, None)?; // Identify which columns we need to project let mut used_columns = expression - .to_columns()? + .column_refs() .into_iter() .map(|column| logical_schema.index_of(&column.name)) .collect::, ArrowError>>()?; @@ -161,11 +161,8 @@ async fn scan_table_by_files( let input_schema = scan.logical_schema.as_ref().to_owned(); let input_dfschema = input_schema.clone().try_into()?; - let predicate_expr = create_physical_expr_fix( - Expr::IsTrue(Box::new(expression.clone())), - &input_dfschema, - state.execution_props(), - )?; + let predicate_expr = + state.create_physical_expr(Expr::IsTrue(Box::new(expression.clone())), &input_dfschema)?; let filter: Arc = Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); @@ -193,6 +190,7 @@ async fn scan_table_by_files( pub mod tests { use std::sync::Arc; + use datafusion::execution::session_state::SessionStateBuilder; use datafusion::prelude::{DataFrame, SessionContext}; use datafusion_common::{assert_batches_eq, assert_batches_sorted_eq}; use datafusion_expr::{col, lit, Expr, Extension, LogicalPlan}; @@ -207,9 +205,9 @@ pub mod tests { expr: Expr, ) -> Result, DeltaTableError> { let ctx = SessionContext::new(); - let state = ctx - .state() - .with_query_planner(Arc::new(FindFilesPlanner {})); + let state = SessionStateBuilder::new_from_existing(ctx.state()) + .with_query_planner(Arc::new(FindFilesPlanner::default())) + .build(); let find_files_node = LogicalPlan::Extension(Extension { node: Arc::new(FindFilesNode::new( "my_cool_plan".into(), diff --git a/crates/core/src/delta_datafusion/find_files/physical.rs b/crates/core/src/delta_datafusion/find_files/physical.rs index eb09d2d94b..508d1f672e 100644 --- a/crates/core/src/delta_datafusion/find_files/physical.rs +++ b/crates/core/src/delta_datafusion/find_files/physical.rs @@ -8,14 +8,14 @@ use arrow_array::RecordBatch; use arrow_schema::SchemaRef; use datafusion::error::Result; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; -use datafusion::physical_plan::memory::MemoryStream; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, -}; use datafusion::prelude::SessionContext; use datafusion_common::tree_node::TreeNode; use datafusion_expr::Expr; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion_physical_plan::memory::MemoryStream; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; use futures::stream::BoxStream; use futures::{FutureExt, Stream, StreamExt, TryStreamExt}; @@ -85,6 +85,10 @@ impl DisplayAs for FindFilesExec { } impl ExecutionPlan for FindFilesExec { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn Any { self } @@ -97,7 +101,7 @@ impl ExecutionPlan for FindFilesExec { &self.plan_properties } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { vec![] } diff --git a/crates/core/src/delta_datafusion/logical.rs b/crates/core/src/delta_datafusion/logical.rs index 52ee1194f4..2ce435b5b6 100644 --- 
a/crates/core/src/delta_datafusion/logical.rs +++ b/crates/core/src/delta_datafusion/logical.rs @@ -52,13 +52,22 @@ impl UserDefinedLogicalNodeCore for MetricObserver { fn from_template( &self, - _exprs: &[datafusion_expr::Expr], + exprs: &[datafusion_expr::Expr], inputs: &[datafusion_expr::LogicalPlan], ) -> Self { - MetricObserver { + self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> datafusion_common::Result { + Ok(MetricObserver { id: self.id.clone(), input: inputs[0].clone(), enable_pushdown: self.enable_pushdown, - } + }) } } diff --git a/crates/core/src/delta_datafusion/mod.rs b/crates/core/src/delta_datafusion/mod.rs index c1b6208cff..8d64f85fb2 100644 --- a/crates/core/src/delta_datafusion/mod.rs +++ b/crates/core/src/delta_datafusion/mod.rs @@ -25,82 +25,74 @@ use std::collections::{HashMap, HashSet}; use std::fmt::{self, Debug}; use std::sync::Arc; -use arrow::compute::{cast_with_options, CastOptions}; -use arrow::datatypes::DataType; -use arrow::datatypes::{ - DataType as ArrowDataType, Schema as ArrowSchema, SchemaRef, SchemaRef as ArrowSchemaRef, - TimeUnit, -}; -use arrow::error::ArrowError; -use arrow::record_batch::RecordBatch; use arrow_array::types::UInt16Type; -use arrow_array::{Array, DictionaryArray, StringArray, TypedDictionaryArray}; +use arrow_array::{Array, DictionaryArray, RecordBatch, StringArray, TypedDictionaryArray}; use arrow_cast::display::array_value_to_string; - -use arrow_schema::Field; +use arrow_cast::{cast_with_options, CastOptions}; +use arrow_schema::{ + ArrowError, DataType as ArrowDataType, Field, Schema as ArrowSchema, SchemaRef, + SchemaRef as ArrowSchemaRef, TimeUnit, +}; +use arrow_select::concat::concat_batches; use async_trait::async_trait; use chrono::{DateTime, TimeZone, Utc}; -use datafusion::datasource::file_format::{parquet::ParquetFormat, FileFormat}; +use datafusion::catalog::{Session, TableProviderFactory}; +use datafusion::config::TableParquetOptions; +use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; use datafusion::datasource::physical_plan::{ wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, }; -use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::{listing::PartitionedFile, MemTable, TableProvider, TableType}; use datafusion::execution::context::{SessionConfig, SessionContext, SessionState, TaskContext}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; use datafusion::physical_optimizer::pruning::PruningPredicate; -use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::limit::LocalLimitExec; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, - Statistics, -}; use datafusion_common::scalar::ScalarValue; -use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::{ config::ConfigOptions, Column, DFSchema, DataFusionError, Result as DataFusionResult, - ToDFSchema, + TableReference, ToDFSchema, }; -use datafusion_expr::expr::ScalarFunction; use datafusion_expr::logical_plan::CreateExternalTable; use datafusion_expr::utils::conjunction; -use datafusion_expr::{ - col, Expr, Extension, GetFieldAccess, GetIndexedField, LogicalPlan, - TableProviderFilterPushDown, 
Volatility, +use datafusion_expr::{col, Expr, Extension, LogicalPlan, TableProviderFilterPushDown, Volatility}; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::limit::LocalLimitExec; +use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, + Statistics, }; -use datafusion_functions::expr_fn::get_field; -use datafusion_functions_array::extract::{array_element, array_slice}; -use datafusion_physical_expr::execution_props::ExecutionProps; -use datafusion_physical_expr::PhysicalExpr; use datafusion_proto::logical_plan::LogicalExtensionCodec; use datafusion_proto::physical_plan::PhysicalExtensionCodec; use datafusion_sql::planner::ParserOptions; use either::Either; use futures::TryStreamExt; - use itertools::Itertools; use object_store::ObjectMeta; use serde::{Deserialize, Serialize}; use url::Url; use crate::delta_datafusion::expr::parse_predicate_expression; +use crate::delta_datafusion::schema_adapter::DeltaSchemaAdapterFactory; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, DataCheck, EagerSnapshot, Invariant, Snapshot}; +use crate::kernel::{Add, DataCheck, EagerSnapshot, Invariant, Snapshot, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::table::builder::ensure_table_uri; use crate::table::state::DeltaTableState; use crate::table::Constraint; use crate::{open_table, open_table_with_storage_options, DeltaTable}; -const PATH_COLUMN: &str = "__delta_rs_path"; +pub(crate) const PATH_COLUMN: &str = "__delta_rs_path"; pub mod cdf; pub mod expr; pub mod logical; pub mod physical; +pub mod planner; mod find_files; +mod schema_adapter; impl From for DataFusionError { fn from(err: DeltaTableError) -> Self { @@ -199,53 +191,41 @@ impl DataFusionMixins for DeltaTableState { fn _arrow_schema(snapshot: &Snapshot, wrap_partitions: bool) -> DeltaResult { let meta = snapshot.metadata(); - let fields = meta - .schema()? + + let schema = meta.schema()?; + let fields = schema .fields() - .iter() .filter(|f| !meta.partition_columns.contains(&f.name().to_string())) .map(|f| f.try_into()) .chain( - meta.schema()? 
- .fields() - .iter() - .filter(|f| meta.partition_columns.contains(&f.name().to_string())) - .map(|f| { - let field = Field::try_from(f)?; - let corrected = if wrap_partitions { - match field.data_type() { - // Only dictionary-encode types that may be large - // // https://github.com/apache/arrow-datafusion/pull/5545 - DataType::Utf8 - | DataType::LargeUtf8 - | DataType::Binary - | DataType::LargeBinary => { - wrap_partition_type_in_dict(field.data_type().clone()) - } - _ => field.data_type().clone(), + // We need stable order between logical and physical schemas, but the order of + // partitioning columns is not always the same in the json schema and the array + meta.partition_columns.iter().map(|partition_col| { + let f = schema.field(partition_col).unwrap(); + let field = Field::try_from(f)?; + let corrected = if wrap_partitions { + match field.data_type() { + // Only dictionary-encode types that may be large + // // https://github.com/apache/arrow-datafusion/pull/5545 + ArrowDataType::Utf8 + | ArrowDataType::LargeUtf8 + | ArrowDataType::Binary + | ArrowDataType::LargeBinary => { + wrap_partition_type_in_dict(field.data_type().clone()) } - } else { - field.data_type().clone() - }; - Ok(field.with_data_type(corrected)) - }), + _ => field.data_type().clone(), + } + } else { + field.data_type().clone() + }; + Ok(field.with_data_type(corrected)) + }), ) .collect::, _>>()?; Ok(Arc::new(ArrowSchema::new(fields))) } -pub(crate) trait DataFusionFileMixins { - /// Iterate over all files in the log matching a predicate - fn files_matching_predicate(&self, filters: &[Expr]) -> DeltaResult>; -} - -impl DataFusionFileMixins for EagerSnapshot { - fn files_matching_predicate(&self, filters: &[Expr]) -> DeltaResult> { - files_matching_predicate(self, filters) - } -} - pub(crate) fn files_matching_predicate<'a>( snapshot: &'a EagerSnapshot, filters: &[Expr], @@ -253,7 +233,8 @@ pub(crate) fn files_matching_predicate<'a>( if let Some(Some(predicate)) = (!filters.is_empty()).then_some(conjunction(filters.iter().cloned())) { - let expr = logical_expr_to_physical_expr(predicate, snapshot.arrow_schema()?.as_ref()); + let expr = SessionContext::new() + .create_physical_expr(predicate, &snapshot.arrow_schema()?.to_dfschema()?)?; let pruning_predicate = PruningPredicate::try_new(expr, snapshot.arrow_schema()?)?; Ok(Either::Left( snapshot @@ -309,9 +290,13 @@ pub(crate) fn register_store(store: LogStoreRef, env: Arc) { /// at the physical level pub(crate) fn df_logical_schema( snapshot: &DeltaTableState, - scan_config: &DeltaScanConfig, + file_column_name: &Option, + schema: Option, ) -> DeltaResult { - let input_schema = snapshot.arrow_schema()?; + let input_schema = match schema { + Some(schema) => schema, + None => snapshot.input_schema()?, + }; let table_partition_cols = &snapshot.metadata().partition_columns; let mut fields: Vec> = input_schema @@ -330,8 +315,12 @@ pub(crate) fn df_logical_schema( )); } - if let Some(file_column_name) = &scan_config.file_column_name { - fields.push(Arc::new(Field::new(file_column_name, DataType::Utf8, true))); + if let Some(file_column_name) = file_column_name { + fields.push(Arc::new(Field::new( + file_column_name, + ArrowDataType::Utf8, + true, + ))); } Ok(Arc::new(ArrowSchema::new(fields))) @@ -349,7 +338,10 @@ pub struct DeltaScanConfigBuilder { file_column_name: Option, /// Whether to wrap partition values in a dictionary encoding to potentially save space wrap_partition_values: Option, + /// Whether to push down filter in end result or just prune the files 
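The `_arrow_schema` rewrite above keeps partition columns in the order given by the table metadata and dictionary-encodes string/binary partition types to avoid repeating the same partition value per row. A minimal sketch of that wrapping using only `arrow-schema`; the `UInt16` key type matches the `UInt16Type` dictionaries used elsewhere in this diff.

```rust
use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema};

// Re-type large/variable-width partition columns as dictionaries so repeated
// partition values are stored once per batch.
fn wrap_partition_type(dt: DataType) -> DataType {
    match dt {
        DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary => {
            DataType::Dictionary(Box::new(DataType::UInt16), Box::new(dt))
        }
        other => other,
    }
}

fn main() {
    // Data columns first, then partition columns in metadata order.
    let schema = Arc::new(Schema::new(vec![
        Field::new("value", DataType::Int32, true),
        Field::new("year", wrap_partition_type(DataType::Utf8), true),
        Field::new("month", wrap_partition_type(DataType::Utf8), true),
    ]));
    assert!(matches!(
        schema.field_with_name("year").unwrap().data_type(),
        DataType::Dictionary(_, _)
    ));
}
```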
enable_parquet_pushdown: bool, + /// Schema to scan table with + schema: Option, } impl Default for DeltaScanConfigBuilder { @@ -359,6 +351,7 @@ impl Default for DeltaScanConfigBuilder { file_column_name: None, wrap_partition_values: None, enable_parquet_pushdown: true, + schema: None, } } } @@ -397,16 +390,21 @@ impl DeltaScanConfigBuilder { self } + /// Use the provided [SchemaRef] for the [DeltaScan] + pub fn with_schema(mut self, schema: SchemaRef) -> Self { + self.schema = Some(schema); + self + } + /// Build a DeltaScanConfig and ensure no column name conflicts occur during downstream processing pub fn build(&self, snapshot: &DeltaTableState) -> DeltaResult { - let input_schema = snapshot.input_schema()?; - let mut file_column_name = None; - let mut column_names: HashSet<&String> = HashSet::new(); - for field in input_schema.fields.iter() { - column_names.insert(field.name()); - } + let file_column_name = if self.include_file_column { + let input_schema = snapshot.input_schema()?; + let mut column_names: HashSet<&String> = HashSet::new(); + for field in input_schema.fields.iter() { + column_names.insert(field.name()); + } - if self.include_file_column { match &self.file_column_name { Some(name) => { if column_names.contains(name) { @@ -416,7 +414,7 @@ impl DeltaScanConfigBuilder { ))); } - file_column_name = Some(name.to_owned()) + Some(name.to_owned()) } None => { let prefix = PATH_COLUMN; @@ -428,15 +426,18 @@ impl DeltaScanConfigBuilder { name = format!("{}_{}", prefix, idx); } - file_column_name = Some(name); + Some(name) } } - } + } else { + None + }; Ok(DeltaScanConfig { file_column_name, wrap_partition_values: self.wrap_partition_values.unwrap_or(true), enable_parquet_pushdown: self.enable_parquet_pushdown, + schema: self.schema.clone(), }) } } @@ -450,37 +451,36 @@ pub struct DeltaScanConfig { pub wrap_partition_values: bool, /// Allow pushdown of the scan filter pub enable_parquet_pushdown: bool, + /// Schema to read as + pub schema: Option, } -#[derive(Debug)] pub(crate) struct DeltaScanBuilder<'a> { snapshot: &'a DeltaTableState, log_store: LogStoreRef, filter: Option, - state: &'a SessionState, + session: &'a dyn Session, projection: Option<&'a Vec>, limit: Option, files: Option<&'a [Add]>, - config: DeltaScanConfig, - schema: Option, + config: Option, } impl<'a> DeltaScanBuilder<'a> { pub fn new( snapshot: &'a DeltaTableState, log_store: LogStoreRef, - state: &'a SessionState, + session: &'a dyn Session, ) -> Self { DeltaScanBuilder { snapshot, log_store, filter: None, - state, - files: None, + session, projection: None, limit: None, - config: DeltaScanConfig::default(), - schema: None, + files: None, + config: None, } } @@ -505,21 +505,26 @@ impl<'a> DeltaScanBuilder<'a> { } pub fn with_scan_config(mut self, config: DeltaScanConfig) -> Self { - self.config = config; + self.config = Some(config); self } pub async fn build(self) -> DeltaResult { - let config = self.config; - let schema = match self.schema { - Some(schema) => schema, - None => { - self.snapshot - .physical_arrow_schema(self.log_store.object_store()) - .await? 
- } + let config = match self.config { + Some(config) => config, + None => DeltaScanConfigBuilder::new().build(self.snapshot)?, }; - let logical_schema = df_logical_schema(self.snapshot, &config)?; + + let schema = match config.schema.clone() { + Some(value) => Ok(value), + None => self.snapshot.arrow_schema(), + }?; + + let logical_schema = df_logical_schema( + self.snapshot, + &config.file_column_name, + Some(schema.clone()), + )?; let logical_schema = if let Some(used_columns) = self.projection { let mut fields = vec![]; @@ -531,33 +536,45 @@ impl<'a> DeltaScanBuilder<'a> { logical_schema }; + let context = SessionContext::new(); + let df_schema = logical_schema.clone().to_dfschema()?; let logical_filter = self .filter - .map(|expr| logical_expr_to_physical_expr(expr, &logical_schema)); + .map(|expr| context.create_physical_expr(expr, &df_schema).unwrap()); // Perform Pruning of files to scan - let files = match self.files { - Some(files) => files.to_owned(), + let (files, files_scanned, files_pruned) = match self.files { + Some(files) => { + let files = files.to_owned(); + let files_scanned = files.len(); + (files, files_scanned, 0) + } None => { if let Some(predicate) = &logical_filter { let pruning_predicate = PruningPredicate::try_new(predicate.clone(), logical_schema.clone())?; let files_to_prune = pruning_predicate.prune(self.snapshot)?; - self.snapshot + let mut files_pruned = 0usize; + let files = self + .snapshot .file_actions_iter()? .zip(files_to_prune.into_iter()) - .filter_map( - |(action, keep)| { - if keep { - Some(action.to_owned()) - } else { - None - } - }, - ) - .collect() + .filter_map(|(action, keep)| { + if keep { + Some(action.to_owned()) + } else { + files_pruned += 1; + None + } + }) + .collect::>(); + + let files_scanned = files.len(); + (files, files_scanned, files_pruned) } else { - self.snapshot.file_actions()? 
+ let files = self.snapshot.file_actions()?; + let files_scanned = files.len(); + (files, files_scanned, 0) } } }; @@ -603,9 +620,9 @@ impl<'a> DeltaScanBuilder<'a> { if let Some(file_column_name) = &config.file_column_name { let field_name_datatype = if config.wrap_partition_values { - wrap_partition_type_in_dict(DataType::Utf8) + wrap_partition_type_in_dict(ArrowDataType::Utf8) } else { - DataType::Utf8 + ArrowDataType::Utf8 }; table_partition_cols.push(Field::new( file_column_name.clone(), @@ -619,37 +636,47 @@ impl<'a> DeltaScanBuilder<'a> { .datafusion_table_statistics() .unwrap_or(Statistics::new_unknown(&schema)); + let parquet_options = TableParquetOptions { + global: self.session.config().options().execution.parquet.clone(), + ..Default::default() + }; + + let mut exec_plan_builder = ParquetExecBuilder::new(FileScanConfig { + object_store_url: self.log_store.object_store_url(), + file_schema, + file_groups: file_groups.into_values().collect(), + statistics: stats, + projection: self.projection.cloned(), + limit: self.limit, + table_partition_cols, + output_ordering: vec![], + }) + .with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {})) + .with_table_parquet_options(parquet_options); + // Sometimes (i.e Merge) we want to prune files that don't make the // filter and read the entire contents for files that do match the // filter - let parquet_pushdown = if config.enable_parquet_pushdown { - logical_filter.clone() - } else { - None + if let Some(predicate) = logical_filter { + if config.enable_parquet_pushdown { + exec_plan_builder = exec_plan_builder.with_predicate(predicate); + } }; - let scan = ParquetFormat::new() - .create_physical_plan( - self.state, - FileScanConfig { - object_store_url: self.log_store.object_store_url(), - file_schema, - file_groups: file_groups.into_values().collect(), - statistics: stats, - projection: self.projection.cloned(), - limit: self.limit, - table_partition_cols, - output_ordering: vec![], - }, - parquet_pushdown.as_ref(), - ) - .await?; + let metrics = ExecutionPlanMetricsSet::new(); + MetricBuilder::new(&metrics) + .global_counter("files_scanned") + .add(files_scanned); + MetricBuilder::new(&metrics) + .global_counter("files_pruned") + .add(files_pruned); Ok(DeltaScan { table_uri: ensure_table_uri(self.log_store.root_uri())?.as_str().into(), - parquet_scan: scan, + parquet_scan: exec_plan_builder.build_arc(), config, logical_schema, + metrics, }) } } @@ -679,7 +706,7 @@ impl TableProvider for DeltaTable { async fn scan( &self, - session: &SessionState, + session: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, @@ -697,11 +724,14 @@ impl TableProvider for DeltaTable { Ok(Arc::new(scan)) } - fn supports_filter_pushdown( + fn supports_filters_pushdown( &self, - _filter: &Expr, - ) -> DataFusionResult { - Ok(TableProviderFilterPushDown::Inexact) + filter: &[&Expr], + ) -> DataFusionResult> { + Ok(filter + .iter() + .map(|_| TableProviderFilterPushDown::Inexact) + .collect()) } fn statistics(&self) -> Option { @@ -715,6 +745,7 @@ pub struct DeltaTableProvider { log_store: LogStoreRef, config: DeltaScanConfig, schema: Arc, + files: Option>, } impl DeltaTableProvider { @@ -725,12 +756,19 @@ impl DeltaTableProvider { config: DeltaScanConfig, ) -> DeltaResult { Ok(DeltaTableProvider { - schema: df_logical_schema(&snapshot, &config)?, + schema: df_logical_schema(&snapshot, &config.file_column_name, config.schema.clone())?, snapshot, log_store, config, + files: None, }) } + + /// Define which files to consider while 
building a scan, for advanced usecases + pub fn with_files(mut self, files: Vec) -> DeltaTableProvider { + self.files = Some(files); + self + } } #[async_trait] @@ -757,7 +795,7 @@ impl TableProvider for DeltaTableProvider { async fn scan( &self, - session: &SessionState, + session: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, @@ -765,22 +803,23 @@ impl TableProvider for DeltaTableProvider { register_store(self.log_store.clone(), session.runtime_env().clone()); let filter_expr = conjunction(filters.iter().cloned()); - let scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session) + let mut scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session) .with_projection(projection) .with_limit(limit) .with_filter(filter_expr) - .with_scan_config(self.config.clone()) - .build() - .await?; + .with_scan_config(self.config.clone()); - Ok(Arc::new(scan)) + if let Some(files) = &self.files { + scan = scan.with_files(files); + } + Ok(Arc::new(scan.build().await?)) } - fn supports_filter_pushdown( + fn supports_filters_pushdown( &self, - _filter: &Expr, - ) -> DataFusionResult { - Ok(TableProviderFilterPushDown::Inexact) + _filter: &[&Expr], + ) -> DataFusionResult> { + Ok(vec![TableProviderFilterPushDown::Inexact]) } fn statistics(&self) -> Option { @@ -800,6 +839,8 @@ pub struct DeltaScan { pub parquet_scan: Arc, /// The schema of the table to be used when evaluating expressions pub logical_schema: Arc, + /// Metrics for scan reported via DataFusion + metrics: ExecutionPlanMetricsSet, } #[derive(Debug, Serialize, Deserialize)] @@ -816,6 +857,10 @@ impl DisplayAs for DeltaScan { } impl ExecutionPlan for DeltaScan { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn Any { self } @@ -828,8 +873,8 @@ impl ExecutionPlan for DeltaScan { self.parquet_scan.properties() } - fn children(&self) -> Vec> { - vec![self.parquet_scan.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.parquet_scan] } fn with_new_children( @@ -847,6 +892,7 @@ impl ExecutionPlan for DeltaScan { config: self.config.clone(), parquet_scan: children[0].clone(), logical_schema: self.logical_schema.clone(), + metrics: self.metrics.clone(), })) } @@ -858,6 +904,10 @@ impl ExecutionPlan for DeltaScan { self.parquet_scan.execute(partition, context) } + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + fn statistics(&self) -> DataFusionResult { self.parquet_scan.statistics() } @@ -873,6 +923,7 @@ impl ExecutionPlan for DeltaScan { config: self.config.clone(), parquet_scan, logical_schema: self.logical_schema.clone(), + metrics: self.metrics.clone(), }))) } else { Ok(None) @@ -945,7 +996,7 @@ pub(crate) fn get_null_of_arrow_type(t: &ArrowDataType) -> DeltaResult Arc { - let df_schema = schema.clone().to_dfschema().unwrap(); - let execution_props = ExecutionProps::new(); - create_physical_expr_fix(expr, &df_schema, &execution_props).unwrap() -} - -// TODO This should be removed after datafusion v38 -pub(crate) fn create_physical_expr_fix( - expr: Expr, - input_dfschema: &DFSchema, - execution_props: &ExecutionProps, -) -> Result, DataFusionError> { - // Support Expr::struct by rewriting expressions. 
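`DeltaScan` above now carries an `ExecutionPlanMetricsSet` so the number of files scanned and pruned is reported through DataFusion's normal metrics channel. A small sketch of the same counter registration, assuming `datafusion-physical-plan` as a dependency:

```rust
use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder};

// Record how many files survived pruning and how many were skipped; a plan can
// later expose this from ExecutionPlan::metrics() via clone_inner().
fn record_pruning(files_scanned: usize, files_pruned: usize) -> ExecutionPlanMetricsSet {
    let metrics = ExecutionPlanMetricsSet::new();
    MetricBuilder::new(&metrics)
        .global_counter("files_scanned")
        .add(files_scanned);
    MetricBuilder::new(&metrics)
        .global_counter("files_pruned")
        .add(files_pruned);
    metrics
}

fn main() {
    let metrics = record_pruning(8, 2);
    println!("{}", metrics.clone_inner());
}
```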
- let expr = expr - .transform_up(&|expr| { - // see https://github.com/apache/datafusion/issues/10181 - // This is part of the function rewriter code in DataFusion inlined here temporarily - Ok(match expr { - Expr::GetIndexedField(GetIndexedField { - expr, - field: GetFieldAccess::NamedStructField { name }, - }) => { - let name = Expr::Literal(name); - Transformed::yes(get_field(*expr, name)) - } - // expr[idx] ==> array_element(expr, idx) - Expr::GetIndexedField(GetIndexedField { - expr, - field: GetFieldAccess::ListIndex { key }, - }) => Transformed::yes(array_element(*expr, *key)), - - // expr[start, stop, stride] ==> array_slice(expr, start, stop, stride) - Expr::GetIndexedField(GetIndexedField { - expr, - field: - GetFieldAccess::ListRange { - start, - stop, - stride, - }, - }) => Transformed::yes(array_slice(*expr, *start, *stop, *stride)), - - _ => Transformed::no(expr), - }) - })? - .data; - - datafusion_physical_expr::create_physical_expr(&expr, input_dfschema, execution_props) -} - pub(crate) async fn execute_plan_to_batch( state: &SessionState, plan: Arc, @@ -1133,15 +1132,13 @@ pub(crate) async fn execute_plan_to_batch( let batches = batch_stream.try_collect::>().await?; - DataFusionResult::<_>::Ok(arrow::compute::concat_batches(&schema, batches.iter())?) + DataFusionResult::<_>::Ok(concat_batches(&schema, batches.iter())?) } }), ) .await?; - let batch = arrow::compute::concat_batches(&plan.schema(), data.iter())?; - - Ok(batch) + Ok(concat_batches(&plan.schema(), data.iter())?) } /// Responsible for checking batches of data conform to table's invariants. @@ -1286,6 +1283,7 @@ impl PhysicalExtensionCodec for DeltaPhysicalCodec { parquet_scan: (*inputs)[0].clone(), config: wire.config, logical_schema: wire.logical_schema, + metrics: ExecutionPlanMetricsSet::new(), }; Ok(Arc::new(delta_scan)) } @@ -1332,6 +1330,7 @@ impl LogicalExtensionCodec for DeltaLogicalCodec { fn try_decode_table_provider( &self, buf: &[u8], + _table_ref: &TableReference, _schema: SchemaRef, _ctx: &SessionContext, ) -> Result, DataFusionError> { @@ -1342,6 +1341,7 @@ impl LogicalExtensionCodec for DeltaLogicalCodec { fn try_encode_table_provider( &self, + _table_ref: &TableReference, node: Arc, buf: &mut Vec, ) -> Result<(), DataFusionError> { @@ -1364,7 +1364,7 @@ pub struct DeltaTableFactory {} impl TableProviderFactory for DeltaTableFactory { async fn create( &self, - _ctx: &SessionState, + _ctx: &dyn Session, cmd: &CreateExternalTable, ) -> datafusion::error::Result> { let provider = if cmd.options.is_empty() { @@ -1386,7 +1386,7 @@ pub(crate) struct FindFilesExprProperties { /// Ensure only expressions that make sense are accepted, check for /// non-deterministic functions, and determine if the expression only contains /// partition columns -impl TreeNodeVisitor for FindFilesExprProperties { +impl TreeNodeVisitor<'_> for FindFilesExprProperties { type Node = Expr; fn f_down(&mut self, expr: &Self::Node) -> datafusion_common::Result { @@ -1417,28 +1417,20 @@ impl TreeNodeVisitor for FindFilesExprProperties { | Expr::IsNotUnknown(_) | Expr::Negative(_) | Expr::InList { .. } - | Expr::GetIndexedField(_) | Expr::Between(_) | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) => (), - Expr::ScalarFunction(ScalarFunction { func_def, .. 
}) => { - let v = match func_def { - datafusion_expr::ScalarFunctionDefinition::BuiltIn(f) => f.volatility(), - datafusion_expr::ScalarFunctionDefinition::UDF(u) => u.signature().volatility, - datafusion_expr::ScalarFunctionDefinition::Name(n) => { + Expr::ScalarFunction(scalar_function) => { + match scalar_function.func.signature().volatility { + Volatility::Immutable => (), + _ => { self.result = Err(DeltaTableError::Generic(format!( - "Cannot determine volatility of find files predicate function {n}", + "Find files predicate contains nondeterministic function {}", + scalar_function.func.name() ))); return Ok(TreeNodeRecursion::Stop); } - }; - if v > Volatility::Immutable { - self.result = Err(DeltaTableError::Generic(format!( - "Find files predicate contains nondeterministic function {}", - func_def.name() - ))); - return Ok(TreeNodeRecursion::Stop); } } _ => { @@ -1526,11 +1518,11 @@ pub(crate) async fn find_files_scan<'a>( } .build(snapshot)?; - let logical_schema = df_logical_schema(snapshot, &scan_config)?; + let logical_schema = df_logical_schema(snapshot, &scan_config.file_column_name, None)?; // Identify which columns we need to project let mut used_columns = expression - .to_columns()? + .column_refs() .into_iter() .map(|column| logical_schema.index_of(&column.name)) .collect::, ArrowError>>()?; @@ -1549,11 +1541,8 @@ pub(crate) async fn find_files_scan<'a>( let input_schema = scan.logical_schema.as_ref().to_owned(); let input_dfschema = input_schema.clone().try_into()?; - let predicate_expr = create_physical_expr_fix( - Expr::IsTrue(Box::new(expression.clone())), - &input_dfschema, - state.execution_props(), - )?; + let predicate_expr = + state.create_physical_expr(Expr::IsTrue(Box::new(expression.clone())), &input_dfschema)?; let filter: Arc = Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); @@ -1590,7 +1579,7 @@ pub(crate) async fn scan_memory_table( ))? .to_owned(), ); - fields.push(Field::new(PATH_COLUMN, DataType::Utf8, false)); + fields.push(Field::new(PATH_COLUMN, ArrowDataType::Utf8, false)); for field in schema.fields() { if field.name().starts_with("partition.") { @@ -1775,12 +1764,14 @@ impl From for DeltaColumn { #[cfg(test)] mod tests { - use crate::writer::test_utils::get_delta_schema; - use arrow::array::StructArray; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::StructArray; + use arrow_schema::Schema; use chrono::{TimeZone, Utc}; use datafusion::assert_batches_sorted_eq; + use datafusion::datasource::physical_plan::ParquetExec; use datafusion::physical_plan::empty::EmptyExec; + use datafusion::physical_plan::{visit_execution_plan, ExecutionPlanVisitor, PhysicalExpr}; + use datafusion_expr::lit; use datafusion_proto::physical_plan::AsExecutionPlan; use datafusion_proto::protobuf; use object_store::path::Path; @@ -1788,6 +1779,8 @@ mod tests { use std::ops::Deref; use super::*; + use crate::operations::write::SchemaMode; + use crate::writer::test_utils::get_delta_schema; // test deserialization of serialized partition values. 
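The rewritten find-files check above reads the volatility straight from the UDF's signature and rejects anything that is not immutable. A standalone sketch of that gate, with `String` standing in for the crate's error type:

```rust
use datafusion_expr::Volatility;

// Only immutable (deterministic, input-only) functions may appear in a
// find-files predicate; stable and volatile functions are rejected.
fn check_volatility(name: &str, volatility: Volatility) -> Result<(), String> {
    match volatility {
        Volatility::Immutable => Ok(()),
        _ => Err(format!(
            "Find files predicate contains nondeterministic function {name}"
        )),
    }
}

fn main() {
    assert!(check_volatility("concat", Volatility::Immutable).is_ok());
    assert!(check_volatility("random", Volatility::Volatile).is_err());
}
```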
// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization @@ -1891,7 +1884,7 @@ mod tests { let file = partitioned_file_from_action(&action, &part_columns, &schema); let ref_file = PartitionedFile { object_meta: object_store::ObjectMeta { - location: Path::from("year=2015/month=1/part-00000-4dcb50d3-d017-450c-9df7-a7257dbd3c5d-c000.snappy.parquet".to_string()), + location: Path::from("year=2015/month=1/part-00000-4dcb50d3-d017-450c-9df7-a7257dbd3c5d-c000.snappy.parquet".to_string()), last_modified: Utc.timestamp_millis_opt(1660497727833).unwrap(), size: 10644, e_tag: None, @@ -1900,6 +1893,7 @@ mod tests { partition_values: [ScalarValue::Int64(Some(2015)), ScalarValue::Int64(Some(1))].to_vec(), range: None, extensions: None, + statistics: None, }; assert_eq!(file.partition_values, ref_file.partition_values) } @@ -1907,8 +1901,8 @@ mod tests { #[tokio::test] async fn test_enforce_invariants() { let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Int32, false), + Field::new("a", ArrowDataType::Utf8, false), + Field::new("b", ArrowDataType::Int32, false), ])); let batch = RecordBatch::try_new( Arc::clone(&schema), @@ -1960,7 +1954,7 @@ mod tests { let struct_fields = schema.fields().clone(); let schema = Arc::new(Schema::new(vec![Field::new( "x", - DataType::Struct(struct_fields), + ArrowDataType::Struct(struct_fields), false, )])); let inner = Arc::new(StructArray::from(batch)); @@ -1980,14 +1974,15 @@ mod tests { let codec = DeltaPhysicalCodec {}; let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Int32, false), + Field::new("a", ArrowDataType::Utf8, false), + Field::new("b", ArrowDataType::Int32, false), ])); let exec_plan = Arc::from(DeltaScan { table_uri: "s3://my_bucket/this/is/some/path".to_string(), parquet_scan: Arc::from(EmptyExec::new(schema.clone())), config: DeltaScanConfig::default(), logical_schema: schema.clone(), + metrics: ExecutionPlanMetricsSet::new(), }); let proto: protobuf::PhysicalPlanNode = protobuf::PhysicalPlanNode::try_from_physical_plan(exec_plan.clone(), &codec) @@ -2036,14 +2031,14 @@ mod tests { // Tests issue (1787) where partition columns were incorrect when they // have a different order in the metadata and table schema let schema = Arc::new(ArrowSchema::new(vec![ - Field::new("modified", DataType::Utf8, true), - Field::new("id", DataType::Utf8, true), - Field::new("value", DataType::Int32, true), + Field::new("modified", ArrowDataType::Utf8, true), + Field::new("id", ArrowDataType::Utf8, true), + Field::new("value", ArrowDataType::Int32, true), ])); let table = crate::DeltaOps::new_in_memory() .create() - .with_columns(get_delta_schema().fields().clone()) + .with_columns(get_delta_schema().fields().cloned()) .with_partition_columns(["modified", "id"]) .await .unwrap(); @@ -2108,9 +2103,9 @@ mod tests { #[tokio::test] async fn delta_scan_case_sensitive() { let schema = Arc::new(ArrowSchema::new(vec![ - Field::new("moDified", DataType::Utf8, true), - Field::new("ID", DataType::Utf8, true), - Field::new("vaLue", DataType::Int32, true), + Field::new("moDified", ArrowDataType::Utf8, true), + Field::new("ID", ArrowDataType::Utf8, true), + Field::new("vaLue", ArrowDataType::Int32, true), ])); let batch = RecordBatch::try_new( @@ -2173,4 +2168,402 @@ mod tests { assert_batches_sorted_eq!(&expected, &actual); */ } + + #[tokio::test] + async fn delta_scan_supports_missing_columns() { + let schema1 = 
Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + ArrowDataType::Utf8, + true, + )])); + + let batch1 = RecordBatch::try_new( + schema1.clone(), + vec![Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + ]))], + ) + .unwrap(); + + let schema2 = Arc::new(ArrowSchema::new(vec![ + Field::new("col_1", ArrowDataType::Utf8, true), + Field::new("col_2", ArrowDataType::Utf8, true), + ])); + + let batch2 = RecordBatch::try_new( + schema2.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("E"), + Some("F"), + Some("G"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("E2"), + Some("F2"), + Some("G2"), + ])), + ], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch2]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let table = crate::DeltaOps(table) + .write(vec![batch1]) + .with_schema_mode(SchemaMode::Merge) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + let ctx: SessionContext = DeltaSessionContext::default().into(); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx.sql("select col_1, col_2 from test").await.unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+-------+-------+", + "| col_1 | col_2 |", + "+-------+-------+", + "| A | |", + "| B | |", + "| E | E2 |", + "| F | F2 |", + "| G | G2 |", + "+-------+-------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn delta_scan_supports_pushdown() { + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_1", ArrowDataType::Utf8, false), + Field::new("col_2", ArrowDataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + Some("C"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("A2"), + Some("B2"), + Some("C2"), + ])), + ], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + + let mut cfg = SessionConfig::default(); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx + .sql("select col_1, col_2 from test WHERE col_1 = 'A'") + .await + .unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+-------+-------+", + "| col_1 | col_2 |", + "+-------+-------+", + "| A | A2 |", + "+-------+-------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn delta_scan_supports_nested_missing_columns() { + let column1_schema1: arrow::datatypes::Fields = + vec![Field::new("col_1a", ArrowDataType::Utf8, true)].into(); + let schema1 = Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + ArrowDataType::Struct(column1_schema1.clone()), + true, + )])); + + let batch1 = RecordBatch::try_new( + schema1.clone(), + 
vec![Arc::new(StructArray::new( + column1_schema1, + vec![Arc::new(arrow::array::StringArray::from(vec![ + Some("A"), + Some("B"), + ]))], + None, + ))], + ) + .unwrap(); + + let column1_schema2: arrow_schema::Fields = vec![ + Field::new("col_1a", ArrowDataType::Utf8, true), + Field::new("col_1b", ArrowDataType::Utf8, true), + ] + .into(); + let schema2 = Arc::new(ArrowSchema::new(vec![Field::new( + "col_1", + ArrowDataType::Struct(column1_schema2.clone()), + true, + )])); + + let batch2 = RecordBatch::try_new( + schema2.clone(), + vec![Arc::new(StructArray::new( + column1_schema2, + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + Some("E"), + Some("F"), + Some("G"), + ])), + Arc::new(arrow::array::StringArray::from(vec![ + Some("E2"), + Some("F2"), + Some("G2"), + ])), + ], + None, + ))], + ) + .unwrap(); + + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch1]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let table = crate::DeltaOps(table) + .write(vec![batch2]) + .with_schema_mode(SchemaMode::Merge) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let config = DeltaScanConfigBuilder::new() + .build(table.snapshot().unwrap()) + .unwrap(); + let log = table.log_store(); + + let provider = + DeltaTableProvider::try_new(table.snapshot().unwrap().clone(), log, config).unwrap(); + let ctx: SessionContext = DeltaSessionContext::default().into(); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + let df = ctx + .sql("select col_1.col_1a, col_1.col_1b from test") + .await + .unwrap(); + let actual = df.collect().await.unwrap(); + let expected = vec![ + "+--------------------+--------------------+", + "| test.col_1[col_1a] | test.col_1[col_1b] |", + "+--------------------+--------------------+", + "| A | |", + "| B | |", + "| E | E2 |", + "| F | F2 |", + "| G | G2 |", + "+--------------------+--------------------+", + ]; + assert_batches_sorted_eq!(&expected, &actual); + } + + #[tokio::test] + async fn test_multiple_predicate_pushdown() { + use crate::datafusion::prelude::SessionContext; + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("moDified", ArrowDataType::Utf8, true), + Field::new("id", ArrowDataType::Utf8, true), + Field::new("vaLue", ArrowDataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-01", + "2021-02-01", + "2021-02-02", + "2021-02-02", + ])), + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C", "D"])), + Arc::new(arrow::array::Int32Array::from(vec![1, 10, 20, 100])), + ], + ) + .unwrap(); + // write some data + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let datafusion = SessionContext::new(); + let table = Arc::new(table); + + datafusion.register_table("snapshot", table).unwrap(); + + let df = datafusion + .sql("select * from snapshot where id > 10000 and id < 20000") + .await + .unwrap(); + + df.collect().await.unwrap(); + } + + #[tokio::test] + async fn test_delta_scan_builder_no_scan_config() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = 
ctx.state(); + let scan = DeltaScanBuilder::new(table.snapshot().unwrap(), table.log_store(), &state) + .with_filter(Some(col("a").eq(lit("s")))) + .build() + .await + .unwrap(); + + let mut visitor = ParquetPredicateVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert_eq!(visitor.predicate.unwrap().to_string(), "a@0 = s"); + assert_eq!( + visitor.pruning_predicate.unwrap().orig_expr().to_string(), + "a@0 = s" + ); + } + + #[tokio::test] + async fn test_delta_scan_builder_scan_config_disable_pushdown() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let snapshot = table.snapshot().unwrap(); + let ctx = SessionContext::new(); + let state = ctx.state(); + let scan = DeltaScanBuilder::new(snapshot, table.log_store(), &state) + .with_filter(Some(col("a").eq(lit("s")))) + .with_scan_config( + DeltaScanConfigBuilder::new() + .with_parquet_pushdown(false) + .build(snapshot) + .unwrap(), + ) + .build() + .await + .unwrap(); + + let mut visitor = ParquetPredicateVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert!(visitor.predicate.is_none()); + assert!(visitor.pruning_predicate.is_none()); + } + + #[tokio::test] + async fn test_delta_scan_applies_parquet_options() { + let arr: Arc = Arc::new(arrow::array::StringArray::from(vec!["s"])); + let batch = RecordBatch::try_from_iter_with_nullable(vec![("a", arr, false)]).unwrap(); + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch]) + .with_save_mode(crate::protocol::SaveMode::Append) + .await + .unwrap(); + + let snapshot = table.snapshot().unwrap(); + + let mut config = SessionConfig::default(); + config.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(config); + let state = ctx.state(); + + let scan = DeltaScanBuilder::new(snapshot, table.log_store(), &state) + .build() + .await + .unwrap(); + + let mut visitor = ParquetOptionsVisitor::default(); + visit_execution_plan(&scan, &mut visitor).unwrap(); + + assert_eq!(ctx.copied_table_options().parquet, visitor.options.unwrap()); + } + + #[derive(Default)] + struct ParquetPredicateVisitor { + predicate: Option>, + pruning_predicate: Option>, + } + + impl ExecutionPlanVisitor for ParquetPredicateVisitor { + type Error = DataFusionError; + + fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { + if let Some(parquet_exec) = plan.as_any().downcast_ref::() { + self.predicate = parquet_exec.predicate().cloned(); + self.pruning_predicate = parquet_exec.pruning_predicate().cloned(); + } + Ok(true) + } + } + + #[derive(Default)] + struct ParquetOptionsVisitor { + options: Option, + } + + impl ExecutionPlanVisitor for ParquetOptionsVisitor { + type Error = DataFusionError; + + fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { + if let Some(parquet_exec) = plan.as_any().downcast_ref::() { + self.options = Some(parquet_exec.table_parquet_options().clone()) + } + Ok(true) + } + } } diff --git a/crates/core/src/delta_datafusion/physical.rs b/crates/core/src/delta_datafusion/physical.rs index 0251836fa8..dd28e0d93b 100644 --- a/crates/core/src/delta_datafusion/physical.rs +++ b/crates/core/src/delta_datafusion/physical.rs @@ -1,13 +1,12 @@ //! 
Physical Operations for DataFusion use std::sync::Arc; +use arrow_array::RecordBatch; use arrow_schema::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::Result as DataFusionResult; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{ - metrics::{ExecutionPlanMetricsSet, MetricsSet}, - ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, +use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use datafusion_physical_plan::{ + DisplayAs, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; use futures::{Stream, StreamExt}; @@ -74,6 +73,10 @@ impl DisplayAs for MetricObserverExec { } impl ExecutionPlan for MetricObserverExec { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn std::any::Any { self } @@ -86,8 +89,8 @@ impl ExecutionPlan for MetricObserverExec { self.parent.properties() } - fn children(&self) -> Vec> { - vec![self.parent.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.parent] } fn execute( @@ -174,3 +177,7 @@ pub(crate) fn find_metric_node( None } + +pub(crate) fn get_metric(metrics: &MetricsSet, name: &str) -> usize { + metrics.sum_by_name(name).map(|m| m.as_usize()).unwrap_or(0) +} diff --git a/crates/core/src/delta_datafusion/planner.rs b/crates/core/src/delta_datafusion/planner.rs new file mode 100644 index 0000000000..6119b78ce6 --- /dev/null +++ b/crates/core/src/delta_datafusion/planner.rs @@ -0,0 +1,58 @@ +//! Custom planners for datafusion so that you can convert custom nodes, can be used +//! to trace custom metrics in an operation +//! +//! # Example +//! +//! #[derive(Clone)] +//! struct MergeMetricExtensionPlanner {} +//! +//! #[async_trait] +//! impl ExtensionPlanner for MergeMetricExtensionPlanner { +//! async fn plan_extension( +//! &self, +//! planner: &dyn PhysicalPlanner, +//! node: &dyn UserDefinedLogicalNode, +//! _logical_inputs: &[&LogicalPlan], +//! physical_inputs: &[Arc], +//! session_state: &SessionState, +//! ) -> DataFusionResult>> {} +//! +//! let merge_planner = DeltaPlanner:: { +//! extension_planner: MergeMetricExtensionPlanner {} +//! }; +//! +//! 
let state = state.with_query_planner(Arc::new(merge_planner)); +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::physical_planner::PhysicalPlanner; +use datafusion::{ + execution::{context::QueryPlanner, session_state::SessionState}, + physical_plan::ExecutionPlan, + physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner}, +}; +use datafusion_expr::LogicalPlan; + +use crate::delta_datafusion::DataFusionResult; + +/// Deltaplanner +pub struct DeltaPlanner { + /// custom extension planner + pub extension_planner: T, +} + +#[async_trait] +impl QueryPlanner for DeltaPlanner { + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionState, + ) -> DataFusionResult> { + let planner = Arc::new(Box::new(DefaultPhysicalPlanner::with_extension_planners( + vec![Arc::new(self.extension_planner.clone())], + ))); + planner + .create_physical_plan(logical_plan, session_state) + .await + } +} diff --git a/crates/core/src/delta_datafusion/schema_adapter.rs b/crates/core/src/delta_datafusion/schema_adapter.rs new file mode 100644 index 0000000000..99a97e2130 --- /dev/null +++ b/crates/core/src/delta_datafusion/schema_adapter.rs @@ -0,0 +1,82 @@ +use std::fmt::Debug; +use std::sync::Arc; + +use arrow_array::RecordBatch; +use arrow_schema::{Schema, SchemaRef}; +use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper}; + +use crate::operations::cast::cast_record_batch; + +/// A Schema Adapter Factory which provides casting record batches from parquet to meet +/// delta lake conventions. +#[derive(Debug)] +pub(crate) struct DeltaSchemaAdapterFactory {} + +impl SchemaAdapterFactory for DeltaSchemaAdapterFactory { + fn create(&self, schema: SchemaRef) -> Box { + Box::new(DeltaSchemaAdapter { + table_schema: schema, + }) + } +} + +pub(crate) struct DeltaSchemaAdapter { + /// Schema for the table + table_schema: SchemaRef, +} + +impl SchemaAdapter for DeltaSchemaAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.table_schema.field(index); + Some(file_schema.fields.find(field.name())?.0) + } + + fn map_schema( + &self, + file_schema: &Schema, + ) -> datafusion_common::Result<(Arc, Vec)> { + let mut projection = Vec::with_capacity(file_schema.fields().len()); + + for (file_idx, file_field) in file_schema.fields.iter().enumerate() { + if self.table_schema.fields().find(file_field.name()).is_some() { + projection.push(file_idx); + } + } + + Ok(( + Arc::new(SchemaMapping { + table_schema: self.table_schema.clone(), + }), + projection, + )) + } +} + +#[derive(Debug)] +pub(crate) struct SchemaMapping { + table_schema: SchemaRef, +} + +impl SchemaMapper for SchemaMapping { + fn map_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let record_batch = cast_record_batch(&batch, self.table_schema.clone(), false, true)?; + Ok(record_batch) + } + + fn map_partial_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let partial_table_schema = Arc::new(Schema::new( + batch + .schema() + .fields() + .iter() + .filter_map(|batch_field| { + self.table_schema.field_with_name(batch_field.name()).ok() + }) + .cloned() + .collect::>(), + )); + + let record_batch = cast_record_batch(&batch, partial_table_schema, false, true)?; + Ok(record_batch) + } +} diff --git a/crates/core/src/errors.rs b/crates/core/src/errors.rs index 9c3b04aac3..609bc16656 100644 --- a/crates/core/src/errors.rs +++ b/crates/core/src/errors.rs @@ -11,6 +11,9 @@ pub type DeltaResult 
= Result; #[allow(missing_docs)] #[derive(thiserror::Error, Debug)] pub enum DeltaTableError { + #[error("Kernel error: {0}")] + KernelError(#[from] delta_kernel::error::Error), + #[error("Delta protocol violation: {source}")] Protocol { source: ProtocolError }, @@ -218,6 +221,9 @@ pub enum DeltaTableError { #[error("Table has not yet been initialized")] NotInitialized, + #[error("Table has not yet been initialized with files, therefore {0} is not supported")] + NotInitializedWithFiles(String), + #[error("Change Data not enabled for version: {version}, Start: {start}, End: {end}")] ChangeDataNotRecorded { version: i64, start: i64, end: i64 }, @@ -263,4 +269,9 @@ impl DeltaTableError { ); Self::NotATable(msg) } + + /// Create a [Generic](DeltaTableError::Generic) error with the given message. + pub fn generic(msg: impl ToString) -> Self { + Self::Generic(msg.to_string()) + } } diff --git a/crates/core/src/kernel/arrow/mod.rs b/crates/core/src/kernel/arrow/mod.rs index 648ad16bbc..3ddd35560c 100644 --- a/crates/core/src/kernel/arrow/mod.rs +++ b/crates/core/src/kernel/arrow/mod.rs @@ -3,275 +3,17 @@ use std::sync::Arc; use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, - Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, + Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; use lazy_static::lazy_static; -use super::{ - ActionType, ArrayType, DataType, MapType, PrimitiveType, StructField, StructType, - DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE, -}; - pub(crate) mod extract; pub(crate) mod json; -const MAP_ROOT_DEFAULT: &str = "entries"; +const MAP_ROOT_DEFAULT: &str = "key_value"; const MAP_KEY_DEFAULT: &str = "key"; const MAP_VALUE_DEFAULT: &str = "value"; -const LIST_ROOT_DEFAULT: &str = "item"; - -impl TryFrom for ArrowField { - type Error = ArrowError; - - fn try_from(value: ActionType) -> Result { - value.schema_field().try_into() - } -} - -impl TryFrom<&StructType> for ArrowSchema { - type Error = ArrowError; - - fn try_from(s: &StructType) -> Result { - let fields = s - .fields() - .iter() - .map(TryInto::try_into) - .collect::, ArrowError>>()?; - - Ok(ArrowSchema::new(fields)) - } -} - -impl TryFrom<&StructField> for ArrowField { - type Error = ArrowError; - - fn try_from(f: &StructField) -> Result { - let metadata = f - .metadata() - .iter() - .map(|(key, val)| Ok((key.clone(), serde_json::to_string(val)?))) - .collect::>() - .map_err(|err| ArrowError::JsonError(err.to_string()))?; - - let field = ArrowField::new( - f.name(), - ArrowDataType::try_from(f.data_type())?, - f.is_nullable(), - ) - .with_metadata(metadata); - - Ok(field) - } -} - -impl TryFrom<&ArrayType> for ArrowField { - type Error = ArrowError; - fn try_from(a: &ArrayType) -> Result { - Ok(ArrowField::new( - LIST_ROOT_DEFAULT, - ArrowDataType::try_from(a.element_type())?, - // TODO check how to handle nullability - a.contains_null(), - )) - } -} - -impl TryFrom<&MapType> for ArrowField { - type Error = ArrowError; - - fn try_from(a: &MapType) -> Result { - Ok(ArrowField::new( - MAP_ROOT_DEFAULT, - ArrowDataType::Struct( - vec![ - ArrowField::new( - MAP_KEY_DEFAULT, - ArrowDataType::try_from(a.key_type())?, - false, - ), - ArrowField::new( - MAP_VALUE_DEFAULT, - ArrowDataType::try_from(a.value_type())?, - a.value_contains_null(), - ), - ] - .into(), - ), - // always non-null - false, - )) - } -} - -impl TryFrom<&DataType> for ArrowDataType { - type Error = ArrowError; - - fn 
try_from(t: &DataType) -> Result { - match t { - DataType::Primitive(p) => { - match p { - PrimitiveType::String => Ok(ArrowDataType::Utf8), - PrimitiveType::Long => Ok(ArrowDataType::Int64), // undocumented type - PrimitiveType::Integer => Ok(ArrowDataType::Int32), - PrimitiveType::Short => Ok(ArrowDataType::Int16), - PrimitiveType::Byte => Ok(ArrowDataType::Int8), - PrimitiveType::Float => Ok(ArrowDataType::Float32), - PrimitiveType::Double => Ok(ArrowDataType::Float64), - PrimitiveType::Boolean => Ok(ArrowDataType::Boolean), - PrimitiveType::Binary => Ok(ArrowDataType::Binary), - PrimitiveType::Decimal(precision, scale) => { - if precision <= &DECIMAL_MAX_PRECISION && scale <= &DECIMAL_MAX_SCALE { - Ok(ArrowDataType::Decimal128(*precision, *scale)) - } else { - Err(ArrowError::CastError(format!( - "Precision/scale can not be larger than 38 ({},{})", - precision, scale - ))) - } - } - PrimitiveType::Date => { - // A calendar date, represented as a year-month-day triple without a - // timezone. Stored as 4 bytes integer representing days since 1970-01-01 - Ok(ArrowDataType::Date32) - } - PrimitiveType::Timestamp => Ok(ArrowDataType::Timestamp( - TimeUnit::Microsecond, - Some("UTC".into()), - )), - PrimitiveType::TimestampNtz => { - Ok(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)) - } - } - } - DataType::Struct(s) => Ok(ArrowDataType::Struct( - s.fields() - .iter() - .map(TryInto::try_into) - .collect::, ArrowError>>()? - .into(), - )), - DataType::Array(a) => Ok(ArrowDataType::List(Arc::new(a.as_ref().try_into()?))), - DataType::Map(m) => Ok(ArrowDataType::Map(Arc::new(m.as_ref().try_into()?), false)), - } - } -} - -impl TryFrom<&ArrowSchema> for StructType { - type Error = ArrowError; - - fn try_from(arrow_schema: &ArrowSchema) -> Result { - let new_fields: Result, _> = arrow_schema - .fields() - .iter() - .map(|field| field.as_ref().try_into()) - .collect(); - Ok(StructType::new(new_fields?)) - } -} - -impl TryFrom for StructType { - type Error = ArrowError; - - fn try_from(arrow_schema: ArrowSchemaRef) -> Result { - arrow_schema.as_ref().try_into() - } -} - -impl TryFrom<&ArrowField> for StructField { - type Error = ArrowError; - - fn try_from(arrow_field: &ArrowField) -> Result { - Ok(StructField::new( - arrow_field.name().clone(), - DataType::try_from(arrow_field.data_type())?, - arrow_field.is_nullable(), - ) - .with_metadata(arrow_field.metadata().iter().map(|(k, v)| (k.clone(), v)))) - } -} - -impl TryFrom<&ArrowDataType> for DataType { - type Error = ArrowError; - - fn try_from(arrow_datatype: &ArrowDataType) -> Result { - match arrow_datatype { - ArrowDataType::Utf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::LargeUtf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::Int64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::Int32 => Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::Int16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::Int8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::UInt64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::UInt32 => Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::UInt16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::UInt8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::Float32 => Ok(DataType::Primitive(PrimitiveType::Float)), - ArrowDataType::Float64 => Ok(DataType::Primitive(PrimitiveType::Double)), - 
ArrowDataType::Boolean => Ok(DataType::Primitive(PrimitiveType::Boolean)), - ArrowDataType::Binary => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::FixedSizeBinary(_) => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::LargeBinary => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::Decimal128(p, s) => { - Ok(DataType::Primitive(PrimitiveType::Decimal(*p, *s))) - } - ArrowDataType::Decimal256(p, s) => DataType::decimal(*p, *s).map_err(|_| { - ArrowError::SchemaError(format!( - "Invalid data type for Delta Lake: decimal({},{})", - p, s - )) - }), - ArrowDataType::Date32 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Date64 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => { - Ok(DataType::Primitive(PrimitiveType::TimestampNtz)) - } - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) - if tz.eq_ignore_ascii_case("utc") => - { - Ok(DataType::Primitive(PrimitiveType::Timestamp)) - } - ArrowDataType::Struct(fields) => { - let converted_fields: Result, _> = fields - .iter() - .map(|field| field.as_ref().try_into()) - .collect(); - Ok(DataType::Struct(Box::new(StructType::new( - converted_fields?, - )))) - } - ArrowDataType::List(field) => Ok(DataType::Array(Box::new(ArrayType::new( - (*field).data_type().try_into()?, - (*field).is_nullable(), - )))), - ArrowDataType::LargeList(field) => Ok(DataType::Array(Box::new(ArrayType::new( - (*field).data_type().try_into()?, - (*field).is_nullable(), - )))), - ArrowDataType::FixedSizeList(field, _) => Ok(DataType::Array(Box::new( - ArrayType::new((*field).data_type().try_into()?, (*field).is_nullable()), - ))), - ArrowDataType::Map(field, _) => { - if let ArrowDataType::Struct(struct_fields) = field.data_type() { - let key_type = struct_fields[0].data_type().try_into()?; - let value_type = struct_fields[1].data_type().try_into()?; - let value_type_nullable = struct_fields[1].is_nullable(); - Ok(DataType::Map(Box::new(MapType::new( - key_type, - value_type, - value_type_nullable, - )))) - } else { - panic!("DataType::Map should contain a struct field child"); - } - } - ArrowDataType::Dictionary(_, value_type) => Ok(value_type.as_ref().try_into()?), - s => Err(ArrowError::SchemaError(format!( - "Invalid data type for Delta Lake: {s}" - ))), - } - } -} macro_rules! 
arrow_map { ($fieldname: ident, null) => { @@ -507,13 +249,15 @@ pub(crate) fn delta_log_schema_for_table( .iter() .for_each(|f| max_min_schema_for_fields(&mut max_min_vec, f)); - stats_parsed_fields.extend(["minValues", "maxValues"].into_iter().map(|name| { - ArrowField::new( - name, - ArrowDataType::Struct(max_min_vec.clone().into()), - true, - ) - })); + if !max_min_vec.is_empty() { + stats_parsed_fields.extend(["minValues", "maxValues"].into_iter().map(|name| { + ArrowField::new( + name, + ArrowDataType::Struct(max_min_vec.clone().into()), + true, + ) + })); + } let mut null_count_vec = Vec::new(); non_partition_fields @@ -585,8 +329,7 @@ fn max_min_schema_for_fields(dest: &mut Vec, f: &ArrowField) { // don't compute min or max for list, map or binary types ArrowDataType::List(_) | ArrowDataType::Map(_, _) | ArrowDataType::Binary => { /* noop */ } _ => { - let f = f.clone(); - dest.push(f); + dest.push(ArrowField::new(f.name(), f.data_type().clone(), true)); } } } @@ -615,15 +358,15 @@ fn null_count_schema_for_fields(dest: &mut Vec, f: &ArrowField) { #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::sync::Arc; + use arrow::array::ArrayData; - use arrow_array::Array; - use arrow_array::{make_array, ArrayRef, MapArray, StringArray, StructArray}; + use arrow_array::{Array, BinaryArray, MapArray, RecordBatch, StringArray, StructArray}; use arrow_buffer::{Buffer, ToByteSlice}; - use arrow_schema::Field; + use delta_kernel::schema::{DataType, MapType, PrimitiveType, StructField, StructType}; use super::*; - use std::collections::HashMap; - use std::sync::Arc; #[test] fn delta_log_schema_for_table_test() { @@ -766,108 +509,6 @@ mod tests { } } - #[test] - fn test_arrow_from_delta_decimal_type() { - let precision = 20; - let scale = 2; - let decimal_field = DataType::Primitive(PrimitiveType::Decimal(precision, scale)); - assert_eq!( - >::try_from(&decimal_field).unwrap(), - ArrowDataType::Decimal128(precision, scale) - ); - } - - #[test] - fn test_arrow_from_delta_decimal_type_invalid_precision() { - let precision = 39; - let scale = 2; - assert!(matches!( - >::try_from(&ArrowDataType::Decimal256( - precision, scale - )) - .unwrap_err(), - _ - )); - } - - #[test] - fn test_arrow_from_delta_decimal_type_invalid_scale() { - let precision = 2; - let scale = 39; - assert!(matches!( - >::try_from(&ArrowDataType::Decimal256( - precision, scale - )) - .unwrap_err(), - _ - )); - } - - #[test] - fn test_arrow_from_delta_timestamp_type() { - let timestamp_field = DataType::Primitive(PrimitiveType::Timestamp); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string().into())) - ); - } - - #[test] - fn test_arrow_from_delta_timestampntz_type() { - let timestamp_field = DataType::Primitive(PrimitiveType::TimestampNtz); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) - ); - } - - #[test] - fn test_delta_from_arrow_timestamp_type_no_tz() { - let timestamp_field = ArrowDataType::Timestamp(TimeUnit::Microsecond, None); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - DataType::Primitive(PrimitiveType::TimestampNtz) - ); - } - - #[test] - fn test_delta_from_arrow_timestamp_type_with_tz() { - let timestamp_field = - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string().into())); - assert_eq!( - >::try_from(×tamp_field).unwrap(), - DataType::Primitive(PrimitiveType::Timestamp) - ); - } - - #[test] - fn test_delta_from_arrow_map_type() { - let 
arrow_map = ArrowDataType::Map( - Arc::new(ArrowField::new( - "entries", - ArrowDataType::Struct( - vec![ - ArrowField::new("key", ArrowDataType::Int8, false), - ArrowField::new("value", ArrowDataType::Binary, true), - ] - .into(), - ), - false, - )), - false, - ); - let converted_map: DataType = (&arrow_map).try_into().unwrap(); - - assert_eq!( - converted_map, - DataType::Map(Box::new(MapType::new( - DataType::Primitive(PrimitiveType::Byte), - DataType::Primitive(PrimitiveType::Binary), - true, - ))) - ); - } - #[test] fn test_record_batch_from_map_type() { let keys = vec!["0", "1", "5", "6", "7"]; @@ -881,52 +522,36 @@ mod tests { let entry_offsets = vec![0u32, 1, 1, 4, 5, 5]; let num_rows = keys.len(); - // Copied the function `new_from_string` with the patched code from https://github.com/apache/arrow-rs/pull/4808 - // This should be reverted back [`MapArray::new_from_strings`] once arrow is upgraded in this project. - fn new_from_strings<'a>( - keys: impl Iterator, - values: &dyn Array, - entry_offsets: &[u32], - ) -> Result { - let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice()); - let keys_data = StringArray::from_iter_values(keys); - - let keys_field = Arc::new(Field::new("key", ArrowDataType::Utf8, false)); - let values_field = Arc::new(Field::new( - "value", - values.data_type().clone(), - values.null_count() > 0, - )); - - let entry_struct = StructArray::from(vec![ - (keys_field, Arc::new(keys_data) as ArrayRef), - (values_field, make_array(values.to_data())), - ]); - - let map_data_type = ArrowDataType::Map( - Arc::new(Field::new( - "entries", - entry_struct.data_type().clone(), - false, - )), - false, - ); - - let map_data = ArrayData::builder(map_data_type) - .len(entry_offsets.len() - 1) - .add_buffer(entry_offsets_buffer) - .add_child_data(entry_struct.into_data()) - .build()?; + let key_field = Arc::new(ArrowField::new(MAP_KEY_DEFAULT, ArrowDataType::Utf8, false)); + let value_field = Arc::new(ArrowField::new( + MAP_VALUE_DEFAULT, + ArrowDataType::Binary, + false, + )); + let key_value_field = ArrowField::new_struct( + MAP_ROOT_DEFAULT, + vec![key_field.clone(), value_field.clone()], + false, + ); + let key_value_array = StructArray::new( + vec![key_field, value_field].into(), + vec![ + Arc::new(StringArray::from(keys)), + Arc::new(BinaryArray::from(values)), + ], + None, + ); + let entry_offsets_buffer = Buffer::from(entry_offsets.as_slice().to_byte_slice()); - Ok(MapArray::from(map_data)) - } + let map_data_type = ArrowDataType::Map(Arc::new(key_value_field), false); + let map_data = ArrayData::builder(map_data_type) + .len(entry_offsets.len() - 1) + .add_buffer(entry_offsets_buffer) + .add_child_data(key_value_array.into_data()) + .build() + .unwrap(); - let map_array = new_from_strings( - keys.into_iter(), - &arrow::array::BinaryArray::from(values), - entry_offsets.as_slice(), - ) - .expect("Could not create a map array"); + let map_array = MapArray::from(map_data); let schema = >::try_from(&StructType::new(vec![ @@ -942,9 +567,8 @@ mod tests { ])) .expect("Could not get schema"); - let record_batch = - arrow::record_batch::RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_array)]) - .expect("Failed to create RecordBatch"); + let record_batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_array)]) + .expect("Failed to create RecordBatch"); assert_eq!(record_batch.num_columns(), 1); assert_eq!(record_batch.num_rows(), num_rows); diff --git a/crates/core/src/kernel/error.rs b/crates/core/src/kernel/error.rs index 
853b10e411..cefe81bf9d 100644 --- a/crates/core/src/kernel/error.rs +++ b/crates/core/src/kernel/error.rs @@ -71,13 +71,3 @@ pub enum Error { #[error("Failed to parse value '{0}' as '{1}'")] Parse(String, DataType), } - -#[cfg(feature = "object_store")] -impl From for Error { - fn from(value: object_store::Error) -> Self { - match value { - object_store::Error::NotFound { path, .. } => Self::FileNotFound(path), - err => Self::ObjectStore(err), - } - } -} diff --git a/crates/core/src/kernel/expressions/eval.rs b/crates/core/src/kernel/expressions/eval.rs deleted file mode 100644 index cb6beea3ad..0000000000 --- a/crates/core/src/kernel/expressions/eval.rs +++ /dev/null @@ -1,384 +0,0 @@ -//! Default Expression handler. -//! -//! Expression handling based on arrow-rs compute kernels. - -use std::sync::Arc; - -use arrow_arith::boolean::{and, is_null, not, or}; -use arrow_arith::numeric::{add, div, mul, sub}; -use arrow_array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Datum, Decimal128Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, RecordBatch, StringArray, - StructArray, TimestampMicrosecondArray, -}; -use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow_schema::{ArrowError, Field as ArrowField, Schema as ArrowSchema}; -use arrow_select::nullif::nullif; - -use crate::kernel::arrow::extract::extract_column; -use crate::kernel::error::{DeltaResult, Error}; -use crate::kernel::expressions::{scalars::Scalar, Expression}; -use crate::kernel::expressions::{BinaryOperator, UnaryOperator}; -use crate::kernel::{DataType, PrimitiveType, VariadicOperator}; - -fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { - arr.as_any() - .downcast_ref::() - .ok_or(Error::Generic("expected boolean array".to_string())) -} - -fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { - Arc::new(arr) as Arc -} - -// TODO leverage scalars / Datum - -impl Scalar { - /// Convert scalar to arrow array. 
- pub fn to_array(&self, num_rows: usize) -> DeltaResult { - use Scalar::*; - let arr: ArrayRef = match self { - Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), - Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), - Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), - Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), - Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), - Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), - String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), - Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), - Timestamp(val) => { - Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows).with_timezone("UTC")) - } - TimestampNtz(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), - Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), - Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), - Decimal(val, precision, scale) => Arc::new( - Decimal128Array::from_value(*val, num_rows) - .with_precision_and_scale(*precision, *scale)?, - ), - Null(data_type) => match data_type { - DataType::Primitive(primitive) => match primitive { - PrimitiveType::Byte => Arc::new(Int8Array::new_null(num_rows)), - PrimitiveType::Short => Arc::new(Int16Array::new_null(num_rows)), - PrimitiveType::Integer => Arc::new(Int32Array::new_null(num_rows)), - PrimitiveType::Long => Arc::new(Int64Array::new_null(num_rows)), - PrimitiveType::Float => Arc::new(Float32Array::new_null(num_rows)), - PrimitiveType::Double => Arc::new(Float64Array::new_null(num_rows)), - PrimitiveType::String => Arc::new(StringArray::new_null(num_rows)), - PrimitiveType::Boolean => Arc::new(BooleanArray::new_null(num_rows)), - PrimitiveType::Timestamp => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows).with_timezone("UTC")) - } - PrimitiveType::TimestampNtz => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows)) - } - PrimitiveType::Date => Arc::new(Date32Array::new_null(num_rows)), - PrimitiveType::Binary => Arc::new(BinaryArray::new_null(num_rows)), - PrimitiveType::Decimal(precision, scale) => Arc::new( - Decimal128Array::new_null(num_rows) - .with_precision_and_scale(*precision, *scale) - .unwrap(), - ), - }, - DataType::Array(_) => unimplemented!(), - DataType::Map { .. } => unimplemented!(), - DataType::Struct { .. } => unimplemented!(), - }, - Struct(values, fields) => { - let mut columns = Vec::with_capacity(values.len()); - for val in values { - columns.push(val.to_array(num_rows)?); - } - Arc::new(StructArray::try_new( - fields - .iter() - .map(TryInto::::try_into) - .collect::, _>>()? - .into(), - columns, - None, - )?) - } - }; - Ok(arr) - } -} - -/// evaluate expression -pub(crate) fn evaluate_expression( - expression: &Expression, - batch: &RecordBatch, - result_type: Option<&DataType>, -) -> DeltaResult { - use BinaryOperator::*; - use Expression::*; - - match (expression, result_type) { - (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), - (Column(name), _) => { - if name.contains('.') { - let mut path = name.split('.'); - // Safety: we know that the first path step exists, because we checked for '.' 
- let arr = extract_column(batch, path.next().unwrap(), &mut path).cloned()?; - // NOTE: need to assign first so that rust can figure out lifetimes - Ok(arr) - } else { - batch - .column_by_name(name) - .ok_or(Error::MissingColumn(name.clone())) - .cloned() - } - } - (Struct(fields), Some(DataType::Struct(schema))) => { - let output_schema: ArrowSchema = schema.as_ref().try_into()?; - let mut columns = Vec::with_capacity(fields.len()); - for (expr, field) in fields.iter().zip(schema.fields()) { - columns.push(evaluate_expression(expr, batch, Some(field.data_type()))?); - } - Ok(Arc::new(StructArray::try_new( - output_schema.fields().clone(), - columns, - None, - )?)) - } - (Struct(_), _) => Err(Error::Generic( - "Data type is required to evaluate struct expressions".to_string(), - )), - (UnaryOperation { op, expr }, _) => { - let arr = evaluate_expression(expr.as_ref(), batch, None)?; - Ok(match op { - UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), - UnaryOperator::IsNull => Arc::new(is_null(&arr)?), - }) - } - (BinaryOperation { op, left, right }, _) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - - type Operation = fn(&dyn Datum, &dyn Datum) -> Result, ArrowError>; - let eval: Operation = match op { - Plus => add, - Minus => sub, - Multiply => mul, - Divide => div, - LessThan => |l, r| lt(l, r).map(wrap_comparison_result), - LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), - GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), - GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), - Equal => |l, r| eq(l, r).map(wrap_comparison_result), - NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), - }; - - eval(&left_arr, &right_arr).map_err(|err| Error::GenericError { - source: Box::new(err), - }) - } - (VariadicOperation { op, exprs }, _) => { - let reducer = match op { - VariadicOperator::And => and, - VariadicOperator::Or => or, - }; - exprs - .iter() - .map(|expr| evaluate_expression(expr, batch, Some(&DataType::BOOLEAN))) - .reduce(|l, r| { - Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) - .map(wrap_comparison_result)?) - }) - .transpose()? - .ok_or(Error::Generic("empty expression".to_string())) - } - (NullIf { expr, if_expr }, _) => { - let expr_arr = evaluate_expression(expr.as_ref(), batch, None)?; - let if_expr_arr = - evaluate_expression(if_expr.as_ref(), batch, Some(&DataType::BOOLEAN))?; - let if_expr_arr = downcast_to_bool(&if_expr_arr)?; - Ok(nullif(&expr_arr, if_expr_arr)?) 
- } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use arrow_array::Int32Array; - use arrow_schema::{DataType, Field, Fields, Schema}; - use std::ops::{Add, Div, Mul, Sub}; - - #[test] - fn test_extract_column() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); - let column = Expression::Column("a".to_string()); - - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - - let schema = Schema::new(vec![Field::new( - "b", - DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), - false, - )]); - - let struct_values: ArrayRef = Arc::new(values.clone()); - let struct_array = StructArray::from(vec![( - Arc::new(Field::new("a", DataType::Int32, false)), - struct_values, - )]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(struct_array.clone())], - ) - .unwrap(); - let column = Expression::Column("b.a".to_string()); - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - } - - #[test] - fn test_binary_op_scalar() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = Expression::Column("a".to_string()); - - let expression = Box::new(column.clone().add(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().sub(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().mul(Expression::Literal(Scalar::Integer(2)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - // TODO handle type casting - let expression = Box::new(column.div(Expression::Literal(Scalar::Integer(1)))); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); - assert_eq!(results.as_ref(), expected.as_ref()) - } - - #[test] - fn test_binary_op() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - ]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(values.clone()), Arc::new(values)], - ) - .unwrap(); - let column_a = Expression::Column("a".to_string()); - let column_b = Expression::Column("b".to_string()); - - let expression = Box::new(column_a.clone().add(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().sub(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = 
Arc::new(Int32Array::from(vec![0, 0, 0])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().mul(column_b)); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_binary_cmp() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = Expression::Column("a".to_string()); - let lit = Expression::Literal(Scalar::Integer(2)); - - let expression = Box::new(column.clone().lt(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().lt_eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().gt(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().gt_eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().eq(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column.clone().ne(lit.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_logical() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Boolean, false), - Field::new("b", DataType::Boolean, false), - ]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![ - Arc::new(BooleanArray::from(vec![true, false])), - Arc::new(BooleanArray::from(vec![false, true])), - ], - ) - .unwrap(); - let column_a = Expression::Column("a".to_string()); - let column_b = Expression::Column("b".to_string()); - - let expression = Box::new(column_a.clone().and(column_b.clone())); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new( - column_a - .clone() - .and(Expression::literal(Scalar::Boolean(true))), - ); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = Box::new(column_a.clone().or(column_b)); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - 
- let expression = Box::new( - column_a - .clone() - .or(Expression::literal(Scalar::Boolean(false))), - ); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - } -} diff --git a/crates/core/src/kernel/expressions/mod.rs b/crates/core/src/kernel/expressions/mod.rs deleted file mode 100644 index dd8aab51de..0000000000 --- a/crates/core/src/kernel/expressions/mod.rs +++ /dev/null @@ -1,478 +0,0 @@ -//! expressions. - -use std::collections::HashSet; -use std::fmt::{Display, Formatter}; -use std::sync::Arc; - -use arrow_array::{ArrayRef, RecordBatch}; -use arrow_schema::Schema as ArrowSchema; -use itertools::Itertools; - -use self::eval::evaluate_expression; -use super::{DataType, DeltaResult, SchemaRef}; - -pub use self::scalars::*; - -mod eval; -mod scalars; - -/// Interface for implementing an Expression evaluator. -/// -/// It contains one Expression which can be evaluated on multiple ColumnarBatches. -/// Connectors can implement this interface to optimize the evaluation using the -/// connector specific capabilities. -pub trait ExpressionEvaluator { - /// Evaluate the expression on given ColumnarBatch data. - /// - /// Contains one value for each row of the input. - /// The data type of the output is same as the type output of the expression this evaluator is using. - fn evaluate(&self, batch: &RecordBatch) -> DeltaResult; -} - -/// Provides expression evaluation capability to Delta Kernel. -/// -/// Delta Kernel can use this client to evaluate predicate on partition filters, -/// fill up partition column values and any computation on data using Expressions. -pub trait ExpressionHandler { - /// Create an [`ExpressionEvaluator`] that can evaluate the given [`Expression`] - /// on columnar batches with the given [`Schema`] to produce data of [`DataType`]. - /// - /// # Parameters - /// - /// - `schema`: Schema of the input data. - /// - `expression`: Expression to evaluate. - /// - `output_type`: Expected result data type. 
- /// - /// [`Schema`]: crate::schema::StructType - /// [`DataType`]: crate::schema::DataType - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc; -} - -/// Default implementation of [`ExpressionHandler`] that uses [`evaluate_expression`] -#[derive(Debug)] -pub struct ArrowExpressionHandler {} - -impl ExpressionHandler for ArrowExpressionHandler { - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc { - Arc::new(DefaultExpressionEvaluator { - input_schema: schema, - expression: Box::new(expression), - output_type, - }) - } -} - -/// Default implementation of [`ExpressionEvaluator`] that uses [`evaluate_expression`] -#[derive(Debug)] -pub struct DefaultExpressionEvaluator { - input_schema: SchemaRef, - expression: Box, - output_type: DataType, -} - -impl ExpressionEvaluator for DefaultExpressionEvaluator { - fn evaluate(&self, batch: &RecordBatch) -> DeltaResult { - let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; - // TODO: make sure we have matching schemas for validation - // if batch.schema().as_ref() != &input_schema { - // return Err(Error::Generic(format!( - // "input schema does not match batch schema: {:?} != {:?}", - // input_schema, - // batch.schema() - // ))); - // }; - evaluate_expression(&self.expression, batch, Some(&self.output_type)) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -/// A binary operator. -pub enum BinaryOperator { - /// Arithmetic Plus - Plus, - /// Arithmetic Minus - Minus, - /// Arithmetic Multiply - Multiply, - /// Arithmetic Divide - Divide, - /// Comparison Less Than - LessThan, - /// Comparison Less Than Or Equal - LessThanOrEqual, - /// Comparison Greater Than - GreaterThan, - /// Comparison Greater Than Or Equal - GreaterThanOrEqual, - /// Comparison Equal - Equal, - /// Comparison Not Equal - NotEqual, -} - -/// Variadic operators -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum VariadicOperator { - /// AND - And, - /// OR - Or, -} - -impl Display for BinaryOperator { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - // Self::And => write!(f, "AND"), - // Self::Or => write!(f, "OR"), - Self::Plus => write!(f, "+"), - Self::Minus => write!(f, "-"), - Self::Multiply => write!(f, "*"), - Self::Divide => write!(f, "/"), - Self::LessThan => write!(f, "<"), - Self::LessThanOrEqual => write!(f, "<="), - Self::GreaterThan => write!(f, ">"), - Self::GreaterThanOrEqual => write!(f, ">="), - Self::Equal => write!(f, "="), - Self::NotEqual => write!(f, "!="), - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq)] -/// A unary operator. -pub enum UnaryOperator { - /// Unary Not - Not, - /// Unary Is Null - IsNull, -} - -/// A SQL expression. -/// -/// These expressions do not track or validate data types, other than the type -/// of literals. It is up to the expression evaluator to validate the -/// expression against a schema and add appropriate casts as required. -#[derive(Debug, Clone, PartialEq)] -pub enum Expression { - /// A literal value. - Literal(Scalar), - /// A column reference by name. - Column(String), - /// - Struct(Vec), - /// A binary operation. - BinaryOperation { - /// The operator. - op: BinaryOperator, - /// The left-hand side of the operation. - left: Box, - /// The right-hand side of the operation. - right: Box, - }, - /// A unary operation. - UnaryOperation { - /// The operator. - op: UnaryOperator, - /// The expression. 
- expr: Box, - }, - /// A variadic operation. - VariadicOperation { - /// The operator. - op: VariadicOperator, - /// The expressions. - exprs: Vec, - }, - /// A NULLIF expression. - NullIf { - /// The expression to evaluate. - expr: Box, - /// The expression to compare against. - if_expr: Box, - }, - // TODO: support more expressions, such as IS IN, LIKE, etc. -} - -impl> From for Expression { - fn from(value: T) -> Self { - Self::literal(value) - } -} - -impl Display for Expression { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Literal(l) => write!(f, "{}", l), - Self::Column(name) => write!(f, "Column({})", name), - Self::Struct(exprs) => write!( - f, - "Struct({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ), - Self::BinaryOperation { op, left, right } => write!(f, "{} {} {}", left, op, right), - Self::UnaryOperation { op, expr } => match op { - UnaryOperator::Not => write!(f, "NOT {}", expr), - UnaryOperator::IsNull => write!(f, "{} IS NULL", expr), - }, - Self::VariadicOperation { op, exprs } => match op { - VariadicOperator::And => { - write!( - f, - "AND({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ) - } - VariadicOperator::Or => { - write!( - f, - "OR({})", - &exprs.iter().map(|e| format!("{e}")).join(", ") - ) - } - }, - Self::NullIf { expr, if_expr } => write!(f, "NULLIF({}, {})", expr, if_expr), - } - } -} - -impl Expression { - /// Returns a set of columns referenced by this expression. - pub fn references(&self) -> HashSet<&str> { - let mut set = HashSet::new(); - - for expr in self.walk() { - if let Self::Column(name) = expr { - set.insert(name.as_str()); - } - } - - set - } - - /// Create an new expression for a column reference - pub fn column(name: impl Into) -> Self { - Self::Column(name.into()) - } - - /// Create a new expression for a literal value - pub fn literal(value: impl Into) -> Self { - Self::Literal(value.into()) - } - - /// Create a new expression for a struct - pub fn struct_expr(exprs: impl IntoIterator) -> Self { - Self::Struct(exprs.into_iter().collect()) - } - - /// Create a new expression for a unary operation - pub fn unary(op: UnaryOperator, expr: impl Into) -> Self { - Self::UnaryOperation { - op, - expr: Box::new(expr.into()), - } - } - - /// Create a new expression for a binary operation - pub fn binary( - op: BinaryOperator, - lhs: impl Into, - rhs: impl Into, - ) -> Self { - Self::BinaryOperation { - op, - left: Box::new(lhs.into()), - right: Box::new(rhs.into()), - } - } - - /// Create a new expression for a variadic operation - pub fn variadic(op: VariadicOperator, other: impl IntoIterator) -> Self { - let mut exprs = other.into_iter().collect::>(); - if exprs.is_empty() { - // TODO this might break if we introduce new variadic operators? 
- return Self::literal(matches!(op, VariadicOperator::And)); - } - if exprs.len() == 1 { - return exprs.pop().unwrap(); - } - Self::VariadicOperation { op, exprs } - } - - /// Create a new expression `self == other` - pub fn eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::Equal, self, other) - } - - /// Create a new expression `self != other` - pub fn ne(self, other: Self) -> Self { - Self::binary(BinaryOperator::NotEqual, self, other) - } - - /// Create a new expression `self < other` - pub fn lt(self, other: Self) -> Self { - Self::binary(BinaryOperator::LessThan, self, other) - } - - /// Create a new expression `self > other` - pub fn gt(self, other: Self) -> Self { - Self::binary(BinaryOperator::GreaterThan, self, other) - } - - /// Create a new expression `self >= other` - pub fn gt_eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::GreaterThanOrEqual, self, other) - } - - /// Create a new expression `self <= other` - pub fn lt_eq(self, other: Self) -> Self { - Self::binary(BinaryOperator::LessThanOrEqual, self, other) - } - - /// Create a new expression `self AND other` - pub fn and(self, other: Self) -> Self { - self.and_many([other]) - } - - /// Create a new expression `self AND others` - pub fn and_many(self, other: impl IntoIterator) -> Self { - Self::variadic(VariadicOperator::And, std::iter::once(self).chain(other)) - } - - /// Create a new expression `self AND other` - pub fn or(self, other: Self) -> Self { - self.or_many([other]) - } - - /// Create a new expression `self OR other` - pub fn or_many(self, other: impl IntoIterator) -> Self { - Self::variadic(VariadicOperator::Or, std::iter::once(self).chain(other)) - } - - /// Create a new expression `self IS NULL` - pub fn is_null(self) -> Self { - Self::unary(UnaryOperator::IsNull, self) - } - - /// Create a new expression `NULLIF(self, other)` - pub fn null_if(self, other: Self) -> Self { - Self::NullIf { - expr: Box::new(self), - if_expr: Box::new(other), - } - } - - fn walk(&self) -> impl Iterator + '_ { - let mut stack = vec![self]; - std::iter::from_fn(move || { - let expr = stack.pop()?; - match expr { - Self::Literal(_) => {} - Self::Column { .. } => {} - Self::Struct(exprs) => { - stack.extend(exprs.iter()); - } - Self::BinaryOperation { left, right, .. } => { - stack.push(left); - stack.push(right); - } - Self::UnaryOperation { expr, .. 
} => { - stack.push(expr); - } - Self::VariadicOperation { op, exprs } => match op { - VariadicOperator::And | VariadicOperator::Or => { - stack.extend(exprs.iter()); - } - }, - Self::NullIf { expr, if_expr } => { - stack.push(expr); - stack.push(if_expr); - } - } - Some(expr) - }) - } -} - -impl std::ops::Add for Expression { - type Output = Self; - - fn add(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Plus, self, rhs) - } -} - -impl std::ops::Sub for Expression { - type Output = Self; - - fn sub(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Minus, self, rhs) - } -} - -impl std::ops::Mul for Expression { - type Output = Self; - - fn mul(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Multiply, self, rhs) - } -} - -impl std::ops::Div for Expression { - type Output = Self; - - fn div(self, rhs: Expression) -> Self::Output { - Self::binary(BinaryOperator::Divide, self, rhs) - } -} - -#[cfg(test)] -mod tests { - use super::Expression as Expr; - - #[test] - fn test_expression_format() { - let col_ref = Expr::column("x"); - let cases = [ - (col_ref.clone(), "Column(x)"), - (col_ref.clone().eq(Expr::literal(2)), "Column(x) = 2"), - ( - col_ref - .clone() - .gt_eq(Expr::literal(2)) - .and(col_ref.clone().lt_eq(Expr::literal(10))), - "AND(Column(x) >= 2, Column(x) <= 10)", - ), - ( - col_ref - .clone() - .gt(Expr::literal(2)) - .or(col_ref.clone().lt(Expr::literal(10))), - "OR(Column(x) > 2, Column(x) < 10)", - ), - ( - (col_ref.clone() - Expr::literal(4)).lt(Expr::literal(10)), - "Column(x) - 4 < 10", - ), - ( - (col_ref.clone() + Expr::literal(4)) / Expr::literal(10) * Expr::literal(42), - "Column(x) + 4 / 10 * 42", - ), - (col_ref.eq(Expr::literal("foo")), "Column(x) = 'foo'"), - ]; - - for (expr, expected) in cases { - let result = format!("{}", expr); - assert_eq!(result, expected); - } - } -} diff --git a/crates/core/src/kernel/expressions/scalars.rs b/crates/core/src/kernel/expressions/scalars.rs deleted file mode 100644 index 571c2abf92..0000000000 --- a/crates/core/src/kernel/expressions/scalars.rs +++ /dev/null @@ -1,559 +0,0 @@ -//! Scalar values for use in expressions. - -use std::cmp::Ordering; -use std::fmt::{Display, Formatter}; - -use arrow_array::Array; -use arrow_schema::TimeUnit; -use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc}; -use object_store::path::Path; - -use crate::kernel::{DataType, Error, PrimitiveType, StructField}; -use crate::NULL_PARTITION_VALUE_DATA_PATH; - -/// A single value, which can be null. Used for representing literal values -/// in [Expressions][crate::expressions::Expression]. -#[derive(Debug, Clone, PartialEq)] -pub enum Scalar { - /// 32bit integer - Integer(i32), - /// 64bit integer - Long(i64), - /// 16bit integer - Short(i16), - /// 8bit integer - Byte(i8), - /// 32bit floating point - Float(f32), - /// 64bit floating point - Double(f64), - /// utf-8 encoded string. - String(String), - /// true or false value - Boolean(bool), - /// Microsecond precision timestamp, adjusted to UTC. - Timestamp(i64), - /// Microsecond precision timestamp, with no timezone. - TimestampNtz(i64), - /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 - Date(i32), - /// Binary data - Binary(Vec), - /// Decimal value - Decimal(i128, u8, i8), - /// Null value with a given data type. - Null(DataType), - /// Struct value - Struct(Vec, Vec), -} - -impl Scalar { - /// Returns the data type of this scalar. 
- pub fn data_type(&self) -> DataType { - match self { - Self::Integer(_) => DataType::Primitive(PrimitiveType::Integer), - Self::Long(_) => DataType::Primitive(PrimitiveType::Long), - Self::Short(_) => DataType::Primitive(PrimitiveType::Short), - Self::Byte(_) => DataType::Primitive(PrimitiveType::Byte), - Self::Float(_) => DataType::Primitive(PrimitiveType::Float), - Self::Double(_) => DataType::Primitive(PrimitiveType::Double), - Self::String(_) => DataType::Primitive(PrimitiveType::String), - Self::Boolean(_) => DataType::Primitive(PrimitiveType::Boolean), - Self::Timestamp(_) => DataType::Primitive(PrimitiveType::Timestamp), - Self::TimestampNtz(_) => DataType::Primitive(PrimitiveType::TimestampNtz), - Self::Date(_) => DataType::Primitive(PrimitiveType::Date), - Self::Binary(_) => DataType::Primitive(PrimitiveType::Binary), - // Unwrapping should be safe, since the scalar should never have values that are unsupported - Self::Decimal(_, precision, scale) => DataType::decimal(*precision, *scale).unwrap(), - Self::Null(data_type) => data_type.clone(), - Self::Struct(_, fields) => DataType::struct_type(fields.clone()), - } - } - - /// Returns true if this scalar is null. - pub fn is_null(&self) -> bool { - matches!(self, Self::Null(_)) - } - - /// Serializes this scalar as a string. - pub fn serialize(&self) -> String { - match self { - Self::String(s) => s.to_owned(), - Self::Byte(b) => b.to_string(), - Self::Short(s) => s.to_string(), - Self::Integer(i) => i.to_string(), - Self::Long(l) => l.to_string(), - Self::Float(f) => f.to_string(), - Self::Double(d) => d.to_string(), - Self::Boolean(b) => { - if *b { - "true".to_string() - } else { - "false".to_string() - } - } - Self::TimestampNtz(ts) | Self::Timestamp(ts) => { - let ts = Utc.timestamp_micros(*ts).single().unwrap(); - ts.format("%Y-%m-%d %H:%M:%S%.6f").to_string() - } - Self::Date(days) => { - let date = DateTime::from_timestamp(*days as i64 * 24 * 3600, 0).unwrap(); - date.format("%Y-%m-%d").to_string() - } - Self::Decimal(value, _, scale) => match scale.cmp(&0) { - Ordering::Equal => value.to_string(), - Ordering::Greater => { - let scalar_multiple = 10_i128.pow(*scale as u32); - let mut s = String::new(); - s.push_str((value / scalar_multiple).to_string().as_str()); - s.push('.'); - s.push_str(&format!( - "{:0>scale$}", - value % scalar_multiple, - scale = *scale as usize - )); - s - } - Ordering::Less => { - let mut s = value.to_string(); - for _ in 0..(scale.abs()) { - s.push('0'); - } - s - } - }, - Self::Binary(val) => create_escaped_binary_string(val.as_slice()), - Self::Null(_) => "null".to_string(), - Self::Struct(_, _) => todo!("serializing struct values is not yet supported"), - } - } - - /// Serializes this scalar as a string for use in hive partition file names. - pub fn serialize_encoded(&self) -> String { - if self.is_null() { - return NULL_PARTITION_VALUE_DATA_PATH.to_string(); - } - Path::from(self.serialize()).to_string() - } - - /// Create a [`Scalar`] form a row in an arrow array. 
- pub fn from_array(arr: &dyn Array, index: usize) -> Option { - use arrow_array::*; - use arrow_schema::DataType::*; - - if arr.len() <= index { - return None; - } - if arr.is_null(index) { - return Some(Self::Null(arr.data_type().try_into().ok()?)); - } - - match arr.data_type() { - Utf8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::String(v.value(index).to_string())), - LargeUtf8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::String(v.value(index).to_string())), - Boolean => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Boolean(v.value(index))), - Binary => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - LargeBinary => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - FixedSizeBinary(_) => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Binary(v.value(index).to_vec())), - Int8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Byte(v.value(index))), - Int16 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Short(v.value(index))), - Int32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Integer(v.value(index))), - Int64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Long(v.value(index))), - UInt8 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Byte(v.value(index) as i8)), - UInt16 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Short(v.value(index) as i16)), - UInt32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Integer(v.value(index) as i32)), - UInt64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Long(v.value(index) as i64)), - Float32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Float(v.value(index))), - Float64 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Double(v.value(index))), - Decimal128(precision, scale) => { - arr.as_any().downcast_ref::().map(|v| { - let value = v.value(index); - Self::Decimal(value, *precision, *scale) - }) - } - Date32 => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Date(v.value(index))), - // TODO handle timezones when implementing timestamp ntz feature. 
- Timestamp(TimeUnit::Microsecond, tz) => match tz { - None => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Timestamp(v.value(index))), - Some(tz_str) if tz_str.as_ref() == "UTC" => arr - .as_any() - .downcast_ref::() - .map(|v| Self::Timestamp(v.clone().with_timezone("UTC").value(index))), - _ => None, - }, - Struct(fields) => { - let struct_fields = fields - .iter() - .flat_map(|f| TryFrom::try_from(f.as_ref())) - .collect::>(); - let values = arr - .as_any() - .downcast_ref::() - .and_then(|struct_arr| { - struct_fields - .iter() - .map(|f: &StructField| { - struct_arr - .column_by_name(f.name()) - .and_then(|c| Self::from_array(c.as_ref(), index)) - }) - .collect::>>() - })?; - if struct_fields.len() != values.len() { - return None; - } - Some(Self::Struct(values, struct_fields)) - } - Float16 - | Decimal256(_, _) - | List(_) - | LargeList(_) - | FixedSizeList(_, _) - | Map(_, _) - | Date64 - | Timestamp(_, _) - | Time32(_) - | Time64(_) - | Duration(_) - | Interval(_) - | Dictionary(_, _) - | RunEndEncoded(_, _) - | Union(_, _) - | Utf8View - | BinaryView - | ListView(_) - | LargeListView(_) - | Null => None, - } - } -} - -impl PartialOrd for Scalar { - fn partial_cmp(&self, other: &Self) -> Option { - use Scalar::*; - match (self, other) { - (Null(_), Null(_)) => Some(Ordering::Equal), - (Integer(a), Integer(b)) => a.partial_cmp(b), - (Long(a), Long(b)) => a.partial_cmp(b), - (Short(a), Short(b)) => a.partial_cmp(b), - (Byte(a), Byte(b)) => a.partial_cmp(b), - (Float(a), Float(b)) => a.partial_cmp(b), - (Double(a), Double(b)) => a.partial_cmp(b), - (String(a), String(b)) => a.partial_cmp(b), - (Boolean(a), Boolean(b)) => a.partial_cmp(b), - (Timestamp(a), Timestamp(b)) => a.partial_cmp(b), - (TimestampNtz(a), TimestampNtz(b)) => a.partial_cmp(b), - (Date(a), Date(b)) => a.partial_cmp(b), - (Binary(a), Binary(b)) => a.partial_cmp(b), - (Decimal(a, _, _), Decimal(b, _, _)) => a.partial_cmp(b), - (Struct(a, _), Struct(b, _)) => a.partial_cmp(b), - // TODO should we make an assumption about the ordering of nulls? - // rigth now this is only used for internal purposes. 
- (Null(_), _) => Some(Ordering::Less), - (_, Null(_)) => Some(Ordering::Greater), - _ => None, - } - } -} - -impl Display for Scalar { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Integer(i) => write!(f, "{}", i), - Self::Long(i) => write!(f, "{}", i), - Self::Short(i) => write!(f, "{}", i), - Self::Byte(i) => write!(f, "{}", i), - Self::Float(fl) => write!(f, "{}", fl), - Self::Double(fl) => write!(f, "{}", fl), - Self::String(s) => write!(f, "'{}'", s), - Self::Boolean(b) => write!(f, "{}", b), - Self::Timestamp(ts) => write!(f, "{}", ts), - Self::TimestampNtz(ts) => write!(f, "{}", ts), - Self::Date(d) => write!(f, "{}", d), - Self::Binary(b) => write!(f, "{:?}", b), - Self::Decimal(value, _, scale) => match scale.cmp(&0) { - Ordering::Equal => { - write!(f, "{}", value) - } - Ordering::Greater => { - let scalar_multiple = 10_i128.pow(*scale as u32); - write!(f, "{}", value / scalar_multiple)?; - write!(f, ".")?; - write!( - f, - "{:0>scale$}", - value % scalar_multiple, - scale = *scale as usize - ) - } - Ordering::Less => { - write!(f, "{}", value)?; - for _ in 0..(scale.abs()) { - write!(f, "0")?; - } - Ok(()) - } - }, - Self::Null(_) => write!(f, "null"), - Self::Struct(values, fields) => { - write!(f, "{{")?; - for (i, (value, field)) in values.iter().zip(fields.iter()).enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {}", field.name, value)?; - } - write!(f, "}}") - } - } - } -} - -impl From for Scalar { - fn from(i: i32) -> Self { - Self::Integer(i) - } -} - -impl From for Scalar { - fn from(i: i64) -> Self { - Self::Long(i) - } -} - -impl From for Scalar { - fn from(b: bool) -> Self { - Self::Boolean(b) - } -} - -impl From<&str> for Scalar { - fn from(s: &str) -> Self { - Self::String(s.into()) - } -} - -impl From for Scalar { - fn from(value: String) -> Self { - Self::String(value) - } -} - -// TODO: add more From impls - -impl PrimitiveType { - fn data_type(&self) -> DataType { - DataType::Primitive(*self) - } - - /// Parses a string into a scalar value. - pub fn parse_scalar(&self, raw: &str) -> Result { - use PrimitiveType::*; - - lazy_static::lazy_static! { - static ref UNIX_EPOCH: DateTime = DateTime::from_timestamp(0, 0).unwrap(); - } - - if raw.is_empty() || raw == NULL_PARTITION_VALUE_DATA_PATH { - return Ok(Scalar::Null(self.data_type())); - } - - match self { - String => Ok(Scalar::String(raw.to_string())), - Byte => self.str_parse_scalar(raw, Scalar::Byte), - Short => self.str_parse_scalar(raw, Scalar::Short), - Integer => self.str_parse_scalar(raw, Scalar::Integer), - Long => self.str_parse_scalar(raw, Scalar::Long), - Float => self.str_parse_scalar(raw, Scalar::Float), - Double => self.str_parse_scalar(raw, Scalar::Double), - Boolean => { - if raw.eq_ignore_ascii_case("true") { - Ok(Scalar::Boolean(true)) - } else if raw.eq_ignore_ascii_case("false") { - Ok(Scalar::Boolean(false)) - } else { - Err(self.parse_error(raw)) - } - } - Date => { - let date = NaiveDate::parse_from_str(raw, "%Y-%m-%d") - .map_err(|_| self.parse_error(raw))? 
- .and_hms_opt(0, 0, 0) - .ok_or(self.parse_error(raw))?; - let date = Utc.from_utc_datetime(&date); - let days = date.signed_duration_since(*UNIX_EPOCH).num_days() as i32; - Ok(Scalar::Date(days)) - } - Timestamp => { - let timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f") - .map_err(|_| self.parse_error(raw))?; - let timestamp = Utc.from_utc_datetime(×tamp); - let micros = timestamp - .signed_duration_since(*UNIX_EPOCH) - .num_microseconds() - .ok_or(self.parse_error(raw))?; - Ok(Scalar::Timestamp(micros)) - } - TimestampNtz => { - let timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f") - .map_err(|_| self.parse_error(raw))?; - let timestamp = Utc.from_utc_datetime(×tamp); - let micros = timestamp - .signed_duration_since(*UNIX_EPOCH) - .num_microseconds() - .ok_or(self.parse_error(raw))?; - Ok(Scalar::TimestampNtz(micros)) - } - Binary => { - let bytes = parse_escaped_binary_string(raw).map_err(|_| self.parse_error(raw))?; - Ok(Scalar::Binary(bytes)) - } - _ => todo!("parsing {:?} is not yet supported", self), - } - } - - fn parse_error(&self, raw: &str) -> Error { - Error::Parse(raw.to_string(), self.data_type()) - } - - fn str_parse_scalar( - &self, - raw: &str, - f: impl FnOnce(T) -> Scalar, - ) -> Result { - match raw.parse() { - Ok(val) => Ok(f(val)), - Err(..) => Err(self.parse_error(raw)), - } - } -} - -fn create_escaped_binary_string(data: &[u8]) -> String { - let mut escaped_string = String::new(); - for &byte in data { - // Convert each byte to its two-digit hexadecimal representation - let hex_representation = format!("{:04X}", byte); - // Append the hexadecimal representation with an escape sequence - escaped_string.push_str("\\u"); - escaped_string.push_str(&hex_representation); - } - escaped_string -} - -fn parse_escaped_binary_string(escaped_string: &str) -> Result, &'static str> { - let mut parsed_bytes = Vec::new(); - let mut chars = escaped_string.chars(); - - while let Some(ch) = chars.next() { - if ch == '\\' { - // Check for the escape sequence "\\u" indicating a hexadecimal value - if chars.next() == Some('u') { - // Read two hexadecimal digits and convert to u8 - if let (Some(digit1), Some(digit2), Some(digit3), Some(digit4)) = - (chars.next(), chars.next(), chars.next(), chars.next()) - { - if let Ok(byte) = - u8::from_str_radix(&format!("{}{}{}{}", digit1, digit2, digit3, digit4), 16) - { - parsed_bytes.push(byte); - } else { - return Err("Error parsing hexadecimal value"); - } - } else { - return Err("Incomplete escape sequence"); - } - } else { - // Unrecognized escape sequence - return Err("Unrecognized escape sequence"); - } - } else { - // Regular character, convert to u8 and push into the result vector - parsed_bytes.push(ch as u8); - } - } - - Ok(parsed_bytes) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_binary_roundtrip() { - let scalar = Scalar::Binary(vec![0, 1, 2, 3, 4, 5]); - let parsed = PrimitiveType::Binary - .parse_scalar(&scalar.serialize()) - .unwrap(); - assert_eq!(scalar, parsed); - } - - #[test] - fn test_decimal_display() { - let s = Scalar::Decimal(123456789, 9, 2); - assert_eq!(s.to_string(), "1234567.89"); - - let s = Scalar::Decimal(123456789, 9, 0); - assert_eq!(s.to_string(), "123456789"); - - let s = Scalar::Decimal(123456789, 9, 9); - assert_eq!(s.to_string(), "0.123456789"); - - let s = Scalar::Decimal(123, 9, -3); - assert_eq!(s.to_string(), "123000"); - } -} diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index 876a09a33c..bcd9abbd15 
100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -1,15 +1,15 @@ //! Delta Kernel module //! //! The Kernel module contains all the logic for reading and processing the Delta Lake transaction log. +use delta_kernel::engine::arrow_expression::ArrowExpressionHandler; pub mod arrow; pub mod error; -pub mod expressions; pub mod models; +pub mod scalars; mod snapshot; pub use error::*; -pub use expressions::*; pub use models::*; pub use snapshot::*; @@ -20,3 +20,7 @@ pub trait DataCheck { /// The SQL expression to use for the check fn get_expression(&self) -> &str; } + +lazy_static::lazy_static! { + static ref ARROW_HANDLER: ArrowExpressionHandler = ArrowExpressionHandler {}; +} diff --git a/crates/core/src/kernel/models/actions.rs b/crates/core/src/kernel/models/actions.rs index f44ff4ac00..6ec8fc11fb 100644 --- a/crates/core/src/kernel/models/actions.rs +++ b/crates/core/src/kernel/models/actions.rs @@ -1,19 +1,18 @@ use std::collections::{HashMap, HashSet}; use std::fmt; use std::str::FromStr; -// use std::io::{Cursor, Read}; -// use std::sync::Arc; -// use roaring::RoaringTreemap; +use maplit::hashset; use serde::{Deserialize, Serialize}; use tracing::warn; use url::Url; use super::schema::StructType; use crate::kernel::{error::Error, DeltaResult}; +use crate::TableProperty; -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] /// Defines a file format used in table +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub struct Format { /// Name of the encoding for files in this table pub provider: String, @@ -137,34 +136,338 @@ pub struct Protocol { impl Protocol { /// Create a new protocol action - pub fn new(min_reader_version: i32, min_wrriter_version: i32) -> Self { + pub fn new(min_reader_version: i32, min_writer_version: i32) -> Self { Self { min_reader_version, - min_writer_version: min_wrriter_version, + min_writer_version, reader_features: None, writer_features: None, } } - /// set the reader features in the protocol action + /// set the reader features in the protocol action, automatically bumps min_reader_version pub fn with_reader_features( mut self, reader_features: impl IntoIterator>, ) -> Self { - self.reader_features = Some(reader_features.into_iter().map(|c| c.into()).collect()); + let all_reader_features = reader_features + .into_iter() + .map(Into::into) + .collect::>(); + if !all_reader_features.is_empty() { + self.min_reader_version = 3 + } + self.reader_features = Some(all_reader_features); self } - /// set the writer features in the protocol action + /// set the writer features in the protocol action, automatically bumps min_writer_version pub fn with_writer_features( mut self, writer_features: impl IntoIterator>, ) -> Self { - self.writer_features = Some(writer_features.into_iter().map(|c| c.into()).collect()); + let all_writer_feautures = writer_features + .into_iter() + .map(|c| c.into()) + .collect::>(); + if !all_writer_feautures.is_empty() { + self.min_writer_version = 7 + } + self.writer_features = Some(all_writer_feautures); + self + } + + /// Converts existing properties into features if the reader_version is >=3 or writer_version >=3 + /// only converts features that are "true" + pub fn move_table_properties_into_features( + mut self, + configuration: &HashMap>, + ) -> Protocol { + if self.min_writer_version >= 7 { + let mut converted_writer_features = configuration + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + 
.collect::>>() + .keys() + .map(|key| (*key).clone().into()) + .filter(|v| !matches!(v, WriterFeatures::Other(_))) + .collect::>(); + + if configuration + .keys() + .any(|v| v.starts_with("delta.constraints.")) + { + converted_writer_features.insert(WriterFeatures::CheckConstraints); + } + + match self.writer_features { + Some(mut features) => { + features.extend(converted_writer_features); + self.writer_features = Some(features); + } + None => self.writer_features = Some(converted_writer_features), + } + } + if self.min_reader_version > 3 { + let converted_reader_features = configuration + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + .map(|(key, _)| (*key).clone().into()) + .filter(|v| !matches!(v, ReaderFeatures::Other(_))) + .collect::>(); + match self.reader_features { + Some(mut features) => { + features.extend(converted_reader_features); + self.reader_features = Some(features); + } + None => self.reader_features = Some(converted_reader_features), + } + } + self + } + /// Will apply the properties to the protocol by either bumping the version or setting + /// features + pub fn apply_properties_to_protocol( + mut self, + new_properties: &HashMap, + raise_if_not_exists: bool, + ) -> DeltaResult { + let mut parsed_properties: HashMap = HashMap::new(); + + for (key, value) in new_properties { + if let Ok(parsed_key) = key.parse::() { + parsed_properties.insert(parsed_key, value.to_string()); + } else if raise_if_not_exists { + return Err(Error::Generic(format!( + "Error parsing property '{}':'{}'", + key, value + ))); + } + } + + // Check and update delta.minReaderVersion + if let Some(min_reader_version) = parsed_properties.get(&TableProperty::MinReaderVersion) { + let new_min_reader_version = min_reader_version.parse::(); + match new_min_reader_version { + Ok(version) => match version { + 1..=3 => { + if version > self.min_reader_version { + self.min_reader_version = version + } + } + _ => { + return Err(Error::Generic(format!( + "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", + min_reader_version + ))) + } + }, + Err(_) => { + return Err(Error::Generic(format!( + "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", + min_reader_version + ))) + } + } + } + + // Check and update delta.minWriterVersion + if let Some(min_writer_version) = parsed_properties.get(&TableProperty::MinWriterVersion) { + let new_min_writer_version = min_writer_version.parse::(); + match new_min_writer_version { + Ok(version) => match version { + 2..=7 => { + if version > self.min_writer_version { + self.min_writer_version = version + } + } + _ => { + return Err(Error::Generic(format!( + "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", + min_writer_version + ))) + } + }, + Err(_) => { + return Err(Error::Generic(format!( + "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", + min_writer_version + ))) + } + } + } + + // Check enableChangeDataFeed and bump protocol or add writerFeature if writer versions is >=7 + if let Some(enable_cdf) = parsed_properties.get(&TableProperty::EnableChangeDataFeed) { + let if_enable_cdf = enable_cdf.to_ascii_lowercase().parse::(); + match if_enable_cdf { + Ok(true) => { + if self.min_writer_version >= 7 { + match self.writer_features { + Some(mut features) => { + features.insert(WriterFeatures::ChangeDataFeed); + self.writer_features = Some(features); + } + None => { + 
self.writer_features = + Some(hashset! {WriterFeatures::ChangeDataFeed}) + } + } + } else if self.min_writer_version <= 3 { + self.min_writer_version = 4 + } + } + Ok(false) => {} + _ => { + return Err(Error::Generic(format!( + "delta.enableChangeDataFeed = '{}' is invalid, valid values are ['true']", + enable_cdf + ))) + } + } + } + + if let Some(enable_dv) = parsed_properties.get(&TableProperty::EnableDeletionVectors) { + let if_enable_dv = enable_dv.to_ascii_lowercase().parse::(); + match if_enable_dv { + Ok(true) => { + let writer_features = match self.writer_features { + Some(mut features) => { + features.insert(WriterFeatures::DeletionVectors); + features + } + None => hashset! {WriterFeatures::DeletionVectors}, + }; + let reader_features = match self.reader_features { + Some(mut features) => { + features.insert(ReaderFeatures::DeletionVectors); + features + } + None => hashset! {ReaderFeatures::DeletionVectors}, + }; + self.min_reader_version = 3; + self.min_writer_version = 7; + self.writer_features = Some(writer_features); + self.reader_features = Some(reader_features); + } + Ok(false) => {} + _ => { + return Err(Error::Generic(format!( + "delta.enableDeletionVectors = '{}' is invalid, valid values are ['true']", + enable_dv + ))) + } + } + } + Ok(self) + } + /// Enable timestamp_ntz in the protocol + pub fn enable_timestamp_ntz(mut self) -> Protocol { + self = self.with_reader_features(vec![ReaderFeatures::TimestampWithoutTimezone]); + self = self.with_writer_features(vec![WriterFeatures::TimestampWithoutTimezone]); self } } +/// High level table features +#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash)] +#[serde(rename_all = "camelCase")] +pub enum TableFeatures { + /// Mapping of one column to another + ColumnMapping, + /// Deletion vectors for merge, update, delete + DeletionVectors, + /// timestamps without timezone support + #[serde(rename = "timestampNtz")] + TimestampWithoutTimezone, + /// version 2 of checkpointing + V2Checkpoint, + /// Append Only Tables + AppendOnly, + /// Table invariants + Invariants, + /// Check constraints on columns + CheckConstraints, + /// CDF on a table + ChangeDataFeed, + /// Columns with generated values + GeneratedColumns, + /// ID Columns + IdentityColumns, + /// Row tracking on tables + RowTracking, + /// domain specific metadata + DomainMetadata, + /// Iceberg compatibility support + IcebergCompatV1, +} + +impl FromStr for TableFeatures { + type Err = (); + + fn from_str(value: &str) -> Result { + match value { + "columnMapping" => Ok(TableFeatures::ColumnMapping), + "deletionVectors" => Ok(TableFeatures::DeletionVectors), + "timestampNtz" => Ok(TableFeatures::TimestampWithoutTimezone), + "v2Checkpoint" => Ok(TableFeatures::V2Checkpoint), + "appendOnly" => Ok(TableFeatures::AppendOnly), + "invariants" => Ok(TableFeatures::Invariants), + "checkConstraints" => Ok(TableFeatures::CheckConstraints), + "changeDataFeed" => Ok(TableFeatures::ChangeDataFeed), + "generatedColumns" => Ok(TableFeatures::GeneratedColumns), + "identityColumns" => Ok(TableFeatures::IdentityColumns), + "rowTracking" => Ok(TableFeatures::RowTracking), + "domainMetadata" => Ok(TableFeatures::DomainMetadata), + "icebergCompatV1" => Ok(TableFeatures::IcebergCompatV1), + _ => Err(()), + } + } +} + +impl AsRef for TableFeatures { + fn as_ref(&self) -> &str { + match self { + TableFeatures::ColumnMapping => "columnMapping", + TableFeatures::DeletionVectors => "deletionVectors", + TableFeatures::TimestampWithoutTimezone => "timestampNtz", + 
TableFeatures::V2Checkpoint => "v2Checkpoint", + TableFeatures::AppendOnly => "appendOnly", + TableFeatures::Invariants => "invariants", + TableFeatures::CheckConstraints => "checkConstraints", + TableFeatures::ChangeDataFeed => "changeDataFeed", + TableFeatures::GeneratedColumns => "generatedColumns", + TableFeatures::IdentityColumns => "identityColumns", + TableFeatures::RowTracking => "rowTracking", + TableFeatures::DomainMetadata => "domainMetadata", + TableFeatures::IcebergCompatV1 => "icebergCompatV1", + } + } +} + +impl fmt::Display for TableFeatures { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_ref()) + } +} + +impl TableFeatures { + /// Convert table feature to respective reader or/and write feature + pub fn to_reader_writer_features(&self) -> (Option, Option) { + let reader_feature = ReaderFeatures::try_from(self).ok(); + let writer_feature = WriterFeatures::try_from(self).ok(); + (reader_feature, writer_feature) + } +} + /// Features table readers can support as well as let users know /// what is supported #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash)] @@ -237,6 +540,19 @@ impl fmt::Display for ReaderFeatures { } } +impl TryFrom<&TableFeatures> for ReaderFeatures { + type Error = String; + + fn try_from(value: &TableFeatures) -> Result { + match ReaderFeatures::from(value.as_ref()) { + ReaderFeatures::Other(_) => { + Err(format!("Table feature {} is not a reader feature", value)) + } + value => Ok(value), + } + } +} + /// Features table writers can support as well as let users know /// what is supported #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash)] @@ -328,6 +644,19 @@ impl fmt::Display for WriterFeatures { } } +impl TryFrom<&TableFeatures> for WriterFeatures { + type Error = String; + + fn try_from(value: &TableFeatures) -> Result { + match WriterFeatures::from(value.as_ref()) { + WriterFeatures::Other(_) => { + Err(format!("Table feature {} is not a writer feature", value)) + } + value => Ok(value), + } + } +} + impl From<&parquet::record::Field> for WriterFeatures { fn from(value: &parquet::record::Field) -> Self { match value { @@ -736,6 +1065,10 @@ pub struct CommitInfo { /// Additional provenance information for the commit #[serde(flatten, default)] pub info: HashMap, + + /// User defined metadata + #[serde(skip_serializing_if = "Option::is_none")] + pub user_metadata: Option, } /// The domain metadata action contains a configuration (string) for a named metadata domain @@ -907,15 +1240,9 @@ pub(crate) mod serde_path { #[cfg(test)] mod tests { use std::path::PathBuf; - // use std::sync::Arc; - - // use object_store::local::LocalFileSystem; - - use crate::kernel::PrimitiveType; use super::*; - // use crate::client::filesystem::ObjectStoreFileSystemClient; - // use crate::executor::tokio::TokioBackgroundExecutor; + use crate::kernel::PrimitiveType; fn dv_relateive() -> DeletionVectorDescriptor { DeletionVectorDescriptor { diff --git a/crates/core/src/kernel/models/fields.rs b/crates/core/src/kernel/models/fields.rs index fa672aaefc..a5a6585060 100644 --- a/crates/core/src/kernel/models/fields.rs +++ b/crates/core/src/kernel/models/fields.rs @@ -1,8 +1,9 @@ //! 
Schema definitions for action types +use std::sync::Arc; +use delta_kernel::schema::{ArrayType, DataType, MapType, StructField, StructType}; use lazy_static::lazy_static; -use super::schema::{ArrayType, DataType, MapType, StructField, StructType}; use super::ActionType; impl ActionType { @@ -271,3 +272,10 @@ fn deletion_vector_field() -> StructField { pub(crate) fn log_schema() -> &'static StructType { &LOG_SCHEMA } + +pub(crate) fn log_schema_ref() -> &'static Arc { + lazy_static! { + static ref LOG_SCHEMA_REF: Arc = Arc::new(LOG_SCHEMA.clone()); + } + &LOG_SCHEMA_REF +} diff --git a/crates/core/src/kernel/models/schema.rs b/crates/core/src/kernel/models/schema.rs index 161de0352a..3a88564f1d 100644 --- a/crates/core/src/kernel/models/schema.rs +++ b/crates/core/src/kernel/models/schema.rs @@ -1,93 +1,21 @@ //! Delta table schema -use std::borrow::Borrow; -use std::fmt::Formatter; -use std::hash::{Hash, Hasher}; use std::sync::Arc; -use std::{collections::HashMap, fmt::Display}; -use serde::{Deserialize, Serialize}; +pub use delta_kernel::schema::{ + ArrayType, ColumnMetadataKey, DataType, MapType, MetadataValue, PrimitiveType, StructField, + StructType, +}; use serde_json::Value; use crate::kernel::error::Error; use crate::kernel::DataCheck; -use crate::protocol::ProtocolError; /// Type alias for a top level schema pub type Schema = StructType; /// Schema reference type pub type SchemaRef = Arc; -/// A value that can be stored in the metadata of a Delta table schema entity. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] -#[serde(untagged)] -pub enum MetadataValue { - /// A number value - Number(i32), - /// A string value - String(String), - /// A Boolean value - Boolean(bool), -} - -impl From for MetadataValue { - fn from(value: String) -> Self { - Self::String(value) - } -} - -impl From<&String> for MetadataValue { - fn from(value: &String) -> Self { - Self::String(value.clone()) - } -} - -impl From for MetadataValue { - fn from(value: i32) -> Self { - Self::Number(value) - } -} - -impl From for MetadataValue { - fn from(value: bool) -> Self { - Self::Boolean(value) - } -} - -impl From for MetadataValue { - fn from(value: Value) -> Self { - Self::String(value.to_string()) - } -} - -#[derive(Debug)] -#[allow(missing_docs)] -pub enum ColumnMetadataKey { - ColumnMappingId, - ColumnMappingPhysicalName, - GenerationExpression, - IdentityStart, - IdentityStep, - IdentityHighWaterMark, - IdentityAllowExplicitInsert, - Invariants, -} - -impl AsRef for ColumnMetadataKey { - fn as_ref(&self) -> &str { - match self { - Self::ColumnMappingId => "delta.columnMapping.id", - Self::ColumnMappingPhysicalName => "delta.columnMapping.physicalName", - Self::GenerationExpression => "delta.generationExpression", - Self::IdentityAllowExplicitInsert => "delta.identity.allowExplicitInsert", - Self::IdentityHighWaterMark => "delta.identity.highWaterMark", - Self::IdentityStart => "delta.identity.start", - Self::IdentityStep => "delta.identity.step", - Self::Invariants => "delta.invariants", - } - } -} - /// An invariant for a column that is enforced on all writes to a Delta table. #[derive(Eq, PartialEq, Debug, Default, Clone)] pub struct Invariant { @@ -117,168 +45,17 @@ impl DataCheck for Invariant { } } -/// Represents a struct field defined in the Delta table schema. 
-// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Schema-Serialization-Format -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] -pub struct StructField { - /// Name of this (possibly nested) column - pub name: String, - /// The data type of this field - #[serde(rename = "type")] - pub data_type: DataType, - /// Denotes whether this Field can be null - pub nullable: bool, - /// A JSON map containing information about this column - pub metadata: HashMap, -} - -impl Hash for StructField { - fn hash(&self, state: &mut H) { - self.name.hash(state); - self.data_type.hash(state); - self.nullable.hash(state); - } -} - -impl Borrow for StructField { - fn borrow(&self) -> &str { - self.name.as_ref() - } -} - -impl Eq for StructField {} - -impl StructField { - /// Creates a new field - pub fn new(name: impl Into, data_type: impl Into, nullable: bool) -> Self { - Self { - name: name.into(), - data_type: data_type.into(), - nullable, - metadata: HashMap::default(), - } - } - - /// Creates a new field with metadata - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, impl Into)>, - ) -> Self { - self.metadata = metadata - .into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(); - self - } - - /// Get the value of a specific metadata key - pub fn get_config_value(&self, key: &ColumnMetadataKey) -> Option<&MetadataValue> { - self.metadata.get(key.as_ref()) - } - - #[inline] - /// Returns the name of the column - pub fn name(&self) -> &String { - &self.name - } - - #[inline] - /// Returns whether the column is nullable - pub fn is_nullable(&self) -> bool { - self.nullable - } - - /// Returns the physical name of the column - /// Equals the name if column mapping is not enabled on table - pub fn physical_name(&self) -> Result<&str, Error> { - // Even on mapping type id the physical name should be there for partitions - let phys_name = self.get_config_value(&ColumnMetadataKey::ColumnMappingPhysicalName); - match phys_name { - None => Ok(&self.name), - Some(MetadataValue::Boolean(_)) => Ok(&self.name), - Some(MetadataValue::String(s)) => Ok(s), - Some(MetadataValue::Number(_)) => Err(Error::MetadataError( - "Unexpected type for physical name".to_string(), - )), - } - } - - #[inline] - /// Returns the data type of the column - pub const fn data_type(&self) -> &DataType { - &self.data_type - } - - #[inline] - /// Returns the metadata of the column - pub const fn metadata(&self) -> &HashMap { - &self.metadata - } -} - -/// A struct is used to represent both the top-level schema of the table -/// as well as struct columns that contain nested columns. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq, Hash)] -pub struct StructType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element stored in this array - pub fields: Vec, +/// Trait to add convenince functions to struct type +pub trait StructTypeExt { + /// Get all invariants in the schemas + fn get_invariants(&self) -> Result, Error>; } -impl StructType { - /// Creates a new struct type - pub fn new(fields: Vec) -> Self { - Self { - type_name: "struct".into(), - fields, - } - } - - /// Returns an immutable reference of the fields in the struct - pub fn fields(&self) -> &Vec { - &self.fields - } - - /// Find the index of the column with the given name. 
- pub fn index_of(&self, name: &str) -> Result { - let (idx, _) = self - .fields() - .iter() - .enumerate() - .find(|(_, b)| b.name() == name) - .ok_or_else(|| { - let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect(); - Error::Schema(format!( - "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}" - )) - })?; - Ok(idx) - } - - /// Returns a reference of a specific [`StructField`] instance selected by name. - pub fn field_with_name(&self, name: &str) -> Result<&StructField, Error> { - match name.split_once('.') { - Some((parent, children)) => { - let parent_field = &self.fields[self.index_of(parent)?]; - match parent_field.data_type { - DataType::Struct(ref inner) => Ok(inner.field_with_name(children)?), - _ => Err(Error::Schema(format!( - "Field {} is not a struct type", - parent_field.name() - ))), - } - } - None => Ok(&self.fields[self.index_of(name)?]), - } - } - +impl StructTypeExt for StructType { /// Get all invariants in the schemas - pub fn get_invariants(&self) -> Result, Error> { + fn get_invariants(&self) -> Result, Error> { let mut remaining_fields: Vec<(String, StructField)> = self .fields() - .iter() .map(|field| (field.name.clone(), field.clone())) .collect(); let mut invariants: Vec = Vec::new(); @@ -297,7 +74,6 @@ impl StructType { remaining_fields.extend( inner .fields() - .iter() .map(|field| { let new_prefix = add_segment(&field_path, &field.name); (new_prefix, field.clone()) @@ -349,521 +125,11 @@ impl StructType { } } -impl FromIterator for StructType { - fn from_iter>(iter: T) -> Self { - Self { - type_name: "struct".into(), - fields: iter.into_iter().collect(), - } - } -} - -impl<'a> FromIterator<&'a StructField> for StructType { - fn from_iter>(iter: T) -> Self { - Self { - type_name: "struct".into(), - fields: iter.into_iter().cloned().collect(), - } - } -} - -impl From<[StructField; N]> for StructType { - fn from(value: [StructField; N]) -> Self { - Self { - type_name: "struct".into(), - fields: value.to_vec(), - } - } -} - -impl<'a, const N: usize> From<[&'a StructField; N]> for StructType { - fn from(value: [&'a StructField; N]) -> Self { - Self { - type_name: "struct".into(), - fields: value.into_iter().cloned().collect(), - } - } -} - -impl<'a> IntoIterator for &'a StructType { - type Item = &'a StructField; - type IntoIter = std::slice::Iter<'a, StructField>; - - fn into_iter(self) -> Self::IntoIter { - self.fields.iter() - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq, Hash)] -#[serde(rename_all = "camelCase")] -/// An array stores a variable length collection of items of some type. 
-pub struct ArrayType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element stored in this array - pub element_type: DataType, - /// Denoting whether this array can contain one or more null values - pub contains_null: bool, -} - -impl ArrayType { - /// Creates a new array type - pub fn new(element_type: DataType, contains_null: bool) -> Self { - Self { - type_name: "array".into(), - element_type, - contains_null, - } - } - - #[inline] - /// Returns the element type of the array - pub const fn element_type(&self) -> &DataType { - &self.element_type - } - - #[inline] - /// Returns whether the array can contain null values - pub const fn contains_null(&self) -> bool { - self.contains_null - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq, Hash)] -#[serde(rename_all = "camelCase")] -/// A map stores an arbitrary length collection of key-value pairs -pub struct MapType { - #[serde(rename = "type")] - /// The type of this struct - pub type_name: String, - /// The type of element used for the key of this map - pub key_type: DataType, - /// The type of element used for the value of this map - pub value_type: DataType, - /// Denoting whether this array can contain one or more null values - #[serde(default = "default_true")] - pub value_contains_null: bool, -} - -impl MapType { - /// Creates a new map type - pub fn new(key_type: DataType, value_type: DataType, value_contains_null: bool) -> Self { - Self { - type_name: "map".into(), - key_type, - value_type, - value_contains_null, - } - } - - #[inline] - /// Returns the key type of the map - pub const fn key_type(&self) -> &DataType { - &self.key_type - } - - #[inline] - /// Returns the value type of the map - pub const fn value_type(&self) -> &DataType { - &self.value_type - } - - #[inline] - /// Returns whether the map can contain null values - pub const fn value_contains_null(&self) -> bool { - self.value_contains_null - } -} - -fn default_true() -> bool { - true -} - -/// The maximum precision for [PrimitiveType::Decimal] values -pub const DECIMAL_MAX_PRECISION: u8 = 38; - -/// The maximum scale for [PrimitiveType::Decimal] values -pub const DECIMAL_MAX_SCALE: i8 = 38; - -#[derive(Debug, Serialize, Deserialize, PartialEq, Copy, Clone, Eq, Hash)] -#[serde(rename_all = "snake_case")] -/// Primitive types supported by Delta -pub enum PrimitiveType { - /// UTF-8 encoded string of characters - String, - /// i64: 8-byte signed integer. Range: -9223372036854775808 to 9223372036854775807 - Long, - /// i32: 4-byte signed integer. Range: -2147483648 to 2147483647 - Integer, - /// i16: 2-byte signed integer numbers. Range: -32768 to 32767 - Short, - /// i8: 1-byte signed integer number. Range: -128 to 127 - Byte, - /// f32: 4-byte single-precision floating-point numbers - Float, - /// f64: 8-byte double-precision floating-point numbers - Double, - /// bool: boolean values - Boolean, - /// Binary: uninterpreted binary data - Binary, - /// Date: Calendar date (year, month, day) - Date, - /// Microsecond precision timestamp, adjusted to UTC. 
- Timestamp, - /// Micrsoecond precision timestamp with no timezone - #[serde(alias = "timestampNtz")] - TimestampNtz, - #[serde( - serialize_with = "serialize_decimal", - deserialize_with = "deserialize_decimal", - untagged - )] - /// Decimal: arbitrary precision decimal numbers - Decimal(u8, i8), -} - -fn serialize_decimal( - precision: &u8, - scale: &i8, - serializer: S, -) -> Result { - serializer.serialize_str(&format!("decimal({},{})", precision, scale)) -} - -fn deserialize_decimal<'de, D>(deserializer: D) -> Result<(u8, i8), D::Error> -where - D: serde::Deserializer<'de>, -{ - let str_value = String::deserialize(deserializer)?; - if !str_value.starts_with("decimal(") || !str_value.ends_with(')') { - return Err(serde::de::Error::custom(format!( - "Invalid decimal: {}", - str_value - ))); - } - - let mut parts = str_value[8..str_value.len() - 1].split(','); - let precision = parts - .next() - .and_then(|part| part.trim().parse::().ok()) - .ok_or_else(|| { - serde::de::Error::custom(format!("Invalid precision in decimal: {}", str_value)) - })?; - let scale = parts - .next() - .and_then(|part| part.trim().parse::().ok()) - .ok_or_else(|| { - serde::de::Error::custom(format!("Invalid scale in decimal: {}", str_value)) - })?; - if precision > DECIMAL_MAX_PRECISION || scale > DECIMAL_MAX_SCALE { - return Err(serde::de::Error::custom(format!( - "Precision or scale is larger than 38: {}, {}", - precision, scale - ))); - } - Ok((precision, scale)) -} - -impl Display for PrimitiveType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - PrimitiveType::String => write!(f, "string"), - PrimitiveType::Long => write!(f, "long"), - PrimitiveType::Integer => write!(f, "integer"), - PrimitiveType::Short => write!(f, "short"), - PrimitiveType::Byte => write!(f, "byte"), - PrimitiveType::Float => write!(f, "float"), - PrimitiveType::Double => write!(f, "double"), - PrimitiveType::Boolean => write!(f, "boolean"), - PrimitiveType::Binary => write!(f, "binary"), - PrimitiveType::Date => write!(f, "date"), - PrimitiveType::Timestamp => write!(f, "timestamp"), - PrimitiveType::TimestampNtz => write!(f, "timestampNtz"), - PrimitiveType::Decimal(precision, scale) => { - write!(f, "decimal({},{})", precision, scale) - } - } - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq, Hash)] -#[serde(untagged, rename_all = "camelCase")] -/// Top level delta tdatatypes -pub enum DataType { - /// UTF-8 encoded string of characters - Primitive(PrimitiveType), - /// An array stores a variable length collection of items of some type. - Array(Box), - /// A struct is used to represent both the top-level schema of the table as well - /// as struct columns that contain nested columns. 
- Struct(Box), - /// A map stores an arbitrary length collection of key-value pairs - /// with a single keyType and a single valueType - Map(Box), -} - -impl From for DataType { - fn from(map_type: MapType) -> Self { - DataType::Map(Box::new(map_type)) - } -} - -impl From for DataType { - fn from(struct_type: StructType) -> Self { - DataType::Struct(Box::new(struct_type)) - } -} - -impl From for DataType { - fn from(array_type: ArrayType) -> Self { - DataType::Array(Box::new(array_type)) - } -} - -#[allow(missing_docs)] -impl DataType { - pub const STRING: Self = DataType::Primitive(PrimitiveType::String); - pub const LONG: Self = DataType::Primitive(PrimitiveType::Long); - pub const INTEGER: Self = DataType::Primitive(PrimitiveType::Integer); - pub const SHORT: Self = DataType::Primitive(PrimitiveType::Short); - pub const BYTE: Self = DataType::Primitive(PrimitiveType::Byte); - pub const FLOAT: Self = DataType::Primitive(PrimitiveType::Float); - pub const DOUBLE: Self = DataType::Primitive(PrimitiveType::Double); - pub const BOOLEAN: Self = DataType::Primitive(PrimitiveType::Boolean); - pub const BINARY: Self = DataType::Primitive(PrimitiveType::Binary); - pub const DATE: Self = DataType::Primitive(PrimitiveType::Date); - pub const TIMESTAMP: Self = DataType::Primitive(PrimitiveType::Timestamp); - pub const TIMESTAMPNTZ: Self = DataType::Primitive(PrimitiveType::TimestampNtz); - - pub fn decimal(precision: u8, scale: i8) -> Result { - if precision > DECIMAL_MAX_PRECISION || scale > DECIMAL_MAX_SCALE { - return Err(ProtocolError::InvalidField(format!( - "decimal({},{})", - precision, scale - ))); - } - Ok(DataType::Primitive(PrimitiveType::Decimal( - precision, scale, - ))) - } - - pub fn struct_type(fields: Vec) -> Self { - DataType::Struct(Box::new(StructType::new(fields))) - } -} - -impl Display for DataType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - DataType::Primitive(p) => write!(f, "{}", p), - DataType::Array(a) => write!(f, "array<{}>", a.element_type), - DataType::Struct(s) => { - write!(f, "struct<")?; - for (i, field) in s.fields.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {}", field.name, field.data_type)?; - } - write!(f, ">") - } - DataType::Map(m) => write!(f, "map<{}, {}>", m.key_type, m.value_type), - } - } -} - #[cfg(test)] mod tests { use super::*; use serde_json; use serde_json::json; - use std::collections::hash_map::DefaultHasher; - - #[test] - fn test_serde_data_types() { - let data = r#" - { - "name": "a", - "type": "integer", - "nullable": false, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!( - field.data_type, - DataType::Primitive(PrimitiveType::Integer) - )); - - let data = r#" - { - "name": "c", - "type": { - "type": "array", - "elementType": "integer", - "containsNull": false - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!(field.data_type, DataType::Array(_))); - - let data = r#" - { - "name": "e", - "type": { - "type": "array", - "elementType": { - "type": "struct", - "fields": [ - { - "name": "d", - "type": "integer", - "nullable": false, - "metadata": {} - } - ] - }, - "containsNull": true - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!(field.data_type, DataType::Array(_))); - match field.data_type { - DataType::Array(array) => 
assert!(matches!(array.element_type, DataType::Struct(_))), - _ => unreachable!(), - } - - let data = r#" - { - "name": "f", - "type": { - "type": "map", - "keyType": "string", - "valueType": "string", - "valueContainsNull": true - }, - "nullable": true, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!(field.data_type, DataType::Map(_))); - } - - #[test] - fn test_roundtrip_decimal() { - let data = r#" - { - "name": "a", - "type": "decimal(10, 2)", - "nullable": false, - "metadata": {} - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!( - field.data_type, - DataType::Primitive(PrimitiveType::Decimal(10, 2)) - )); - - let json_str = serde_json::to_string(&field).unwrap(); - assert_eq!( - json_str, - r#"{"name":"a","type":"decimal(10,2)","nullable":false,"metadata":{}}"# - ); - } - - #[test] - fn test_invalid_decimal() { - let data = r#" - { - "name": "a", - "type": "decimal(39, 10)", - "nullable": false, - "metadata": {} - } - "#; - assert!(matches!( - serde_json::from_str::(data).unwrap_err(), - _ - )); - - let data = r#" - { - "name": "a", - "type": "decimal(10, 39)", - "nullable": false, - "metadata": {} - } - "#; - assert!(matches!( - serde_json::from_str::(data).unwrap_err(), - _ - )); - } - - #[test] - fn test_field_metadata() { - let data = r#" - { - "name": "e", - "type": { - "type": "array", - "elementType": { - "type": "struct", - "fields": [ - { - "name": "d", - "type": "integer", - "nullable": false, - "metadata": { - "delta.columnMapping.id": 5, - "delta.columnMapping.physicalName": "col-a7f4159c-53be-4cb0-b81a-f7e5240cfc49" - } - } - ] - }, - "containsNull": true - }, - "nullable": true, - "metadata": { - "delta.columnMapping.id": 4, - "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" - } - } - "#; - let field: StructField = serde_json::from_str(data).unwrap(); - - let col_id = field - .get_config_value(&ColumnMetadataKey::ColumnMappingId) - .unwrap(); - assert!(matches!(col_id, MetadataValue::Number(num) if *num == 4)); - let physical_name = field - .get_config_value(&ColumnMetadataKey::ColumnMappingPhysicalName) - .unwrap(); - assert!( - matches!(physical_name, MetadataValue::String(name) if *name == "col-5f422f40-de70-45b2-88ab-1d5c90e94db1") - ); - } - - #[test] - fn test_read_schemas() { - let file = std::fs::File::open("./tests/serde/schema.json").unwrap(); - let schema: Result = serde_json::from_reader(file); - assert!(schema.is_ok()); - - let file = std::fs::File::open("./tests/serde/checkpoint_schema.json").unwrap(); - let schema: Result = serde_json::from_reader(file); - assert!(schema.is_ok()) - } #[test] fn test_get_invariants() { @@ -934,88 +200,4 @@ mod tests { let buf = r#"{"type":"struct","fields":[{"name":"ID_D_DATE","type":"long","nullable":true,"metadata":{"delta.identity.start":1,"delta.identity.step":1,"delta.identity.allowExplicitInsert":false}},{"name":"TXT_DateKey","type":"string","nullable":true,"metadata":{}}]}"#; let _schema: StructType = serde_json::from_str(buf).expect("Failed to load"); } - - fn get_hash(field: &StructField) -> u64 { - let mut hasher = DefaultHasher::new(); - field.hash(&mut hasher); - hasher.finish() - } - - #[test] - fn test_hash_struct_field() { - // different names should result in different hashes - let field_1 = StructField::new( - "field_name_1", - DataType::Primitive(PrimitiveType::Decimal(4, 4)), - true, - ); - let field_2 = StructField::new( - "field_name_2", - 
DataType::Primitive(PrimitiveType::Decimal(4, 4)), - true, - ); - assert_ne!(get_hash(&field_1), get_hash(&field_2)); - - // different types should result in different hashes - let field_int = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::Integer), - true, - ); - let field_string = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::String), - true, - ); - assert_ne!(get_hash(&field_int), get_hash(&field_string)); - - // different nullability should result in different hashes - let field_true = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::Binary), - true, - ); - let field_false = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::Binary), - false, - ); - assert_ne!(get_hash(&field_true), get_hash(&field_false)); - - // case where hashes are the same - let field_1 = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::Timestamp), - true, - ); - let field_2 = StructField::new( - "field_name", - DataType::Primitive(PrimitiveType::Timestamp), - true, - ); - assert_eq!(get_hash(&field_1), get_hash(&field_2)); - } - - #[test] - fn test_field_with_name() { - let schema = StructType::new(vec![ - StructField::new("a", DataType::STRING, true), - StructField::new("b", DataType::INTEGER, true), - ]); - let field = schema.field_with_name("b").unwrap(); - assert_eq!(*field, StructField::new("b", DataType::INTEGER, true)); - } - - #[test] - fn test_field_with_name_nested() { - let nested = StructType::new(vec![StructField::new("a", DataType::BOOLEAN, true)]); - let schema = StructType::new(vec![ - StructField::new("a", DataType::STRING, true), - StructField::new("b", DataType::Struct(Box::new(nested)), true), - ]); - - let field = schema.field_with_name("b.a").unwrap(); - - assert_eq!(*field, StructField::new("a", DataType::BOOLEAN, true)); - } } diff --git a/crates/core/src/kernel/scalars.rs b/crates/core/src/kernel/scalars.rs new file mode 100644 index 0000000000..bc1bd6eed9 --- /dev/null +++ b/crates/core/src/kernel/scalars.rs @@ -0,0 +1,286 @@ +//! Auxiliary methods for dealing with kernel scalars +use std::cmp::Ordering; + +use arrow_array::Array; +use arrow_schema::TimeUnit; +use chrono::{DateTime, TimeZone, Utc}; +use delta_kernel::{ + expressions::{Scalar, StructData}, + schema::StructField, +}; +use object_store::path::Path; +#[cfg(test)] +use serde_json::Value; +use urlencoding::encode; + +use crate::NULL_PARTITION_VALUE_DATA_PATH; + +/// Auxiliary methods for dealing with kernel scalars +pub trait ScalarExt: Sized { + /// Serialize to string + fn serialize(&self) -> String; + /// Serialize to string for use in hive partition file names + fn serialize_encoded(&self) -> String; + /// Create a [`Scalar`] from an arrow array row + fn from_array(arr: &dyn Array, index: usize) -> Option; + /// Serialize as serde_json::Value + #[cfg(test)] + fn to_json(&self) -> serde_json::Value; +} + +impl ScalarExt for Scalar { + /// Serializes this scalar as a string. 
+ fn serialize(&self) -> String { + match self { + Self::String(s) => s.to_owned(), + Self::Byte(b) => b.to_string(), + Self::Short(s) => s.to_string(), + Self::Integer(i) => i.to_string(), + Self::Long(l) => l.to_string(), + Self::Float(f) => f.to_string(), + Self::Double(d) => d.to_string(), + Self::Boolean(b) => if *b { "true" } else { "false" }.to_string(), + Self::TimestampNtz(ts) | Self::Timestamp(ts) => { + let ts = Utc.timestamp_micros(*ts).single().unwrap(); + ts.format("%Y-%m-%d %H:%M:%S%.6f").to_string() + } + Self::Date(days) => { + let date = DateTime::from_timestamp(*days as i64 * 24 * 3600, 0).unwrap(); + date.format("%Y-%m-%d").to_string() + } + Self::Decimal(value, _, scale) => match scale.cmp(&0) { + Ordering::Equal => value.to_string(), + Ordering::Greater => { + let scalar_multiple = 10_i128.pow(*scale as u32); + let mut s = String::new(); + s.push_str((value / scalar_multiple).to_string().as_str()); + s.push('.'); + s.push_str(&format!( + "{:0>scale$}", + value % scalar_multiple, + scale = *scale as usize + )); + s + } + Ordering::Less => { + let mut s = value.to_string(); + for _ in 0..*scale { + s.push('0'); + } + s + } + }, + Self::Binary(val) => create_escaped_binary_string(val.as_slice()), + Self::Null(_) => "null".to_string(), + Self::Struct(_) => unimplemented!(), + } + } + + /// Serializes this scalar as a string for use in hive partition file names. + fn serialize_encoded(&self) -> String { + if self.is_null() { + return NULL_PARTITION_VALUE_DATA_PATH.to_string(); + } + encode(Path::from(self.serialize()).as_ref()).to_string() + } + + /// Create a [`Scalar`] form a row in an arrow array. + fn from_array(arr: &dyn Array, index: usize) -> Option { + use arrow_array::*; + use arrow_schema::DataType::*; + + if arr.len() <= index { + return None; + } + if arr.is_null(index) { + return Some(Self::Null(arr.data_type().try_into().ok()?)); + } + + match arr.data_type() { + Utf8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::String(v.value(index).to_string())), + LargeUtf8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::String(v.value(index).to_string())), + Boolean => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Boolean(v.value(index))), + Binary => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + LargeBinary => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + FixedSizeBinary(_) => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Binary(v.value(index).to_vec())), + Int8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Byte(v.value(index))), + Int16 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Short(v.value(index))), + Int32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Integer(v.value(index))), + Int64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Long(v.value(index))), + UInt8 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Byte(v.value(index) as i8)), + UInt16 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Short(v.value(index) as i16)), + UInt32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Integer(v.value(index) as i32)), + UInt64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Long(v.value(index) as i64)), + Float32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Float(v.value(index))), + Float64 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Double(v.value(index))), + Decimal128(precision, scale) => { + arr.as_any().downcast_ref::().map(|v| { + let value = 
v.value(index); + Self::Decimal(value, *precision, *scale as u8) + }) + } + Date32 => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Date(v.value(index))), + Timestamp(TimeUnit::Microsecond, None) => arr + .as_any() + .downcast_ref::() + .map(|v| Self::TimestampNtz(v.value(index))), + Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.eq_ignore_ascii_case("utc") => arr + .as_any() + .downcast_ref::() + .map(|v| Self::Timestamp(v.clone().value(index))), + Struct(fields) => { + let struct_fields = fields + .iter() + .flat_map(|f| TryFrom::try_from(f.as_ref())) + .collect::>(); + let values = arr + .as_any() + .downcast_ref::() + .and_then(|struct_arr| { + struct_fields + .iter() + .map(|f: &StructField| { + struct_arr + .column_by_name(f.name()) + .and_then(|c| Self::from_array(c.as_ref(), index)) + }) + .collect::>>() + })?; + Some(Self::Struct( + StructData::try_new(struct_fields, values).ok()?, + )) + } + Float16 + | Decimal256(_, _) + | List(_) + | LargeList(_) + | FixedSizeList(_, _) + | Map(_, _) + | Date64 + | Timestamp(_, _) + | Time32(_) + | Time64(_) + | Duration(_) + | Interval(_) + | Dictionary(_, _) + | RunEndEncoded(_, _) + | Union(_, _) + | Utf8View + | BinaryView + | ListView(_) + | LargeListView(_) + | Null => None, + } + } + + /// Serializes this scalar as a serde_json::Value. + #[cfg(test)] + fn to_json(&self) -> serde_json::Value { + match self { + Self::String(s) => Value::String(s.to_owned()), + Self::Byte(b) => Value::Number(serde_json::Number::from(*b)), + Self::Short(s) => Value::Number(serde_json::Number::from(*s)), + Self::Integer(i) => Value::Number(serde_json::Number::from(*i)), + Self::Long(l) => Value::Number(serde_json::Number::from(*l)), + Self::Float(f) => Value::Number(serde_json::Number::from_f64(*f as f64).unwrap()), + Self::Double(d) => Value::Number(serde_json::Number::from_f64(*d).unwrap()), + Self::Boolean(b) => Value::Bool(*b), + Self::TimestampNtz(ts) | Self::Timestamp(ts) => { + let ts = Utc.timestamp_micros(*ts).single().unwrap(); + Value::String(ts.format("%Y-%m-%d %H:%M:%S%.6f").to_string()) + } + Self::Date(days) => { + let date = DateTime::from_timestamp(*days as i64 * 24 * 3600, 0).unwrap(); + Value::String(date.format("%Y-%m-%d").to_string()) + } + Self::Decimal(value, _, scale) => match scale.cmp(&0) { + Ordering::Equal => Value::String(value.to_string()), + Ordering::Greater => { + let scalar_multiple = 10_i128.pow(*scale as u32); + let mut s = String::new(); + s.push_str((value / scalar_multiple).to_string().as_str()); + s.push('.'); + s.push_str(&format!( + "{:0>scale$}", + value % scalar_multiple, + scale = *scale as usize + )); + Value::String(s) + } + Ordering::Less => { + let mut s = value.to_string(); + for _ in 0..*scale { + s.push('0'); + } + Value::String(s) + } + }, + Self::Binary(val) => Value::String(create_escaped_binary_string(val.as_slice())), + Self::Null(_) => Value::Null, + Self::Struct(_) => unimplemented!(), + } + } +} + +fn create_escaped_binary_string(data: &[u8]) -> String { + let mut escaped_string = String::new(); + for &byte in data { + // Convert each byte to its two-digit hexadecimal representation + let hex_representation = format!("{:04X}", byte); + // Append the hexadecimal representation with an escape sequence + escaped_string.push_str("\\u"); + escaped_string.push_str(&hex_representation); + } + escaped_string +} diff --git a/crates/core/src/kernel/snapshot/log_data.rs b/crates/core/src/kernel/snapshot/log_data.rs index 24fae0ad75..1a30ec7c46 100644 --- a/crates/core/src/kernel/snapshot/log_data.rs 
+++ b/crates/core/src/kernel/snapshot/log_data.rs @@ -2,16 +2,20 @@ use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; -use arrow_array::{Array, Int32Array, Int64Array, MapArray, RecordBatch, StringArray, StructArray}; +use arrow_array::{ + Array, Int32Array, Int64Array, MapArray, RecordBatch, StringArray, StructArray, UInt64Array, +}; use chrono::{DateTime, Utc}; +use delta_kernel::expressions::Scalar; use indexmap::IndexMap; use object_store::path::Path; use object_store::ObjectMeta; use percent_encoding::percent_decode_str; +use super::super::scalars::ScalarExt; use crate::kernel::arrow::extract::{extract_and_cast, extract_and_cast_opt}; use crate::kernel::{ - DataType, DeletionVectorDescriptor, Metadata, Remove, Scalar, StructField, StructType, + DataType, DeletionVectorDescriptor, Metadata, Remove, StructField, StructType, }; use crate::{DeltaResult, DeltaTableError}; @@ -196,12 +200,16 @@ impl LogicalFile<'_> { .column(0) .as_any() .downcast_ref::() - .ok_or(DeltaTableError::Generic("()".into()))?; + .ok_or(DeltaTableError::generic( + "expected partition values key field to be of type string", + ))?; let values = map_value .column(1) .as_any() .downcast_ref::() - .ok_or(DeltaTableError::Generic("()".into()))?; + .ok_or(DeltaTableError::generic( + "expected partition values value field to be of type string", + ))?; let values = keys .iter() @@ -210,8 +218,8 @@ impl LogicalFile<'_> { let (key, field) = self.partition_fields.get_key_value(k.unwrap()).unwrap(); let field_type = match field.data_type() { DataType::Primitive(p) => Ok(p), - _ => Err(DeltaTableError::Generic( - "nested partitioning values are not supported".to_string(), + _ => Err(DeltaTableError::generic( + "nested partitioning values are not supported", )), }?; Ok(( @@ -223,7 +231,7 @@ impl LogicalFile<'_> { }) .collect::>>()?; - // NOTE: we recreate the map as a BTreeMap to ensure the order of the keys is consistently + // NOTE: we recreate the map as an IndexMap to ensure the order of the keys is consistently
self.partition_fields .iter() @@ -351,7 +359,16 @@ impl<'a> FileStatsAccessor<'a> { metadata .partition_columns .iter() - .map(|c| Ok((c.as_str(), schema.field_with_name(c.as_str())?))) + .map(|c| { + Ok(( + c.as_str(), + schema + .field(c.as_str()) + .ok_or(DeltaTableError::PartitionError { + partition: c.clone(), + })?, + )) + }) .collect::>>()?, ); let deletion_vector = extract_and_cast_opt::(data, "add.deletionVector"); @@ -459,20 +476,35 @@ impl<'a> IntoIterator for LogDataHandler<'a> { #[cfg(feature = "datafusion")] mod datafusion { + use std::collections::HashSet; use std::sync::Arc; + use ::datafusion::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; + use ::datafusion::physical_optimizer::pruning::PruningStatistics; + use ::datafusion::physical_plan::Accumulator; + use arrow::compute::concat_batches; use arrow_arith::aggregate::sum; - use arrow_array::Int64Array; + use arrow_array::{ArrayRef, BooleanArray, Int64Array}; use arrow_schema::DataType as ArrowDataType; use datafusion_common::scalar::ScalarValue; use datafusion_common::stats::{ColumnStatistics, Precision, Statistics}; - use datafusion_expr::AggregateFunction; - use datafusion_physical_expr::aggregate::AggregateExpr; - use datafusion_physical_expr::expressions::{Column, Max, Min}; + use datafusion_common::Column; + use delta_kernel::engine::arrow_data::ArrowEngineData; + use delta_kernel::expressions::Expression; + use delta_kernel::schema::{DataType, PrimitiveType}; + use delta_kernel::{ExpressionEvaluator, ExpressionHandler}; use super::*; use crate::kernel::arrow::extract::{extract_and_cast_opt, extract_column}; + use crate::kernel::ARROW_HANDLER; + #[derive(Debug, Default, Clone)] + enum AccumulatorType { + Min, + Max, + #[default] + Unused, + } // TODO validate this works with "wide and narrow" builds / stats impl FileStatsAccessor<'_> { @@ -501,7 +533,7 @@ mod datafusion { &self, path_step: &str, name: &str, - fun: &AggregateFunction, + fun_type: AccumulatorType, ) -> Precision { let mut path = name.split('.'); let array = if let Ok(array) = extract_column(self.stats, path_step, &mut path) { @@ -511,28 +543,24 @@ mod datafusion { }; if array.data_type().is_primitive() { - let agg: Box = match fun { - AggregateFunction::Min => Box::new(Min::new( - // NOTE: this is just a placeholder, we never evalutae this expression - Arc::new(Column::new(name, 0)), - name, - array.data_type().clone(), - )), - AggregateFunction::Max => Box::new(Max::new( - // NOTE: this is just a placeholder, we never evalutae this expression - Arc::new(Column::new(name, 0)), - name, - array.data_type().clone(), - )), - _ => return Precision::Absent, + let accumulator: Option> = match fun_type { + AccumulatorType::Min => MinAccumulator::try_new(array.data_type()) + .map_or(None, |a| Some(Box::new(a))), + AccumulatorType::Max => MaxAccumulator::try_new(array.data_type()) + .map_or(None, |a| Some(Box::new(a))), + _ => None, }; - let mut accum = agg.create_accumulator().ok().unwrap(); - return accum - .update_batch(&[array.clone()]) - .ok() - .and_then(|_| accum.evaluate().ok()) - .map(Precision::Exact) - .unwrap_or(Precision::Absent); + + if let Some(mut accumulator) = accumulator { + return accumulator + .update_batch(&[array.clone()]) + .ok() + .and_then(|_| accumulator.evaluate().ok()) + .map(Precision::Exact) + .unwrap_or(Precision::Absent); + } + + return Precision::Absent; } match array.data_type() { @@ -540,7 +568,11 @@ mod datafusion { return fields .iter() .map(|f| { - self.column_bounds(path_step, &format!("{name}.{}", 
f.name()), fun) + self.column_bounds( + path_step, + &format!("{name}.{}", f.name()), + fun_type.clone(), + ) }) .map(|s| match s { Precision::Exact(s) => Some(s), @@ -579,8 +611,7 @@ mod datafusion { let null_count_col = format!("{COL_NULL_COUNT}.{}", name.as_ref()); let null_count = self.collect_count(&null_count_col); - let min_value = - self.column_bounds(COL_MIN_VALUES, name.as_ref(), &AggregateFunction::Min); + let min_value = self.column_bounds(COL_MIN_VALUES, name.as_ref(), AccumulatorType::Min); let min_value = match &min_value { Precision::Exact(value) if value.is_null() => Precision::Absent, // TODO this is a hack, we should not be casting here but rather when we read the checkpoint data. @@ -591,8 +622,7 @@ mod datafusion { _ => min_value, }; - let max_value = - self.column_bounds(COL_MAX_VALUES, name.as_ref(), &AggregateFunction::Max); + let max_value = self.column_bounds(COL_MAX_VALUES, name.as_ref(), AccumulatorType::Max); let max_value = match &max_value { Precision::Exact(value) if value.is_null() => Precision::Absent, Precision::Exact(ScalarValue::TimestampNanosecond(a, b)) => Precision::Exact( @@ -670,7 +700,6 @@ mod datafusion { let column_statistics = self .schema .fields() - .iter() .map(|f| self.column_stats(f.name())) .collect::>>()?; Some(Statistics { @@ -679,6 +708,122 @@ mod datafusion { column_statistics, }) } + + fn pick_stats(&self, column: &Column, stats_field: &'static str) -> Option { + let field = self.schema.field(&column.name)?; + // See issue #1214. Binary type does not support natural order which is required for Datafusion to prune + if field.data_type() == &DataType::Primitive(PrimitiveType::Binary) { + return None; + } + let expression = if self.metadata.partition_columns.contains(&column.name) { + Expression::Column(format!("add.partitionValues_parsed.{}", column.name)) + } else { + Expression::Column(format!("add.stats_parsed.{}.{}", stats_field, column.name)) + }; + let evaluator = ARROW_HANDLER.get_evaluator( + crate::kernel::models::fields::log_schema_ref().clone(), + expression, + field.data_type().clone(), + ); + let mut results = Vec::with_capacity(self.data.len()); + for batch in self.data.iter() { + let engine = ArrowEngineData::new(batch.clone()); + let result = evaluator.evaluate(&engine).ok()?; + let result = result + .as_any() + .downcast_ref::() + .ok_or(DeltaTableError::generic( + "failed to downcast evaluator result to ArrowEngineData.", + )) + .ok()?; + results.push(result.record_batch().clone()); + } + let batch = concat_batches(results[0].schema_ref(), &results).ok()?; + batch.column_by_name("output").map(|c| c.clone()) + } + } + + impl<'a> PruningStatistics for LogDataHandler<'a> { + /// return the minimum values for the named column, if known. + /// Note: the returned array must contain `num_containers()` rows + fn min_values(&self, column: &Column) -> Option { + self.pick_stats(column, "minValues") + } + + /// return the maximum values for the named column, if known. + /// Note: the returned array must contain `num_containers()` rows. + fn max_values(&self, column: &Column) -> Option { + self.pick_stats(column, "maxValues") + } + + /// return the number of containers (e.g. row groups) being + /// pruned with these statistics + fn num_containers(&self) -> usize { + self.data.iter().map(|f| f.num_rows()).sum() + } + + /// return the number of null values for the named column as an + /// `Option`. + /// + /// Note: the returned array must contain `num_containers()` rows. 
+ fn null_counts(&self, column: &Column) -> Option { + if !self.metadata.partition_columns.contains(&column.name) { + let counts = self.pick_stats(column, "nullCount")?; + return arrow_cast::cast(counts.as_ref(), &ArrowDataType::UInt64).ok(); + } + let partition_values = self.pick_stats(column, "__dummy__")?; + let row_counts = self.row_counts(column)?; + let row_counts = row_counts.as_any().downcast_ref::()?; + let mut null_counts = Vec::with_capacity(partition_values.len()); + for i in 0..partition_values.len() { + let null_count = if partition_values.is_null(i) { + row_counts.value(i) + } else { + 0 + }; + null_counts.push(null_count); + } + Some(Arc::new(UInt64Array::from(null_counts))) + } + + /// return the number of rows for the named column in each container + /// as an `Option`. + /// + /// Note: the returned array must contain `num_containers()` rows + fn row_counts(&self, _column: &Column) -> Option { + lazy_static::lazy_static! { + static ref ROW_COUNTS_EVAL: Arc = ARROW_HANDLER.get_evaluator( + crate::kernel::models::fields::log_schema_ref().clone(), + Expression::column("add.stats_parsed.numRecords"), + DataType::Primitive(PrimitiveType::Long), + ); + } + let mut results = Vec::with_capacity(self.data.len()); + for batch in self.data.iter() { + let engine = ArrowEngineData::new(batch.clone()); + let result = ROW_COUNTS_EVAL.evaluate(&engine).ok()?; + let result = result + .as_any() + .downcast_ref::() + .ok_or(DeltaTableError::generic( + "failed to downcast evaluator result to ArrowEngineData.", + )) + .ok()?; + results.push(result.record_batch().clone()); + } + let batch = concat_batches(results[0].schema_ref(), &results).ok()?; + arrow_cast::cast(batch.column_by_name("output")?, &ArrowDataType::UInt64).ok() + } + + // This function is required since DataFusion 35.0, but is implemented as a no-op + // https://github.com/apache/arrow-datafusion/blob/ec6abece2dcfa68007b87c69eefa6b0d7333f628/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs#L550 + fn contained( + &self, + _column: &Column, + _value: &HashSet, + ) -> Option { + None + } } } diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 2f76ac18d4..596304e003 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -9,8 +9,9 @@ use itertools::Itertools; use lazy_static::lazy_static; use object_store::path::Path; use object_store::{Error as ObjectStoreError, ObjectMeta, ObjectStore}; -use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; +use parquet::arrow::ProjectionMask; use regex::Regex; use serde::{Deserialize, Serialize}; use tracing::debug; @@ -36,7 +37,7 @@ lazy_static! 
{ /// specifically, this trait adds the ability to recognize valid log files and /// parse the version number from a log file path // TODO handle compaction files -pub(super) trait PathExt { +pub(crate) trait PathExt { fn child(&self, path: impl AsRef) -> DeltaResult; /// Returns the last path segment if not terminated with a "/" fn filename(&self) -> Option<&str>; @@ -250,19 +251,45 @@ impl LogSegment { pub(super) fn checkpoint_stream( &self, store: Arc, - _read_schema: &Schema, + read_schema: &Schema, config: &DeltaTableConfig, ) -> BoxStream<'_, DeltaResult> { let batch_size = config.log_batch_size; + let read_schema = Arc::new(read_schema.clone()); futures::stream::iter(self.checkpoint_files.clone()) .map(move |meta| { let store = store.clone(); + let read_schema = read_schema.clone(); async move { - let reader = ParquetObjectReader::new(store, meta); - let options = ArrowReaderOptions::new(); //.with_page_index(enable_page_index); - let builder = - ParquetRecordBatchStreamBuilder::new_with_options(reader, options).await?; - builder.with_batch_size(batch_size).build() + let mut reader = ParquetObjectReader::new(store, meta); + let options = ArrowReaderOptions::new(); + let reader_meta = ArrowReaderMetadata::load_async(&mut reader, options).await?; + + // Create projection selecting read_schema fields from parquet file's arrow schema + let projection = reader_meta + .schema() + .fields + .iter() + .enumerate() + .filter_map(|(i, f)| { + if read_schema.fields.contains_key(f.name()) { + Some(i) + } else { + None + } + }) + .collect::>(); + let projection = + ProjectionMask::roots(reader_meta.parquet_schema(), projection); + + // Note: the output batch stream batches have all null value rows for action types not + // present in the projection. When a RowFilter was used to remove null rows, the performance + // got worse when projecting all fields, and was no better when projecting a subset. + // The all null rows are filtered out anyway when the batch stream is consumed. + ParquetRecordBatchStreamBuilder::new_with_metadata(reader, reader_meta) + .with_projection(projection.clone()) + .with_batch_size(batch_size) + .build() } }) .buffered(config.log_buffer_size) @@ -373,13 +400,13 @@ struct CheckpointMetadata { #[allow(unreachable_pub)] // used by acceptance tests (TODO make an fn accessor?) pub version: i64, /// The number of actions that are stored in the checkpoint. - pub(crate) size: i32, + pub(crate) size: i64, /// The number of fragments if the last checkpoint was written in multiple parts. pub(crate) parts: Option, /// The number of bytes of the checkpoint. - pub(crate) size_in_bytes: Option, + pub(crate) size_in_bytes: Option, /// The number of AddFile actions in the checkpoint. - pub(crate) num_of_add_files: Option, + pub(crate) num_of_add_files: Option, /// The schema of the checkpoint file. pub(crate) checkpoint_schema: Option, /// The checksum of the last checkpoint JSON. 
@@ -514,7 +541,13 @@ pub(super) mod tests { use deltalake_test::utils::*; use tokio::task::JoinHandle; - use crate::checkpoints::create_checkpoint_from_table_uri_and_cleanup; + use crate::{ + checkpoints::{create_checkpoint_for, create_checkpoint_from_table_uri_and_cleanup}, + kernel::{Action, Add, Format, Remove}, + operations::transaction::{CommitBuilder, TableReference}, + protocol::{DeltaOperation, SaveMode}, + DeltaTableBuilder, + }; use super::*; @@ -655,13 +688,11 @@ pub(super) mod tests { mod slow_store { use std::sync::Arc; - use bytes::Bytes; use futures::stream::BoxStream; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, - PutOptions, PutResult, Result, + path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, }; - use tokio::io::AsyncWrite; #[derive(Debug)] pub(super) struct SlowListStore { @@ -679,24 +710,21 @@ pub(super) mod tests { async fn put_opts( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, opts: PutOptions, ) -> Result { self.store.put_opts(location, bytes, opts).await } - async fn put_multipart( - &self, - location: &Path, - ) -> Result<(MultipartId, Box)> { + async fn put_multipart(&self, location: &Path) -> Result> { self.store.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &Path, - multipart_id: &MultipartId, - ) -> Result<()> { - self.store.abort_multipart(location, multipart_id).await + opts: PutMultipartOpts, + ) -> Result> { + self.store.put_multipart_opts(location, opts).await } async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { @@ -742,4 +770,94 @@ pub(super) mod tests { assert!(!path.is_commit_file()); } } + + #[tokio::test] + async fn test_checkpoint_stream_parquet_read() { + let metadata = Metadata { + id: "test".to_string(), + format: Format::new("parquet".to_string(), None), + schema_string: r#"{"type":"struct", "fields": []}"#.to_string(), + ..Default::default() + }; + let protocol = Protocol::default(); + + let mut actions = vec![Action::Metadata(metadata), Action::Protocol(protocol)]; + for i in 0..10 { + actions.push(Action::Add(Add { + path: format!("part-{}.parquet", i), + modification_time: chrono::Utc::now().timestamp_millis(), + ..Default::default() + })); + } + + let log_store = DeltaTableBuilder::from_uri("memory:///") + .build_storage() + .unwrap(); + let op = DeltaOperation::Write { + mode: SaveMode::Overwrite, + partition_by: None, + predicate: None, + }; + let commit = CommitBuilder::default() + .with_actions(actions) + .build(None, log_store.clone(), op) + .await + .unwrap(); + + let mut actions = Vec::new(); + // remove all but one file + for i in 0..9 { + actions.push(Action::Remove(Remove { + path: format!("part-{}.parquet", i), + deletion_timestamp: Some(chrono::Utc::now().timestamp_millis()), + ..Default::default() + })) + } + + let op = DeltaOperation::Delete { predicate: None }; + let table_data = &commit.snapshot as &dyn TableReference; + let commit = CommitBuilder::default() + .with_actions(actions) + .build(Some(table_data), log_store.clone(), op) + .await + .unwrap(); + + create_checkpoint_for(commit.version, &commit.snapshot, log_store.as_ref()) + .await + .unwrap(); + + let batches = LogSegment::try_new( + &Path::default(), + Some(commit.version), + log_store.object_store().as_ref(), + ) + .await + .unwrap() + .checkpoint_stream( + log_store.object_store(), + 
&StructType::new(vec![ + ActionType::Metadata.schema_field().clone(), + ActionType::Protocol.schema_field().clone(), + ActionType::Add.schema_field().clone(), + ]), + &Default::default(), + ) + .try_collect::>() + .await + .unwrap(); + + let batch = arrow::compute::concat_batches(&batches[0].schema(), batches.iter()).unwrap(); + + // there are 9 remove action rows but all columns are null + // because the removes are not projected in the schema + // these get filtered out upstream and there was no perf + // benefit when applying a row filter + // in addition there is 1 add, 1 metadata, and 1 protocol row + assert_eq!(batch.num_rows(), 12); + + assert_eq!(batch.schema().fields().len(), 3); + assert!(batch.schema().field_with_name("metaData").is_ok()); + assert!(batch.schema().field_with_name("protocol").is_ok()); + assert!(batch.schema().field_with_name("add").is_ok()); + } } diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index cd6cf8bb5f..0df62c867b 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -44,7 +44,7 @@ use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; pub use self::log_data::*; mod log_data; -mod log_segment; +pub(crate) mod log_segment; pub(crate) mod parse; mod replay; mod serde; @@ -193,6 +193,11 @@ impl Snapshot { &self.protocol } + /// Get the table config which is loaded with of the snapshot + pub fn load_config(&self) -> &DeltaTableConfig { + &self.config + } + /// Get the table root of the snapshot pub fn table_root(&self) -> Path { Path::from(self.table_url.clone()) @@ -311,50 +316,19 @@ impl Snapshot { /// Get the statistics schema of the snapshot pub fn stats_schema(&self, table_schema: Option<&StructType>) -> DeltaResult { let schema = table_schema.unwrap_or_else(|| self.schema()); + stats_schema(schema, self.table_config()) + } - let stats_fields = if let Some(stats_cols) = self.table_config().stats_columns() { - stats_cols - .iter() - .map(|col| match schema.field_with_name(col) { - Ok(field) => match field.data_type() { - DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => { - Err(DeltaTableError::Generic(format!( - "Stats column {} has unsupported type {}", - col, - field.data_type() - ))) - } - _ => Ok(StructField::new( - field.name(), - field.data_type().clone(), - true, - )), - }, - _ => Err(DeltaTableError::Generic(format!( - "Stats column {} not found in schema", - col - ))), - }) - .collect::, _>>()? 
- } else { - let num_indexed_cols = self.table_config().num_indexed_cols(); - schema - .fields - .iter() - .enumerate() - .filter_map(|(idx, f)| stats_field(idx, num_indexed_cols, f)) - .collect() - }; - Ok(StructType::new(vec![ - StructField::new("numRecords", DataType::LONG, true), - StructField::new("minValues", StructType::new(stats_fields.clone()), true), - StructField::new("maxValues", StructType::new(stats_fields.clone()), true), - StructField::new( - "nullCount", - StructType::new(stats_fields.iter().filter_map(to_count_field).collect()), - true, - ), - ])) + /// Get the partition values schema of the snapshot + pub fn partitions_schema( + &self, + table_schema: Option<&StructType>, + ) -> DeltaResult> { + if self.metadata().partition_columns.is_empty() { + return Ok(None); + } + let schema = table_schema.unwrap_or_else(|| self.schema()); + partitions_schema(schema, &self.metadata().partition_columns) + } } @@ -369,7 +343,7 @@ pub struct EagerSnapshot { // NOTE: this is a Vec of RecordBatch instead of a single RecordBatch because // we do not yet enforce a consistent schema across all batches we read from the log. - files: Vec, + pub(crate) files: Vec, } impl EagerSnapshot { @@ -395,8 +369,13 @@ impl EagerSnapshot { .iter() .flat_map(get_visitor) .collect::>(); - let snapshot = Snapshot::try_new(table_root, store.clone(), config, version).await?; - let files = snapshot.files(store, &mut visitors)?.try_collect().await?; + let snapshot = + Snapshot::try_new(table_root, store.clone(), config.clone(), version).await?; + + let files = match config.require_files { + true => snapshot.files(store, &mut visitors)?.try_collect().await?, + false => vec![], + }; let mut sn = Self { snapshot, @@ -561,6 +540,11 @@ impl EagerSnapshot { self.snapshot.table_root() } + /// Get the table config with which the snapshot was loaded + pub fn load_config(&self) -> &DeltaTableConfig { + &self.snapshot.load_config() + } + /// Well known table configuration pub fn table_config(&self) -> TableConfig<'_> { self.snapshot.table_config() @@ -688,6 +672,74 @@ impl EagerSnapshot { } } +fn stats_schema(schema: &StructType, config: TableConfig<'_>) -> DeltaResult { + let stats_fields = if let Some(stats_cols) = config.stats_columns() { + stats_cols + .iter() + .map(|col| match get_stats_field(schema, col) { + Some(field) => match field.data_type() { + DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => { + Err(DeltaTableError::Generic(format!( + "Stats column {} has unsupported type {}", + col, + field.data_type() + ))) + } + _ => Ok(StructField::new( + field.name(), + field.data_type().clone(), + true, + )), + }, + _ => Err(DeltaTableError::Generic(format!( + "Stats column {} not found in schema", + col + ))), + }) + .collect::, _>>()?
+ } else { + let num_indexed_cols = config.num_indexed_cols(); + schema + .fields + .values() + .enumerate() + .filter_map(|(idx, f)| stats_field(idx, num_indexed_cols, f)) + .collect() + }; + Ok(StructType::new(vec![ + StructField::new("numRecords", DataType::LONG, true), + StructField::new("minValues", StructType::new(stats_fields.clone()), true), + StructField::new("maxValues", StructType::new(stats_fields.clone()), true), + StructField::new( + "nullCount", + StructType::new(stats_fields.iter().filter_map(to_count_field).collect()), + true, + ), + ])) +} + +pub(crate) fn partitions_schema( + schema: &StructType, + partition_columns: &Vec, +) -> DeltaResult> { + if partition_columns.is_empty() { + return Ok(None); + } + Ok(Some(StructType::new( + partition_columns + .iter() + .map(|col| { + schema.field(col).map(|field| field.clone()).ok_or_else(|| { + DeltaTableError::Generic(format!( + "Partition column {} not found in schema", + col + )) + }) + }) + .collect::, _>>()?, + ))) +} + fn stats_field(idx: usize, num_indexed_cols: i32, field: &StructField) -> Option { if !(num_indexed_cols < 0 || (idx as i32) < num_indexed_cols) { return None; @@ -699,7 +751,6 @@ fn stats_field(idx: usize, num_indexed_cols: i32, field: &StructField) -> Option StructType::new( dt_struct .fields() - .iter() .flat_map(|f| stats_field(idx, num_indexed_cols, f)) .collect(), ), @@ -718,12 +769,7 @@ fn to_count_field(field: &StructField) -> Option { DataType::Map(_) | DataType::Array(_) | &DataType::BINARY => None, DataType::Struct(s) => Some(StructField::new( field.name(), - StructType::new( - s.fields() - .iter() - .filter_map(to_count_field) - .collect::>(), - ), + StructType::new(s.fields().filter_map(to_count_field).collect::>()), true, )), _ => Some(StructField::new(field.name(), DataType::LONG, true)), @@ -744,6 +790,45 @@ mod datafusion { } } +/// Retrieves a specific field from the schema based on the provided field name. +/// It handles cases where the field name is nested or enclosed in backticks. 
+fn get_stats_field<'a>(schema: &'a StructType, stats_field_name: &str) -> Option<&'a StructField> { + let dialect = sqlparser::dialect::GenericDialect {}; + match sqlparser::parser::Parser::new(&dialect).try_with_sql(stats_field_name) { + Ok(mut parser) => match parser.parse_multipart_identifier() { + Ok(parts) => find_nested_field(schema, &parts), + Err(_) => schema.field(stats_field_name), + }, + Err(_) => schema.field(stats_field_name), + } +} + +fn find_nested_field<'a>( + schema: &'a StructType, + parts: &[sqlparser::ast::Ident], +) -> Option<&'a StructField> { + if parts.is_empty() { + return None; + } + let part_name = &parts[0].value; + match schema.field(part_name) { + Some(field) => { + if parts.len() == 1 { + Some(field) + } else { + match field.data_type() { + DataType::Struct(struct_schema) => { + find_nested_field(struct_schema, &parts[1..]) + } + // Any part before the end must be a struct + _ => None, + } + } + } + None => None, + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; @@ -758,6 +843,7 @@ mod tests { use super::*; use crate::kernel::Remove; use crate::protocol::{DeltaOperation, SaveMode}; + use crate::test_utils::ActionFactory; #[tokio::test] async fn test_snapshots() -> TestResult { @@ -962,4 +1048,167 @@ mod tests { Ok(()) } + + #[test] + fn test_partition_schema() { + let schema = StructType::new(vec![ + StructField::new("id", DataType::LONG, true), + StructField::new("name", DataType::STRING, true), + StructField::new("date", DataType::DATE, true), + ]); + + let partition_columns = vec!["date".to_string()]; + let metadata = ActionFactory::metadata(&schema, Some(&partition_columns), None); + let protocol = ActionFactory::protocol(None, None, None::>, None::>); + + let commit_data = CommitData::new( + vec![ + Action::Protocol(protocol.clone()), + Action::Metadata(metadata.clone()), + ], + DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: Some(partition_columns), + predicate: None, + }, + HashMap::new(), + vec![], + ); + let (log_segment, _) = LogSegment::new_test(vec![&commit_data]).unwrap(); + + let snapshot = Snapshot { + log_segment: log_segment.clone(), + protocol: protocol.clone(), + metadata, + schema: schema.clone(), + table_url: "table".to_string(), + config: Default::default(), + }; + + let expected = StructType::new(vec![StructField::new("date", DataType::DATE, true)]); + assert_eq!(snapshot.partitions_schema(None).unwrap(), Some(expected)); + + let metadata = ActionFactory::metadata(&schema, None::>, None); + let commit_data = CommitData::new( + vec![ + Action::Protocol(protocol.clone()), + Action::Metadata(metadata.clone()), + ], + DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: None, + predicate: None, + }, + HashMap::new(), + vec![], + ); + let (log_segment, _) = LogSegment::new_test(vec![&commit_data]).unwrap(); + + let snapshot = Snapshot { + log_segment, + config: Default::default(), + protocol: protocol.clone(), + metadata, + schema: schema.clone(), + table_url: "table".to_string(), + }; + + assert_eq!(snapshot.partitions_schema(None).unwrap(), None); + } + + #[test] + fn test_field_with_name() { + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::INTEGER, true), + ]); + let field = get_stats_field(&schema, "b").unwrap(); + assert_eq!(*field, StructField::new("b", DataType::INTEGER, true)); + } + + #[test] + fn test_field_with_name_escaped() { + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + 
StructField::new("b.b", DataType::INTEGER, true), + ]); + let field = get_stats_field(&schema, "`b.b`").unwrap(); + assert_eq!(*field, StructField::new("b.b", DataType::INTEGER, true)); + } + + #[test] + fn test_field_does_not_exist() { + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::INTEGER, true), + ]); + let field = get_stats_field(&schema, "c"); + assert!(field.is_none()); + } + + #[test] + fn test_field_part_is_not_struct() { + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::INTEGER, true), + ]); + let field = get_stats_field(&schema, "b.c"); + assert!(field.is_none()); + } + + #[test] + fn test_field_name_does_not_parse() { + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::INTEGER, true), + ]); + let field = get_stats_field(&schema, "b."); + assert!(field.is_none()); + } + + #[test] + fn test_field_with_name_nested() { + let nested = StructType::new(vec![StructField::new( + "nested_struct", + DataType::BOOLEAN, + true, + )]); + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::Struct(Box::new(nested)), true), + ]); + + let field = get_stats_field(&schema, "b.nested_struct").unwrap(); + + assert_eq!( + *field, + StructField::new("nested_struct", DataType::BOOLEAN, true) + ); + } + + #[test] + fn test_field_with_last_name_nested_backticks() { + let nested = StructType::new(vec![StructField::new("pr!me", DataType::BOOLEAN, true)]); + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b", DataType::Struct(Box::new(nested)), true), + ]); + + let field = get_stats_field(&schema, "b.`pr!me`").unwrap(); + + assert_eq!(*field, StructField::new("pr!me", DataType::BOOLEAN, true)); + } + + #[test] + fn test_field_with_name_nested_backticks() { + let nested = StructType::new(vec![StructField::new("pr", DataType::BOOLEAN, true)]); + let schema = StructType::new(vec![ + StructField::new("a", DataType::STRING, true), + StructField::new("b&b", DataType::Struct(Box::new(nested)), true), + ]); + + let field = get_stats_field(&schema, "`b&b`.pr").unwrap(); + + assert_eq!(*field, StructField::new("pr", DataType::BOOLEAN, true)); + } } diff --git a/crates/core/src/kernel/snapshot/parse.rs b/crates/core/src/kernel/snapshot/parse.rs index a3ccef1902..f75744691e 100644 --- a/crates/core/src/kernel/snapshot/parse.rs +++ b/crates/core/src/kernel/snapshot/parse.rs @@ -11,6 +11,11 @@ use crate::{DeltaResult, DeltaTableError}; pub(super) fn read_metadata(batch: &dyn ProvidesColumnByName) -> DeltaResult> { if let Some(arr) = ex::extract_and_cast_opt::(batch, "metaData") { + // Stop early if all values are null + if arr.null_count() == arr.len() { + return Ok(None); + } + let id = ex::extract_and_cast::(arr, "id")?; let name = ex::extract_and_cast::(arr, "name")?; let description = ex::extract_and_cast::(arr, "description")?; @@ -43,6 +48,11 @@ pub(super) fn read_metadata(batch: &dyn ProvidesColumnByName) -> DeltaResult DeltaResult> { if let Some(arr) = ex::extract_and_cast_opt::(batch, "protocol") { + // Stop early if all values are null + if arr.null_count() == arr.len() { + return Ok(None); + } + let min_reader_version = ex::extract_and_cast::(arr, "minReaderVersion")?; let min_writer_version = ex::extract_and_cast::(arr, "minWriterVersion")?; let maybe_reader_features = 
ex::extract_and_cast_opt::(arr, "readerFeatures"); @@ -73,7 +83,7 @@ pub(super) fn read_adds(array: &dyn ProvidesColumnByName) -> DeltaResult(arr, "size")?; let modification_time = ex::extract_and_cast::(arr, "modificationTime")?; let data_change = ex::extract_and_cast::(arr, "dataChange")?; - let stats = ex::extract_and_cast::(arr, "stats")?; + let stats = ex::extract_and_cast_opt::(arr, "stats"); let tags = ex::extract_and_cast_opt::(arr, "tags"); let dv = ex::extract_and_cast_opt::(arr, "deletionVector"); @@ -116,7 +126,8 @@ pub(super) fn read_adds(array: &dyn ProvidesColumnByName) -> DeltaResult DeltaResult(array, "cdc") { + // Stop early if all values are null + if arr.null_count() == arr.len() { + return Ok(result); + } + let path = ex::extract_and_cast::(arr, "path")?; let pvs = ex::extract_and_cast_opt::(arr, "partitionValues"); let size = ex::extract_and_cast::(arr, "size")?; @@ -171,6 +187,11 @@ pub(super) fn read_removes(array: &dyn ProvidesColumnByName) -> DeltaResult(array, "remove") { + // Stop early if all values are null + if arr.null_count() == arr.len() { + return Ok(result); + } + let path = ex::extract_and_cast::(arr, "path")?; let data_change = ex::extract_and_cast::(arr, "dataChange")?; let deletion_timestamp = ex::extract_and_cast::(arr, "deletionTimestamp")?; @@ -237,7 +258,9 @@ pub(super) fn read_removes(array: &dyn ProvidesColumnByName) -> DeltaResult Option)> + '_> { +pub(super) fn collect_map( + val: &StructArray, +) -> Option)> + '_> { let keys = val .column(0) .as_ref() diff --git a/crates/core/src/kernel/snapshot/replay.rs b/crates/core/src/kernel/snapshot/replay.rs index 3efd9584e2..1b18b61bc7 100644 --- a/crates/core/src/kernel/snapshot/replay.rs +++ b/crates/core/src/kernel/snapshot/replay.rs @@ -1,17 +1,20 @@ +use std::collections::HashMap; use std::pin::Pin; use std::sync::Arc; use std::task::Context; use std::task::Poll; use arrow_arith::boolean::{is_not_null, or}; -use arrow_array::{ - Array, ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray, StructArray, -}; +use arrow_array::MapArray; +use arrow_array::*; use arrow_schema::{ - DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, + DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; use arrow_select::filter::filter_record_batch; +use delta_kernel::expressions::Scalar; +use delta_kernel::schema::DataType; +use delta_kernel::schema::PrimitiveType; use futures::Stream; use hashbrown::HashSet; use itertools::Itertools; @@ -19,14 +22,14 @@ use percent_encoding::percent_decode_str; use pin_project_lite::pin_project; use tracing::debug; +use super::parse::collect_map; +use super::ReplayVisitor; +use super::Snapshot; use crate::kernel::arrow::extract::{self as ex, ProvidesColumnByName}; use crate::kernel::arrow::json; use crate::kernel::StructType; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; -use super::ReplayVisitor; -use super::Snapshot; - pin_project! 
{ pub struct ReplayStream<'a, S> { scanner: LogReplayScanner, @@ -51,8 +54,10 @@ impl<'a, S> ReplayStream<'a, S> { visitors: &'a mut Vec>, ) -> DeltaResult { let stats_schema = Arc::new((&snapshot.stats_schema(None)?).try_into()?); + let partitions_schema = snapshot.partitions_schema(None)?.map(|s| Arc::new(s)); let mapper = Arc::new(LogMapper { stats_schema, + partitions_schema, config: snapshot.config.clone(), }); Ok(Self { @@ -67,6 +72,7 @@ impl<'a, S> ReplayStream<'a, S> { pub(super) struct LogMapper { stats_schema: ArrowSchemaRef, + partitions_schema: Option>, config: DeltaTableConfig, } @@ -77,67 +83,289 @@ impl LogMapper { ) -> DeltaResult { Ok(Self { stats_schema: Arc::new((&snapshot.stats_schema(table_schema)?).try_into()?), + partitions_schema: snapshot + .partitions_schema(table_schema)? + .map(|s| Arc::new(s)), config: snapshot.config.clone(), }) } pub fn map_batch(&self, batch: RecordBatch) -> DeltaResult { - map_batch(batch, self.stats_schema.clone(), &self.config) + map_batch( + batch, + self.stats_schema.clone(), + self.partitions_schema.clone(), + &self.config, + ) } } fn map_batch( batch: RecordBatch, stats_schema: ArrowSchemaRef, + partition_schema: Option>, config: &DeltaTableConfig, ) -> DeltaResult { - let stats_col = ex::extract_and_cast_opt::(&batch, "add.stats"); + let mut new_batch = batch.clone(); + + let stats = ex::extract_and_cast_opt::(&batch, "add.stats"); let stats_parsed_col = ex::extract_and_cast_opt::(&batch, "add.stats_parsed"); - if stats_parsed_col.is_some() { - return Ok(batch); + if stats_parsed_col.is_none() && stats.is_some() { + new_batch = parse_stats(new_batch, stats_schema, config)?; } - if let Some(stats) = stats_col { - let stats: Arc = - Arc::new(json::parse_json(stats, stats_schema.clone(), config)?.into()); - let schema = batch.schema(); - let add_col = ex::extract_and_cast::(&batch, "add")?; - let (add_idx, _) = schema.column_with_name("add").unwrap(); - let add_type = add_col - .fields() - .iter() - .cloned() - .chain(std::iter::once(Arc::new(ArrowField::new( - "stats_parsed", - ArrowDataType::Struct(stats_schema.fields().clone()), - true, - )))) - .collect_vec(); - let new_add = Arc::new(StructArray::try_new( - add_type.clone().into(), - add_col - .columns() - .iter() + + if let Some(partitions_schema) = partition_schema { + let partitions_parsed_col = + ex::extract_and_cast_opt::(&batch, "add.partitionValues_parsed"); + if partitions_parsed_col.is_none() { + new_batch = parse_partitions(new_batch, partitions_schema.as_ref())?; + } + } + + Ok(new_batch) +} + +/// parse the serialized stats in the `add.stats` column in the files batch +/// and add a new column `stats_parsed` containing the the parsed stats. +fn parse_stats( + batch: RecordBatch, + stats_schema: ArrowSchemaRef, + config: &DeltaTableConfig, +) -> DeltaResult { + let stats = ex::extract_and_cast_opt::(&batch, "add.stats").ok_or( + DeltaTableError::generic("No stats column found in files batch. This is unexpected."), + )?; + let stats: StructArray = json::parse_json(stats, stats_schema.clone(), config)?.into(); + insert_field(batch, stats, "stats_parsed") +} + +fn parse_partitions(batch: RecordBatch, partition_schema: &StructType) -> DeltaResult { + let partitions = ex::extract_and_cast_opt::(&batch, "add.partitionValues").ok_or( + DeltaTableError::generic( + "No partitionValues column found in files batch. 
This is unexpected.", + ), + )?; + + let mut values = partition_schema + .fields() + .map(|f| { + ( + f.name().to_string(), + Vec::::with_capacity(partitions.len()), + ) + }) + .collect::>(); + + for i in 0..partitions.len() { + if partitions.is_null(i) { + return Err(DeltaTableError::generic( + "Expected potentially empty partition values map, but found a null value.", + )); + } + let data: HashMap<_, _> = collect_map(&partitions.value(i)) + .ok_or(DeltaTableError::generic( + "Failed to collect partition values from map array.", + ))? + .map(|(k, v)| { + let field = partition_schema + .field(k.as_str()) + .ok_or(DeltaTableError::generic(format!( + "Partition column {} not found in schema.", + k + )))?; + let field_type = match field.data_type() { + DataType::Primitive(p) => Ok(p), + _ => Err(DeltaTableError::generic( + "nested partitioning values are not supported", + )), + }?; + Ok::<_, DeltaTableError>(( + k, + v.map(|vv| field_type.parse_scalar(vv.as_str())) + .transpose()? + .unwrap_or(Scalar::Null(field.data_type().clone())), + )) + }) + .collect::>()?; + + partition_schema.fields().for_each(|f| { + let value = data + .get(f.name()) .cloned() - .chain(std::iter::once(stats as ArrayRef)) - .collect(), - add_col.nulls().cloned(), - )?); - let new_add_field = Arc::new(ArrowField::new( - "add", - ArrowDataType::Struct(add_type.into()), - true, - )); - let mut fields = schema.fields().to_vec(); - let _ = std::mem::replace(&mut fields[add_idx], new_add_field); - let mut columns = batch.columns().to_vec(); - let _ = std::mem::replace(&mut columns[add_idx], new_add); - return Ok(RecordBatch::try_new( - Arc::new(ArrowSchema::new(fields)), - columns, - )?); + .unwrap_or(Scalar::Null(f.data_type().clone())); + values.get_mut(f.name()).unwrap().push(value); + }); } - Ok(batch) + let columns = partition_schema + .fields() + .map(|f| { + let values = values.get(f.name()).unwrap(); + match f.data_type() { + DataType::Primitive(p) => { + // Safety: we created the Scalars above using the parsing function of the same PrimitiveType + // should this fail, it's a bug in our code, and we should panic + let arr = match p { + PrimitiveType::String => { + Arc::new(StringArray::from_iter(values.iter().map(|v| match v { + Scalar::String(s) => Some(s.clone()), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Long => { + Arc::new(Int64Array::from_iter(values.iter().map(|v| match v { + Scalar::Long(i) => Some(*i), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Integer => { + Arc::new(Int32Array::from_iter(values.iter().map(|v| match v { + Scalar::Integer(i) => Some(*i), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Short => { + Arc::new(Int16Array::from_iter(values.iter().map(|v| match v { + Scalar::Short(i) => Some(*i), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Byte => { + Arc::new(Int8Array::from_iter(values.iter().map(|v| match v { + Scalar::Byte(i) => Some(*i), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Float => { + Arc::new(Float32Array::from_iter(values.iter().map(|v| match v { + Scalar::Float(f) => Some(*f), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Double => { + Arc::new(Float64Array::from_iter(values.iter().map(|v| match v { + 
Scalar::Double(f) => Some(*f), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Boolean => { + Arc::new(BooleanArray::from_iter(values.iter().map(|v| match v { + Scalar::Boolean(b) => Some(*b), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Binary => { + Arc::new(BinaryArray::from_iter(values.iter().map(|v| match v { + Scalar::Binary(b) => Some(b.clone()), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + PrimitiveType::Date => { + Arc::new(Date32Array::from_iter(values.iter().map(|v| match v { + Scalar::Date(d) => Some(*d), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + }))) as ArrayRef + } + + PrimitiveType::Timestamp => Arc::new( + TimestampMicrosecondArray::from_iter(values.iter().map(|v| match v { + Scalar::Timestamp(t) => Some(*t), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + })) + .with_timezone("UTC"), + ) as ArrayRef, + PrimitiveType::TimestampNtz => Arc::new( + TimestampMicrosecondArray::from_iter(values.iter().map(|v| match v { + Scalar::TimestampNtz(t) => Some(*t), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + })), + ) as ArrayRef, + PrimitiveType::Decimal(p, s) => Arc::new( + Decimal128Array::from_iter(values.iter().map(|v| match v { + Scalar::Decimal(d, _, _) => Some(*d), + Scalar::Null(_) => None, + _ => panic!("unexpected scalar type"), + })) + .with_precision_and_scale(*p, *s as i8)?, + ) as ArrayRef, + }; + Ok(arr) + } + _ => Err(DeltaTableError::generic( + "complex partitioning values are not supported", + )), + } + }) + .collect::, _>>()?; + + insert_field( + batch, + StructArray::try_new( + Fields::from( + partition_schema + .fields() + .map(|f| f.try_into()) + .collect::, _>>()?, + ), + columns, + None, + )?, + "partitionValues_parsed", + ) +} + +fn insert_field(batch: RecordBatch, array: StructArray, name: &str) -> DeltaResult { + let schema = batch.schema(); + let add_col = ex::extract_and_cast::(&batch, "add")?; + let (add_idx, _) = schema.column_with_name("add").unwrap(); + + let add_type = add_col + .fields() + .iter() + .cloned() + .chain(std::iter::once(Arc::new(ArrowField::new( + name, + array.data_type().clone(), + true, + )))) + .collect_vec(); + let new_add = Arc::new(StructArray::try_new( + add_type.clone().into(), + add_col + .columns() + .iter() + .cloned() + .chain(std::iter::once(Arc::new(array) as ArrayRef)) + .collect(), + add_col.nulls().cloned(), + )?); + let new_add_field = Arc::new(ArrowField::new( + "add", + ArrowDataType::Struct(add_type.into()), + true, + )); + + let mut fields = schema.fields().to_vec(); + let _ = std::mem::replace(&mut fields[add_idx], new_add_field); + let mut columns = batch.columns().to_vec(); + let _ = std::mem::replace(&mut columns[add_idx], new_add); + + Ok(RecordBatch::try_new( + Arc::new(ArrowSchema::new(fields)), + columns, + )?) 
} impl<'a, S> Stream for ReplayStream<'a, S> @@ -356,16 +584,22 @@ fn read_file_info<'a>(arr: &'a dyn ProvidesColumnByName) -> DeltaResult TestResult { let log_schema = Arc::new(StructType::new(vec![ @@ -420,4 +654,115 @@ pub(super) mod tests { Ok(()) } + + #[test] + fn test_parse_stats() -> TestResult { + let schema = TestSchemas::simple(); + let config_map = HashMap::new(); + let table_config = TableConfig(&config_map); + let config = DeltaTableConfig::default(); + + let commit_data = CommitData { + actions: vec![ActionFactory::add(schema, HashMap::new(), Vec::new(), true).into()], + operation: DeltaOperation::Write { + mode: crate::protocol::SaveMode::Append, + partition_by: None, + predicate: None, + }, + app_metadata: Default::default(), + app_transactions: Default::default(), + }; + let (_, maybe_batches) = LogSegment::new_test(&[commit_data])?; + + let batches = maybe_batches.into_iter().collect::, _>>()?; + let batch = concat_batches(&batches[0].schema(), &batches)?; + + assert!(ex::extract_and_cast_opt::(&batch, "add.stats").is_some()); + assert!(ex::extract_and_cast_opt::(&batch, "add.stats_parsed").is_none()); + + let stats_schema = stats_schema(&schema, table_config)?; + let new_batch = parse_stats(batch, Arc::new((&stats_schema).try_into()?), &config)?; + + assert!(ex::extract_and_cast_opt::(&new_batch, "add.stats_parsed").is_some()); + let parsed_col = ex::extract_and_cast::(&new_batch, "add.stats_parsed")?; + let delta_type: DataType = parsed_col.data_type().try_into()?; + + match delta_type { + DataType::Struct(fields) => { + assert_eq!(fields.as_ref(), &stats_schema); + } + _ => panic!("unexpected data type"), + } + + // let expression = Expression::column("add.stats"); + // let evaluator = ARROW_HANDLER.get_evaluator( + // Arc::new(batch.schema_ref().as_ref().try_into()?), + // expression, + // DataType::Primitive(PrimitiveType::String), + // ); + // let engine_data = ArrowEngineData::new(batch); + // let result = evaluator + // .evaluate(&engine_data)? + // .as_any() + // .downcast_ref::() + // .ok_or(DeltaTableError::generic( + // "failed to downcast evaluator result to ArrowEngineData.", + // ))? 
+ // .record_batch() + // .clone(); + + Ok(()) + } + + #[test] + fn test_parse_partition_values() -> TestResult { + let schema = TestSchemas::simple(); + let partition_columns = vec![schema.field("modified").unwrap().name().to_string()]; + + let commit_data = CommitData { + actions: vec![ActionFactory::add( + schema, + HashMap::new(), + partition_columns.clone(), + true, + ) + .into()], + operation: DeltaOperation::Write { + mode: crate::protocol::SaveMode::Append, + partition_by: Some(partition_columns.clone()), + predicate: None, + }, + app_metadata: Default::default(), + app_transactions: Default::default(), + }; + let (_, maybe_batches) = LogSegment::new_test(&[commit_data])?; + + let batches = maybe_batches.into_iter().collect::, _>>()?; + let batch = concat_batches(&batches[0].schema(), &batches)?; + + assert!(ex::extract_and_cast_opt::(&batch, "add.partitionValues").is_some()); + assert!( + ex::extract_and_cast_opt::(&batch, "add.partitionValues_parsed").is_none() + ); + + let partitions_schema = partitions_schema(&schema, &partition_columns)?.unwrap(); + let new_batch = parse_partitions(batch, &partitions_schema)?; + + assert!( + ex::extract_and_cast_opt::(&new_batch, "add.partitionValues_parsed") + .is_some() + ); + let parsed_col = + ex::extract_and_cast::(&new_batch, "add.partitionValues_parsed")?; + let delta_type: DataType = parsed_col.data_type().try_into()?; + + match delta_type { + DataType::Struct(fields) => { + assert_eq!(fields.as_ref(), &partitions_schema); + } + _ => panic!("unexpected data type"), + } + + Ok(()) + } } diff --git a/crates/core/src/kernel/snapshot/serde.rs b/crates/core/src/kernel/snapshot/serde.rs index dd7403bc28..45c1206c82 100644 --- a/crates/core/src/kernel/snapshot/serde.rs +++ b/crates/core/src/kernel/snapshot/serde.rs @@ -1,10 +1,11 @@ +use std::fmt; + use arrow_ipc::reader::FileReader; use arrow_ipc::writer::FileWriter; use chrono::{DateTime, TimeZone, Utc}; use object_store::ObjectMeta; use serde::de::{self, Deserializer, SeqAccess, Visitor}; use serde::{ser::SerializeSeq, Deserialize, Serialize}; -use std::fmt; use super::log_segment::LogSegment; use super::EagerSnapshot; diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 4ef9fc06fd..0498e4493c 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -79,6 +79,9 @@ pub mod schema; pub mod storage; pub mod table; +#[cfg(test)] +pub mod test_utils; + #[cfg(feature = "datafusion")] pub mod delta_datafusion; pub mod writer; @@ -89,10 +92,8 @@ pub use self::data_catalog::{DataCatalog, DataCatalogError}; pub use self::errors::*; pub use self::schema::partitions::*; pub use self::schema::*; -pub use self::table::builder::{ - DeltaTableBuilder, DeltaTableConfig, DeltaTableLoadOptions, DeltaVersion, -}; -pub use self::table::config::DeltaConfigKey; +pub use self::table::builder::{DeltaTableBuilder, DeltaTableConfig, DeltaVersion}; +pub use self::table::config::TableProperty; pub use self::table::DeltaTable; pub use object_store::{path::Path, Error as ObjectStoreError, ObjectMeta, ObjectStore}; pub use operations::DeltaOps; diff --git a/crates/core/src/logstore/default_logstore.rs b/crates/core/src/logstore/default_logstore.rs index 8fd4f52beb..79a1c76653 100644 --- a/crates/core/src/logstore/default_logstore.rs +++ b/crates/core/src/logstore/default_logstore.rs @@ -1,12 +1,25 @@ //! 
Default implementation of [`LogStore`] for storage backends with atomic put-if-absent operation -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use bytes::Bytes; -use object_store::{path::Path, ObjectStore}; +use object_store::{Attributes, Error as ObjectStoreError, ObjectStore, PutOptions, TagSet}; -use super::{LogStore, LogStoreConfig}; -use crate::{operations::transaction::TransactionError, storage::ObjectStoreRef, DeltaResult}; +use super::{CommitOrBytes, LogStore, LogStoreConfig}; +use crate::{ + operations::transaction::TransactionError, + storage::{commit_uri_from_version, ObjectStoreRef}, + DeltaResult, +}; + +fn put_options() -> &'static PutOptions { + static PUT_OPTS: OnceLock = OnceLock::new(); + PUT_OPTS.get_or_init(|| PutOptions { + mode: object_store::PutMode::Create, // Creates if file doesn't exists yet + tags: TagSet::default(), + attributes: Attributes::default(), + }) +} /// Default [`LogStore`] implementation #[derive(Debug, Clone)] @@ -45,17 +58,39 @@ impl LogStore for DefaultLogStore { async fn write_commit_entry( &self, version: i64, - tmp_commit: &Path, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError> { - super::write_commit_entry(self.storage.as_ref(), version, tmp_commit).await + match commit_or_bytes { + CommitOrBytes::LogBytes(log_bytes) => self + .object_store() + .put_opts( + &commit_uri_from_version(version), + log_bytes.into(), + put_options().clone(), + ) + .await + .map_err(|err| -> TransactionError { + match err { + ObjectStoreError::AlreadyExists { .. } => { + TransactionError::VersionAlreadyExists(version) + } + _ => TransactionError::from(err), + } + })?, + _ => unreachable!(), // Default log store should never get a tmp_commit, since this is for conditional put stores + }; + Ok(()) } async fn abort_commit_entry( &self, - version: i64, - tmp_commit: &Path, + _version: i64, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError> { - super::abort_commit_entry(self.storage.as_ref(), version, tmp_commit).await + match &commit_or_bytes { + CommitOrBytes::LogBytes(_) => Ok(()), + _ => unreachable!(), // Default log store should never get a tmp_commit, since this is for conditional put stores + } } async fn get_latest_version(&self, current_version: i64) -> DeltaResult { diff --git a/crates/core/src/logstore/mod.rs b/crates/core/src/logstore/mod.rs index b8646cdb65..dd82274157 100644 --- a/crates/core/src/logstore/mod.rs +++ b/crates/core/src/logstore/mod.rs @@ -1,31 +1,31 @@ //! Delta log store. 
-use dashmap::DashMap; -use futures::StreamExt; -use lazy_static::lazy_static; -use regex::Regex; -use serde::{ - de::{Error, SeqAccess, Visitor}, - ser::SerializeSeq, - Deserialize, Serialize, -}; use std::io::{BufRead, BufReader, Cursor}; use std::sync::OnceLock; use std::{cmp::max, collections::HashMap, sync::Arc}; -use url::Url; -use crate::{ - errors::DeltaResult, - kernel::Action, - operations::transaction::TransactionError, - protocol::{get_last_checkpoint, ProtocolError}, - storage::{ - commit_uri_from_version, retry_ext::ObjectStoreRetryExt, ObjectStoreRef, StorageOptions, - }, - DeltaTableError, -}; use bytes::Bytes; +use dashmap::DashMap; +use futures::StreamExt; +use lazy_static::lazy_static; use object_store::{path::Path, Error as ObjectStoreError, ObjectStore}; +use regex::Regex; +use serde::de::{Error, SeqAccess, Visitor}; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Serialize}; use tracing::{debug, warn}; +use url::Url; + +use crate::kernel::log_segment::PathExt; +use crate::kernel::Action; +use crate::operations::transaction::TransactionError; +use crate::protocol::{get_last_checkpoint, ProtocolError}; +use crate::storage::DeltaIOStorageBackend; +use crate::storage::{ + commit_uri_from_version, retry_ext::ObjectStoreRetryExt, IORuntime, ObjectStoreRef, + StorageOptions, +}; + +use crate::{DeltaResult, DeltaTableError}; #[cfg(feature = "datafusion")] use datafusion::datasource::object_store::ObjectStoreUrl; @@ -102,11 +102,12 @@ lazy_static! { /// # use std::collections::HashMap; /// # use url::Url; /// let location = Url::parse("memory:///").expect("Failed to make location"); -/// let logstore = logstore_for(location, HashMap::new()).expect("Failed to get a logstore"); +/// let logstore = logstore_for(location, HashMap::new(), None).expect("Failed to get a logstore"); /// ``` pub fn logstore_for( location: Url, options: impl Into + Clone, + io_runtime: Option, ) -> DeltaResult { // turn location into scheme let scheme = Url::parse(&format!("{}://", location.scheme())) @@ -114,10 +115,11 @@ pub fn logstore_for( if let Some(entry) = crate::storage::factories().get(&scheme) { debug!("Found a storage provider for {scheme} ({location})"); + let (store, _prefix) = entry .value() .parse_url_opts(&location, &options.clone().into())?; - return logstore_with(store, location, options); + return logstore_with(store, location, options, io_runtime); } Err(DeltaTableError::InvalidTableLocation(location.into())) } @@ -127,10 +129,17 @@ pub fn logstore_with( store: ObjectStoreRef, location: Url, options: impl Into + Clone, + io_runtime: Option, ) -> DeltaResult { let scheme = Url::parse(&format!("{}://", location.scheme())) .map_err(|_| DeltaTableError::InvalidTableLocation(location.clone().into()))?; + let store = if let Some(io_runtime) = io_runtime { + Arc::new(DeltaIOStorageBackend::new(store, io_runtime.get_handle())) as ObjectStoreRef + } else { + store + }; + if let Some(factory) = logstores().get(&scheme) { debug!("Found a logstore provider for {scheme}"); return factory.with_options(store, &location, &options.into()); @@ -143,6 +152,15 @@ pub fn logstore_with( )) } +/// Holds either the path of a tmp_commit or the raw commit bytes +#[derive(Clone)] +pub enum CommitOrBytes { + /// Path of the tmp commit, to be used by logstores which use CopyIfNotExists + TmpCommit(Path), + /// Bytes of the log, to be used by logstores which use Conditional Put + LogBytes(Bytes), +} + /// Configuration parameters for a log store #[derive(Debug, Clone)] pub struct LogStoreConfig { @@ -182,14 
+200,14 @@ pub trait LogStore: Sync + Send { async fn write_commit_entry( &self, version: i64, - tmp_commit: &Path, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError>; /// Abort the commit entry for the given version. async fn abort_commit_entry( &self, version: i64, - tmp_commit: &Path, + commit_or_bytes: CommitOrBytes, ) -> Result<(), TransactionError>; /// Find latest version currently stored in the delta log. @@ -221,7 +239,7 @@ pub trait LogStore: Sync + Send { let mut stream = object_store.list(Some(self.log_path())); if let Some(res) = stream.next().await { match res { - Ok(_) => Ok(true), + Ok(meta) => Ok(meta.location.is_commit_file()), Err(ObjectStoreError::NotFound { .. }) => Ok(false), Err(err) => Err(err)?, } @@ -311,7 +329,7 @@ pub async fn get_actions( // TODO: maybe a bit of a hack, required to `#[derive(Debug)]` for the operation builders impl std::fmt::Debug for dyn LogStore + '_ { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "LogStore({})", self.root_uri()) + write!(f, "{}({})", self.name(), self.root_uri()) } } @@ -475,16 +493,53 @@ mod tests { #[test] fn logstore_with_invalid_url() { let location = Url::parse("nonexistent://table").unwrap(); - let store = logstore_for(location, HashMap::default()); + let store = logstore_for(location, HashMap::default(), None); assert!(store.is_err()); } #[test] fn logstore_with_memory() { let location = Url::parse("memory://table").unwrap(); - let store = logstore_for(location, HashMap::default()); + let store = logstore_for(location, HashMap::default(), None); assert!(store.is_ok()); } + + #[test] + fn logstore_with_memory_and_rt() { + let location = Url::parse("memory://table").unwrap(); + let store = logstore_for(location, HashMap::default(), Some(IORuntime::default())); + assert!(store.is_ok()); + } + + #[tokio::test] + async fn test_is_location_a_table() { + use object_store::path::Path; + use object_store::{PutOptions, PutPayload}; + let location = Url::parse("memory://table").unwrap(); + let store = + logstore_for(location, HashMap::default(), None).expect("Failed to get logstore"); + assert!(!store + .is_delta_table_location() + .await + .expect("Failed to look at table")); + + // Let's put a failed commit into the directory and then see if it's still considered a + // delta table (it shouldn't be). + let payload = PutPayload::from_static(b"test-drivin"); + let _put = store + .object_store() + .put_opts( + &Path::from("_delta_log/_commit_failed.tmp"), + payload, + PutOptions::default(), + ) + .await + .expect("Failed to put"); + assert!(!store + .is_delta_table_location() + .await + .expect("Failed to look at table")); + } } #[cfg(feature = "datafusion")] diff --git a/crates/core/src/operations/add_column.rs b/crates/core/src/operations/add_column.rs new file mode 100644 index 0000000000..8fff1677b8 --- /dev/null +++ b/crates/core/src/operations/add_column.rs @@ -0,0 +1,113 @@ +//! 
Add a new column to a table + +use delta_kernel::schema::StructType; +use futures::future::BoxFuture; +use itertools::Itertools; + +use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; +use crate::kernel::StructField; +use crate::logstore::LogStoreRef; +use crate::operations::cast::merge_schema::merge_delta_struct; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::{DeltaResult, DeltaTable, DeltaTableError}; + +/// Add new columns and/or nested fields to a table +pub struct AddColumnBuilder { + /// A snapshot of the table's state + snapshot: DeltaTableState, + /// Fields to add/merge into schema + fields: Option>, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Additional information to add to the commit + commit_properties: CommitProperties, +} + +impl super::Operation<()> for AddColumnBuilder {} + +impl AddColumnBuilder { + /// Create a new builder + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + snapshot, + log_store, + fields: None, + commit_properties: CommitProperties::default(), + } + } + + /// Specify the fields to be added + pub fn with_fields(mut self, fields: impl IntoIterator + Clone) -> Self { + self.fields = Some(fields.into_iter().collect()); + self + } + /// Additional metadata to be added to commit info + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; + self + } +} + +impl std::future::IntoFuture for AddColumnBuilder { + type Output = DeltaResult; + + type IntoFuture = BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let mut metadata = this.snapshot.metadata().clone(); + let fields = match this.fields { + Some(v) => v, + None => return Err(DeltaTableError::Generic("No fields provided".to_string())), + }; + + let fields_right = &StructType::new(fields.clone()); + let table_schema = this.snapshot.schema(); + let new_table_schema = merge_delta_struct(table_schema, fields_right)?; + + // TODO(ion): Think of a way how we can simply this checking through the API or centralize some checks. 
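// [Editor's sketch, not part of this patch] The protocol check that follows encodes a
// fixed protocol rule: TIMESTAMP_NTZ is a table feature, so adding such a column requires
// at least reader version 3 and writer version 7. Modelled standalone below; the function
// name and tuple shape are illustrative, not the crate's API:
fn versions_after_add(contains_timestamp_ntz: bool, current: (i32, i32)) -> (i32, i32) {
    let (reader, writer) = current;
    if contains_timestamp_ntz {
        // Upgrading to v3/v7 also moves legacy table properties into explicit features.
        (reader.max(3), writer.max(7))
    } else {
        current
    }
}
// versions_after_add(true, (1, 2)) == (3, 7); versions_after_add(false, (1, 2)) == (1, 2)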
+ let contains_timestampntz = PROTOCOL.contains_timestampntz(fields.iter()); + let protocol = this.snapshot.protocol(); + + let maybe_new_protocol = if contains_timestampntz { + let updated_protocol = protocol.clone().enable_timestamp_ntz(); + if !(protocol.min_reader_version == 3 && protocol.min_writer_version == 7) { + // Convert existing properties to features since we advanced the protocol to v3,7 + Some( + updated_protocol + .move_table_properties_into_features(&metadata.configuration), + ) + } else { + Some(updated_protocol) + } + } else { + None + }; + + let operation = DeltaOperation::AddColumn { + fields: fields.into_iter().collect_vec(), + }; + + metadata.schema_string = serde_json::to_string(&new_table_schema)?; + + let mut actions = vec![metadata.into()]; + + if let Some(new_protocol) = maybe_new_protocol { + actions.push(new_protocol.into()) + } + + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build(Some(&this.snapshot), this.log_store.clone(), operation) + .await?; + + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) + }) + } +} diff --git a/crates/core/src/operations/add_feature.rs b/crates/core/src/operations/add_feature.rs new file mode 100644 index 0000000000..7200c37d03 --- /dev/null +++ b/crates/core/src/operations/add_feature.rs @@ -0,0 +1,196 @@ +//! Enable table features + +use futures::future::BoxFuture; +use itertools::Itertools; + +use super::transaction::{CommitBuilder, CommitProperties}; +use crate::kernel::{ReaderFeatures, TableFeatures, WriterFeatures}; +use crate::logstore::LogStoreRef; +use crate::protocol::DeltaOperation; +use crate::table::state::DeltaTableState; +use crate::DeltaTable; +use crate::{DeltaResult, DeltaTableError}; + +/// Enable table features for a table +pub struct AddTableFeatureBuilder { + /// A snapshot of the table's state + snapshot: DeltaTableState, + /// Name of the feature + name: Vec, + /// Allow protocol versions to be increased by setting features + allow_protocol_versions_increase: bool, + /// Delta object store for handling data files + log_store: LogStoreRef, + /// Additional information to add to the commit + commit_properties: CommitProperties, +} + +impl super::Operation<()> for AddTableFeatureBuilder {} + +impl AddTableFeatureBuilder { + /// Create a new builder + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { + Self { + name: vec![], + allow_protocol_versions_increase: false, + snapshot, + log_store, + commit_properties: CommitProperties::default(), + } + } + + /// Specify the features to be added + pub fn with_feature>(mut self, name: S) -> Self { + self.name.push(name.into()); + self + } + + /// Specify the features to be added + pub fn with_features>(mut self, name: Vec) -> Self { + self.name + .extend(name.into_iter().map(Into::into).collect_vec()); + self + } + + /// Specify if you want to allow protocol version to be increased + pub fn with_allow_protocol_versions_increase(mut self, allow: bool) -> Self { + self.allow_protocol_versions_increase = allow; + self + } + + /// Additional metadata to be added to commit info + pub fn with_commit_properties(mut self, commit_properties: CommitProperties) -> Self { + self.commit_properties = commit_properties; + self + } +} + +impl std::future::IntoFuture for AddTableFeatureBuilder { + type Output = DeltaResult; + + type IntoFuture = BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + let this = self; + + Box::pin(async move { + let name = if 
this.name.is_empty() { + return Err(DeltaTableError::Generic("No features provided".to_string())); + } else { + this.name + }; + let (reader_features, writer_features): ( + Vec>, + Vec>, + ) = name.iter().map(|v| v.to_reader_writer_features()).unzip(); + let reader_features = reader_features.into_iter().flatten().collect_vec(); + let writer_features = writer_features.into_iter().flatten().collect_vec(); + + let mut protocol = this.snapshot.protocol().clone(); + + if !this.allow_protocol_versions_increase { + if !reader_features.is_empty() + && !writer_features.is_empty() + && !(protocol.min_reader_version == 3 && protocol.min_writer_version == 7) + { + return Err(DeltaTableError::Generic("Table feature enables reader and writer feature, but reader is not v3, and writer not v7. Set allow_protocol_versions_increase or increase versions explicitly through set_tbl_properties".to_string())); + } else if !reader_features.is_empty() && protocol.min_reader_version < 3 { + return Err(DeltaTableError::Generic("Table feature enables reader feature, but min_reader is not v3. Set allow_protocol_versions_increase or increase version explicitly through set_tbl_properties".to_string())); + } else if !writer_features.is_empty() && protocol.min_writer_version < 7 { + return Err(DeltaTableError::Generic("Table feature enables writer feature, but min_writer is not v7. Set allow_protocol_versions_increase or increase version explicitly through set_tbl_properties".to_string())); + } + } + + protocol = protocol.with_reader_features(reader_features); + protocol = protocol.with_writer_features(writer_features); + + let operation = DeltaOperation::AddFeature { name }; + + let actions = vec![protocol.into()]; + + let commit = CommitBuilder::from(this.commit_properties) + .with_actions(actions) + .build(Some(&this.snapshot), this.log_store.clone(), operation) + .await?; + + Ok(DeltaTable::new_with_state( + this.log_store, + commit.snapshot(), + )) + }) + } +} + +#[cfg(feature = "datafusion")] +#[cfg(test)] +mod tests { + use delta_kernel::DeltaResult; + + use crate::{ + kernel::TableFeatures, + writer::test_utils::{create_bare_table, get_record_batch}, + DeltaOps, + }; + + #[tokio::test] + async fn add_feature() -> DeltaResult<()> { + let batch = get_record_batch(None, false); + let write = DeltaOps(create_bare_table()) + .write(vec![batch.clone()]) + .await + .unwrap(); + let table = DeltaOps(write); + let result = table + .add_feature() + .with_feature(TableFeatures::ChangeDataFeed) + .with_allow_protocol_versions_increase(true) + .await + .unwrap(); + + assert!(&result + .protocol() + .cloned() + .unwrap() + .writer_features + .unwrap_or_default() + .contains(&crate::kernel::WriterFeatures::ChangeDataFeed)); + + let result = DeltaOps(result) + .add_feature() + .with_feature(TableFeatures::DeletionVectors) + .with_allow_protocol_versions_increase(true) + .await + .unwrap(); + + let current_protocol = &result.protocol().cloned().unwrap(); + assert!(¤t_protocol + .writer_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::WriterFeatures::DeletionVectors)); + assert!(¤t_protocol + .reader_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::ReaderFeatures::DeletionVectors)); + assert_eq!(result.version(), 2); + Ok(()) + } + + #[tokio::test] + async fn add_feature_disallowed_increase() -> DeltaResult<()> { + let batch = get_record_batch(None, false); + let write = DeltaOps(create_bare_table()) + .write(vec![batch.clone()]) + .await + .unwrap(); + let table = DeltaOps(write); + let 
result = table + .add_feature() + .with_feature(TableFeatures::ChangeDataFeed) + .await; + + assert!(result.is_err()); + Ok(()) + } +} diff --git a/crates/core/src/operations/cast.rs b/crates/core/src/operations/cast.rs deleted file mode 100644 index b231346266..0000000000 --- a/crates/core/src/operations/cast.rs +++ /dev/null @@ -1,354 +0,0 @@ -//! Provide common cast functionality for callers -//! -use crate::kernel::{ - ArrayType, DataType as DeltaDataType, MapType, MetadataValue, StructField, StructType, -}; -use arrow_array::{new_null_array, Array, ArrayRef, RecordBatch, StructArray}; -use arrow_cast::{cast_with_options, CastOptions}; -use arrow_schema::{ArrowError, DataType, Fields, SchemaRef as ArrowSchemaRef}; -use std::collections::HashMap; -use std::sync::Arc; - -use crate::DeltaResult; - -fn try_merge_metadata( - left: &mut HashMap, - right: &HashMap, -) -> Result<(), ArrowError> { - for (k, v) in right { - if let Some(vl) = left.get(k) { - if vl != v { - return Err(ArrowError::SchemaError(format!( - "Cannot merge metadata with different values for key {}", - k - ))); - } - } else { - left.insert(k.clone(), v.clone()); - } - } - Ok(()) -} - -pub(crate) fn merge_struct( - left: &StructType, - right: &StructType, -) -> Result { - let mut errors = Vec::with_capacity(left.fields().len()); - let merged_fields: Result, ArrowError> = left - .fields() - .iter() - .map(|field| { - let right_field = right.field_with_name(field.name()); - if let Ok(right_field) = right_field { - let type_or_not = merge_type(field.data_type(), right_field.data_type()); - match type_or_not { - Err(e) => { - errors.push(e.to_string()); - Err(e) - } - Ok(f) => { - let mut new_field = StructField::new( - field.name(), - f, - field.is_nullable() || right_field.is_nullable(), - ); - - new_field.metadata = field.metadata.clone(); - try_merge_metadata(&mut new_field.metadata, &right_field.metadata)?; - Ok(new_field) - } - } - } else { - Ok(field.clone()) - } - }) - .collect(); - match merged_fields { - Ok(mut fields) => { - for field in right.fields() { - if !left.field_with_name(field.name()).is_ok() { - fields.push(field.clone()); - } - } - - Ok(StructType::new(fields)) - } - Err(e) => { - errors.push(e.to_string()); - Err(ArrowError::SchemaError(errors.join("\n"))) - } - } -} - -pub(crate) fn merge_type( - left: &DeltaDataType, - right: &DeltaDataType, -) -> Result { - if left == right { - return Ok(left.clone()); - } - match (left, right) { - (DeltaDataType::Array(a), DeltaDataType::Array(b)) => { - let merged = merge_type(&a.element_type, &b.element_type)?; - Ok(DeltaDataType::Array(Box::new(ArrayType::new( - merged, - a.contains_null() || b.contains_null(), - )))) - } - (DeltaDataType::Map(a), DeltaDataType::Map(b)) => { - let merged_key = merge_type(&a.key_type, &b.key_type)?; - let merged_value = merge_type(&a.value_type, &b.value_type)?; - Ok(DeltaDataType::Map(Box::new(MapType::new( - merged_key, - merged_value, - a.value_contains_null() || b.value_contains_null(), - )))) - } - (DeltaDataType::Struct(a), DeltaDataType::Struct(b)) => { - let merged = merge_struct(a, b)?; - Ok(DeltaDataType::Struct(Box::new(merged))) - } - (a, b) => Err(ArrowError::SchemaError(format!( - "Cannot merge types {} and {}", - a, b - ))), - } -} - -pub(crate) fn merge_schema( - left: ArrowSchemaRef, - right: ArrowSchemaRef, -) -> Result { - let left_delta: StructType = left.try_into()?; - let right_delta: StructType = right.try_into()?; - let merged: StructType = merge_struct(&left_delta, &right_delta)?; - 
Ok(Arc::new((&merged).try_into()?)) -} - -fn cast_struct( - struct_array: &StructArray, - fields: &Fields, - cast_options: &CastOptions, - add_missing: bool, -) -> Result>, arrow_schema::ArrowError> { - fields - .iter() - .map(|field| { - let col_or_not = struct_array.column_by_name(field.name()); - match col_or_not { - None => match add_missing { - true => Ok(new_null_array(field.data_type(), struct_array.len())), - false => Err(arrow_schema::ArrowError::SchemaError(format!( - "Could not find column {0}", - field.name() - ))), - }, - Some(col) => { - if let (DataType::Struct(_), DataType::Struct(child_fields)) = - (col.data_type(), field.data_type()) - { - let child_struct = StructArray::from(col.into_data()); - let s = - cast_struct(&child_struct, child_fields, cast_options, add_missing)?; - Ok(Arc::new(StructArray::new( - child_fields.clone(), - s, - child_struct.nulls().map(ToOwned::to_owned), - )) as ArrayRef) - } else if is_cast_required(col.data_type(), field.data_type()) { - cast_with_options(col, field.data_type(), cast_options) - } else { - Ok(col.clone()) - } - } - } - }) - .collect::, _>>() -} - -fn is_cast_required(a: &DataType, b: &DataType) -> bool { - match (a, b) { - (DataType::List(a_item), DataType::List(b_item)) => { - // If list item name is not the default('item') the list must be casted - !a.equals_datatype(b) || a_item.name() != b_item.name() - } - (_, _) => !a.equals_datatype(b), - } -} - -/// Cast recordbatch to a new target_schema, by casting each column array -pub fn cast_record_batch( - batch: &RecordBatch, - target_schema: ArrowSchemaRef, - safe: bool, - add_missing: bool, -) -> DeltaResult { - let cast_options = CastOptions { - safe, - ..Default::default() - }; - - let s = StructArray::new( - batch.schema().as_ref().to_owned().fields, - batch.columns().to_owned(), - None, - ); - let columns = cast_struct(&s, target_schema.fields(), &cast_options, add_missing)?; - Ok(RecordBatch::try_new(target_schema, columns)?) 
-} - -#[cfg(test)] -mod tests { - use crate::kernel::{ - ArrayType as DeltaArrayType, DataType as DeltaDataType, StructField as DeltaStructField, - StructType as DeltaStructType, - }; - use crate::operations::cast::MetadataValue; - use crate::operations::cast::{cast_record_batch, is_cast_required}; - use arrow::array::ArrayData; - use arrow_array::{Array, ArrayRef, ListArray, RecordBatch}; - use arrow_buffer::Buffer; - use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; - use std::collections::HashMap; - use std::sync::Arc; - - #[test] - fn test_merge_schema_with_dict() { - let left_schema = Arc::new(Schema::new(vec![Field::new( - "f", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - )])); - let right_schema = Arc::new(Schema::new(vec![Field::new( - "f", - DataType::LargeUtf8, - true, - )])); - - let result = super::merge_schema(left_schema, right_schema).unwrap(); - assert_eq!(result.fields().len(), 1); - let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); - assert_eq!(delta_type, DeltaDataType::STRING); - assert!(result.fields()[0].is_nullable()); - } - - #[test] - fn test_merge_schema_with_meta() { - let mut left_meta = HashMap::new(); - left_meta.insert("a".to_string(), "a1".to_string()); - let left_schema = DeltaStructType::new(vec![DeltaStructField::new( - "f", - DeltaDataType::STRING, - false, - ) - .with_metadata(left_meta)]); - let mut right_meta = HashMap::new(); - right_meta.insert("b".to_string(), "b2".to_string()); - let right_schema = DeltaStructType::new(vec![DeltaStructField::new( - "f", - DeltaDataType::STRING, - true, - ) - .with_metadata(right_meta)]); - - let result = super::merge_struct(&left_schema, &right_schema).unwrap(); - assert_eq!(result.fields().len(), 1); - let delta_type = result.fields()[0].data_type(); - assert_eq!(delta_type, &DeltaDataType::STRING); - let mut expected_meta = HashMap::new(); - expected_meta.insert("a".to_string(), MetadataValue::String("a1".to_string())); - expected_meta.insert("b".to_string(), MetadataValue::String("b2".to_string())); - assert_eq!(result.fields()[0].metadata(), &expected_meta); - } - - #[test] - fn test_merge_schema_with_nested() { - let left_schema = Arc::new(Schema::new(vec![Field::new( - "f", - DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), - false, - )])); - let right_schema = Arc::new(Schema::new(vec![Field::new( - "f", - DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))), - true, - )])); - - let result = super::merge_schema(left_schema, right_schema).unwrap(); - assert_eq!(result.fields().len(), 1); - let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); - assert_eq!( - delta_type, - DeltaDataType::Array(Box::new(DeltaArrayType::new(DeltaDataType::STRING, false))) - ); - assert!(result.fields()[0].is_nullable()); - } - - #[test] - fn test_cast_record_batch_with_list_non_default_item() { - let array = Arc::new(make_list_array()) as ArrayRef; - let source_schema = Schema::new(vec![Field::new( - "list_column", - array.data_type().clone(), - false, - )]); - let record_batch = RecordBatch::try_new(Arc::new(source_schema), vec![array]).unwrap(); - - let fields = Fields::from(vec![Field::new_list( - "list_column", - Field::new("item", DataType::Int8, false), - false, - )]); - let target_schema = Arc::new(Schema::new(fields)) as SchemaRef; - - let result = cast_record_batch(&record_batch, target_schema, false, false); - - let schema = 
result.unwrap().schema(); - let field = schema.column_with_name("list_column").unwrap().1; - if let DataType::List(list_item) = field.data_type() { - assert_eq!(list_item.name(), "item"); - } else { - panic!("Not a list"); - } - } - - fn make_list_array() -> ListArray { - let value_data = ArrayData::builder(DataType::Int32) - .len(8) - .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) - .build() - .unwrap(); - - let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); - - let list_data_type = DataType::List(Arc::new(Field::new("element", DataType::Int32, true))); - let list_data = ArrayData::builder(list_data_type) - .len(3) - .add_buffer(value_offsets) - .add_child_data(value_data) - .build() - .unwrap(); - ListArray::from(list_data) - } - - #[test] - fn test_is_cast_required_with_list() { - let field1 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); - let field2 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); - - assert!(!is_cast_required(&field1, &field2)); - } - - #[test] - fn test_is_cast_required_with_list_non_default_item() { - let field1 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); - let field2 = DataType::List(FieldRef::from(Field::new( - "element", - DataType::Int32, - false, - ))); - - assert!(is_cast_required(&field1, &field2)); - } -} diff --git a/crates/core/src/operations/cast/merge_schema.rs b/crates/core/src/operations/cast/merge_schema.rs new file mode 100644 index 0000000000..64fe2b7ed6 --- /dev/null +++ b/crates/core/src/operations/cast/merge_schema.rs @@ -0,0 +1,352 @@ +//! Provide schema merging for delta schemas +//! +use std::collections::HashMap; + +use arrow::datatypes::DataType::Dictionary; +use arrow_schema::{ + ArrowError, DataType, Field as ArrowField, Fields, Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, +}; + +use crate::kernel::{ArrayType, DataType as DeltaDataType, MapType, StructField, StructType}; + +fn try_merge_metadata( + left: &mut HashMap, + right: &HashMap, +) -> Result<(), ArrowError> { + for (k, v) in right { + if let Some(vl) = left.get(k) { + if vl != v { + return Err(ArrowError::SchemaError(format!( + "Cannot merge metadata with different values for key {}", + k + ))); + } + } else { + left.insert(k.clone(), v.clone()); + } + } + Ok(()) +} + +pub(crate) fn merge_delta_type( + left: &DeltaDataType, + right: &DeltaDataType, +) -> Result { + if left == right { + return Ok(left.clone()); + } + match (left, right) { + (DeltaDataType::Array(a), DeltaDataType::Array(b)) => { + let merged = merge_delta_type(&a.element_type, &b.element_type)?; + Ok(DeltaDataType::Array(Box::new(ArrayType::new( + merged, + a.contains_null() || b.contains_null(), + )))) + } + (DeltaDataType::Map(a), DeltaDataType::Map(b)) => { + let merged_key = merge_delta_type(&a.key_type, &b.key_type)?; + let merged_value = merge_delta_type(&a.value_type, &b.value_type)?; + Ok(DeltaDataType::Map(Box::new(MapType::new( + merged_key, + merged_value, + a.value_contains_null() || b.value_contains_null(), + )))) + } + (DeltaDataType::Struct(a), DeltaDataType::Struct(b)) => { + let merged = merge_delta_struct(a, b)?; + Ok(DeltaDataType::Struct(Box::new(merged))) + } + (a, b) => Err(ArrowError::SchemaError(format!( + "Cannot merge types {} and {}", + a, b + ))), + } +} + +pub(crate) fn merge_delta_struct( + left: &StructType, + right: &StructType, +) -> Result { + let mut errors = Vec::new(); + let merged_fields: Result, ArrowError> = left + .fields() + .map(|field| { + let 
right_field = right.field(field.name()); + if let Some(right_field) = right_field { + let type_or_not = merge_delta_type(field.data_type(), right_field.data_type()); + match type_or_not { + Err(e) => { + errors.push(e.to_string()); + Err(e) + } + Ok(f) => { + let mut new_field = StructField::new( + field.name(), + f, + field.is_nullable() || right_field.is_nullable(), + ); + + new_field.metadata.clone_from(&field.metadata); + try_merge_metadata(&mut new_field.metadata, &right_field.metadata)?; + Ok(new_field) + } + } + } else { + Ok(field.clone()) + } + }) + .collect(); + match merged_fields { + Ok(mut fields) => { + for field in right.fields() { + if !left.field(field.name()).is_some() { + fields.push(field.clone()); + } + } + + Ok(StructType::new(fields)) + } + Err(e) => { + errors.push(e.to_string()); + Err(ArrowError::SchemaError(errors.join("\n"))) + } + } +} + +pub(crate) fn merge_arrow_field( + left: &ArrowField, + right: &ArrowField, + preserve_new_fields: bool, +) -> Result { + if left == right { + return Ok(left.clone()); + } + + let (table_type, batch_type) = (left.data_type(), right.data_type()); + + match (table_type, batch_type) { + (Dictionary(key_type, value_type), _) + if matches!( + value_type.as_ref(), + DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 + ) && matches!( + batch_type, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + ) => + { + Ok(ArrowField::new( + right.name(), + Dictionary(key_type.clone(), Box::new(batch_type.clone())), + left.is_nullable() || right.is_nullable(), + )) + } + (Dictionary(key_type, value_type), _) + if matches!( + value_type.as_ref(), + DataType::Binary | DataType::BinaryView | DataType::LargeBinary + ) && matches!( + batch_type, + DataType::Binary | DataType::LargeBinary | DataType::BinaryView + ) => + { + Ok(ArrowField::new( + right.name(), + Dictionary(key_type.clone(), Box::new(batch_type.clone())), + left.is_nullable() || right.is_nullable(), + )) + } + (Dictionary(_, value_type), _) if value_type.equals_datatype(batch_type) => Ok(left + .clone() + .with_nullable(left.is_nullable() || right.is_nullable())), + + (_, Dictionary(_, value_type)) + if matches!( + table_type, + DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 + ) && matches!( + value_type.as_ref(), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + ) => + { + Ok(right + .clone() + .with_nullable(left.is_nullable() || right.is_nullable())) + } + (_, Dictionary(_, value_type)) + if matches!( + table_type, + DataType::Binary | DataType::BinaryView | DataType::LargeBinary + ) && matches!( + value_type.as_ref(), + DataType::Binary | DataType::LargeBinary | DataType::BinaryView + ) => + { + Ok(right + .clone() + .with_nullable(left.is_nullable() || right.is_nullable())) + } + (_, Dictionary(_, value_type)) if value_type.equals_datatype(table_type) => Ok(right + .clone() + .with_nullable(left.is_nullable() || right.is_nullable())), + // With Utf8/binary we always take the right type since that is coming from the incoming data + // by doing that we allow passthrough of any string flavor + ( + DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, + ) + | ( + DataType::Binary | DataType::BinaryView | DataType::LargeBinary, + DataType::Binary | DataType::LargeBinary | DataType::BinaryView, + ) => Ok(ArrowField::new( + left.name(), + batch_type.clone(), + right.is_nullable() || left.is_nullable(), + )), + ( + DataType::List(left_child_fields) | 
DataType::LargeList(left_child_fields), + DataType::LargeList(right_child_fields), + ) => { + let merged = + merge_arrow_field(left_child_fields, right_child_fields, preserve_new_fields)?; + Ok(ArrowField::new( + left.name(), + DataType::LargeList(merged.into()), + right.is_nullable() || left.is_nullable(), + )) + } + ( + DataType::List(left_child_fields) | DataType::LargeList(left_child_fields), + DataType::List(right_child_fields), + ) => { + let merged = + merge_arrow_field(left_child_fields, right_child_fields, preserve_new_fields)?; + Ok(ArrowField::new( + left.name(), + DataType::List(merged.into()), + right.is_nullable() || left.is_nullable(), + )) + } + (DataType::Struct(left_child_fields), DataType::Struct(right_child_fields)) => { + let merged = + merge_arrow_vec_fields(left_child_fields, right_child_fields, preserve_new_fields)?; + Ok(ArrowField::new( + left.name(), + DataType::Struct(merged.into()), + right.is_nullable() || left.is_nullable(), + )) + } + (DataType::Map(left_field, left_sorted), DataType::Map(right_field, right_sorted)) + if left_sorted == right_sorted => + { + let merged = merge_arrow_field(left_field, right_field, preserve_new_fields)?; + Ok(ArrowField::new( + left.name(), + DataType::Map(merged.into(), *right_sorted), + right.is_nullable() || left.is_nullable(), + )) + } + _ => { + let mut new_field = left.clone(); + match new_field.try_merge(right) { + Ok(()) => (), + Err(_err) => { + // We cannot keep the table field here, there is some weird behavior where + // Decimal(5,1) can be safely casted into Decimal(4,1) with out loss of data + // Then our stats parser fails to parse this decimal(1000.1) into Decimal(4,1) + // even though datafusion was able to write it into parquet + // We manually have to check if the decimal in the recordbatch is a subset of the table decimal + if let ( + DataType::Decimal128(left_precision, left_scale) + | DataType::Decimal256(left_precision, left_scale), + DataType::Decimal128(right_precision, right_scale), + ) = (right.data_type(), new_field.data_type()) + { + if !(left_precision <= right_precision && left_scale <= right_scale) { + return Err(ArrowError::SchemaError(format!( + "Cannot merge field {} from {} to {}", + right.name(), + right.data_type(), + new_field.data_type() + ))); + } + }; + // If it's not Decimal datatype, the new_field remains the left table field. + } + }; + Ok(new_field) + } + } +} + +/// Merges Arrow Table schema and Arrow Batch Schema, by allowing Large/View Types to passthrough. +// Sometimes fields can't be merged because they are not the same types. So table has int32, +// but batch int64. We want the preserve the table type. At later stage we will call cast_record_batch +// which will cast the batch int64->int32. This is desired behaviour so we can have flexibility +// in the batch data types. But preserve the correct table and parquet types. +// +// Preserve_new_fields can also be disabled if you just want to only use the passthrough functionality +pub(crate) fn merge_arrow_schema( + table_schema: ArrowSchemaRef, + batch_schema: ArrowSchemaRef, + preserve_new_fields: bool, +) -> Result { + let table_fields = table_schema.fields(); + let batch_fields = batch_schema.fields(); + + let merged_schema = ArrowSchema::new(merge_arrow_vec_fields( + table_fields, + batch_fields, + preserve_new_fields, + )?) 
+ .into(); + Ok(merged_schema) +} + +fn merge_arrow_vec_fields( + table_fields: &Fields, + batch_fields: &Fields, + preserve_new_fields: bool, +) -> Result, ArrowError> { + let mut errors = Vec::with_capacity(table_fields.len()); + let merged_fields: Result, ArrowError> = table_fields + .iter() + .map(|field| { + let right_field = batch_fields.find(field.name()); + if let Some((_, right_field)) = right_field { + let field_or_not = + merge_arrow_field(field.as_ref(), right_field, preserve_new_fields); + match field_or_not { + Err(e) => { + errors.push(e.to_string()); + Err(e) + } + Ok(mut f) => { + let mut field_matadata = f.metadata().clone(); + try_merge_metadata(&mut field_matadata, right_field.metadata())?; + f.set_metadata(field_matadata); + Ok(f) + } + } + } else { + Ok(field.as_ref().clone()) + } + }) + .collect(); + match merged_fields { + Ok(mut fields) => { + if preserve_new_fields { + for field in batch_fields.into_iter() { + if table_fields.find(field.name()).is_none() { + fields.push(field.as_ref().clone()); + } + } + } + Ok(fields) + } + Err(e) => { + errors.push(e.to_string()); + Err(ArrowError::SchemaError(errors.join("\n"))) + } + } +} diff --git a/crates/core/src/operations/cast/mod.rs b/crates/core/src/operations/cast/mod.rs new file mode 100644 index 0000000000..554373e623 --- /dev/null +++ b/crates/core/src/operations/cast/mod.rs @@ -0,0 +1,650 @@ +//! Provide common cast functionality for callers +//! +use arrow_array::cast::AsArray; +use arrow_array::{ + new_null_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch, + RecordBatchOptions, StructArray, +}; +use arrow_cast::{cast_with_options, CastOptions}; +use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef as ArrowSchemaRef}; +use std::sync::Arc; + +pub(crate) mod merge_schema; + +use crate::DeltaResult; + +fn cast_struct( + struct_array: &StructArray, + fields: &Fields, + cast_options: &CastOptions, + add_missing: bool, +) -> Result { + StructArray::try_new( + fields.to_owned(), + fields + .iter() + .map(|field| { + let col_or_not = struct_array.column_by_name(field.name()); + match col_or_not { + None => { + if add_missing && field.is_nullable() { + Ok(new_null_array(field.data_type(), struct_array.len())) + } else { + Err(ArrowError::SchemaError(format!( + "Could not find column {}", + field.name() + ))) + } + } + Some(col) => cast_field(col, field, cast_options, add_missing), + } + }) + .collect::, _>>()?, + struct_array.nulls().map(ToOwned::to_owned), + ) +} + +fn cast_list( + array: &GenericListArray, + field: &FieldRef, + cast_options: &CastOptions, + add_missing: bool, +) -> Result, ArrowError> { + let values = cast_field(array.values(), field, cast_options, add_missing)?; + GenericListArray::::try_new( + field.clone(), + array.offsets().clone(), + values, + array.nulls().cloned(), + ) +} + +fn cast_map( + array: &MapArray, + entries_field: &FieldRef, + sorted: bool, + cast_options: &CastOptions, + add_missing: bool, +) -> Result { + match entries_field.data_type() { + DataType::Struct(entry_fields) => { + let entries = cast_struct(array.entries(), entry_fields, cast_options, add_missing)?; + MapArray::try_new( + entries_field.clone(), + array.offsets().to_owned(), + entries, + array.nulls().cloned(), + sorted, + ) + } + _ => Err(ArrowError::CastError( + "Map entries must be a struct".to_string(), + )), + } +} + +fn cast_field( + col: &ArrayRef, + field: &FieldRef, + cast_options: &CastOptions, + add_missing: bool, +) -> Result { + let (col_type, field_type) = 
(col.data_type(), field.data_type()); + + match (col_type, field_type) { + (DataType::Struct(_), DataType::Struct(child_fields)) => { + let child_struct = StructArray::from(col.into_data()); + Ok(Arc::new(cast_struct( + &child_struct, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef) + } + (DataType::List(_), DataType::List(child_fields)) => Ok(Arc::new(cast_list( + col.as_any() + .downcast_ref::>() + .ok_or_else(|| { + ArrowError::CastError(format!( + "Expected a list for {} but got {}", + field.name(), + col_type + )) + })?, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef), + (DataType::LargeList(_), DataType::LargeList(child_fields)) => Ok(Arc::new(cast_list( + col.as_any() + .downcast_ref::>() + .ok_or_else(|| { + ArrowError::CastError(format!( + "Expected a list for {} but got {}", + field.name(), + col_type + )) + })?, + child_fields, + cast_options, + add_missing, + )?) as ArrayRef), + // TODO: add list view cast + (DataType::Map(_, _), DataType::Map(child_fields, sorted)) => Ok(Arc::new(cast_map( + col.as_map_opt().ok_or_else(|| { + ArrowError::CastError(format!( + "Expected a map for {} but got {}", + field.name(), + col_type + )) + })?, + child_fields, + *sorted, + cast_options, + add_missing, + )?) as ArrayRef), + _ if is_cast_required(col_type, field_type) => { + cast_with_options(col, field_type, cast_options) + } + _ => Ok(col.clone()), + } +} + +fn is_cast_required(a: &DataType, b: &DataType) -> bool { + match (a, b) { + (DataType::List(a_item), DataType::List(b_item)) => { + // If list item name is not the default('item') the list must be casted + !a.equals_datatype(b) || a_item.name() != b_item.name() + } + (_, _) => !a.equals_datatype(b), + } +} + +/// Cast recordbatch to a new target_schema, by casting each column array +pub fn cast_record_batch( + batch: &RecordBatch, + target_schema: ArrowSchemaRef, + safe: bool, + add_missing: bool, +) -> DeltaResult { + let cast_options = CastOptions { + safe, + ..Default::default() + }; + + let s = StructArray::new( + batch.schema().as_ref().to_owned().fields, + batch.columns().to_owned(), + None, + ); + let struct_array = cast_struct(&s, target_schema.fields(), &cast_options, add_missing)?; + + Ok(RecordBatch::try_new_with_options( + target_schema, + struct_array.columns().to_vec(), + &RecordBatchOptions::new().with_row_count(Some(batch.num_rows())), + )?) 
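// [Editor's sketch, not part of this patch] Intended call shape for `cast_record_batch`,
// with hypothetical column names. `add_missing = true` back-fills absent *nullable* target
// columns with nulls (a missing non-nullable column is an error), and `safe = false` turns
// invalid casts into errors instead of nulls:
//
//   let target = Arc::new(Schema::new(vec![
//       Field::new("id", DataType::Int64, false), // widened from Int32 in the batch
//       Field::new("note", DataType::Utf8, true), // absent in the batch, filled with nulls
//   ]));
//   let casted = cast_record_batch(&batch, target, false, true)?;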
+} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::ops::Deref; + use std::sync::Arc; + + use super::merge_schema::{merge_arrow_schema, merge_delta_struct}; + use arrow::array::types::Int32Type; + use arrow::array::{ + new_empty_array, new_null_array, Array, ArrayData, ArrayRef, AsArray, Int32Array, + ListArray, PrimitiveArray, RecordBatch, StringArray, StructArray, + }; + use arrow::buffer::{Buffer, NullBuffer}; + use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; + use delta_kernel::schema::MetadataValue; + use itertools::Itertools; + + use crate::kernel::{ + ArrayType as DeltaArrayType, DataType as DeltaDataType, StructField as DeltaStructField, + StructType as DeltaStructType, + }; + use crate::operations::cast::{cast_record_batch, is_cast_required}; + + #[test] + fn test_merge_arrow_schema_with_dict() { + let left_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + )])); + let right_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::LargeUtf8, + true, + )])); + + let result = merge_arrow_schema(left_schema, right_schema, true).unwrap(); + assert_eq!(result.fields().len(), 1); + let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); + assert_eq!(delta_type, DeltaDataType::STRING); + assert!(result.fields()[0].is_nullable()); + } + + #[test] + fn test_merge_delta_schema_with_meta() { + let mut left_meta = HashMap::new(); + left_meta.insert("a".to_string(), "a1".to_string()); + let left_schema = DeltaStructType::new(vec![DeltaStructField::new( + "f", + DeltaDataType::STRING, + false, + ) + .with_metadata(left_meta)]); + let mut right_meta = HashMap::new(); + right_meta.insert("b".to_string(), "b2".to_string()); + let right_schema = DeltaStructType::new(vec![DeltaStructField::new( + "f", + DeltaDataType::STRING, + true, + ) + .with_metadata(right_meta)]); + + let result = merge_delta_struct(&left_schema, &right_schema).unwrap(); + let fields = result.fields().collect_vec(); + assert_eq!(fields.len(), 1); + let delta_type = fields[0].data_type(); + assert_eq!(delta_type, &DeltaDataType::STRING); + let mut expected_meta = HashMap::new(); + expected_meta.insert("a".to_string(), MetadataValue::String("a1".to_string())); + expected_meta.insert("b".to_string(), MetadataValue::String("b2".to_string())); + assert_eq!(fields[0].metadata(), &expected_meta); + } + + #[test] + fn test_merge_arrow_schema_with_nested() { + let left_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + false, + )])); + let right_schema = Arc::new(Schema::new(vec![Field::new( + "f", + DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))), + true, + )])); + + let result = merge_arrow_schema(left_schema, right_schema, true).unwrap(); + assert_eq!(result.fields().len(), 1); + let delta_type: DeltaDataType = result.fields()[0].data_type().try_into().unwrap(); + assert_eq!( + delta_type, + DeltaDataType::Array(Box::new(DeltaArrayType::new(DeltaDataType::STRING, false))) + ); + assert!(result.fields()[0].is_nullable()); + } + + #[test] + fn test_cast_record_batch_with_list_non_default_item() { + let array = Arc::new(make_list_array()) as ArrayRef; + let source_schema = Schema::new(vec![Field::new( + "list_column", + array.data_type().clone(), + false, + )]); + let record_batch = RecordBatch::try_new(Arc::new(source_schema), vec![array]).unwrap(); + + let 
fields = Fields::from(vec![Field::new_list( + "list_column", + Field::new("item", DataType::Int8, false), + false, + )]); + let target_schema = Arc::new(Schema::new(fields)) as SchemaRef; + + let result = cast_record_batch(&record_batch, target_schema, false, false); + + let schema = result.unwrap().schema(); + let field = schema.column_with_name("list_column").unwrap().1; + if let DataType::List(list_item) = field.data_type() { + assert_eq!(list_item.name(), "item"); + } else { + panic!("Not a list"); + } + } + + fn make_list_array() -> ListArray { + let value_data = ArrayData::builder(DataType::Int32) + .len(8) + .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) + .build() + .unwrap(); + + let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); + + let list_data_type = DataType::List(Arc::new(Field::new("element", DataType::Int32, true))); + let list_data = ArrayData::builder(list_data_type) + .len(3) + .add_buffer(value_offsets) + .add_child_data(value_data) + .build() + .unwrap(); + ListArray::from(list_data) + } + + #[test] + fn test_is_cast_required_with_list() { + let field1 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); + let field2 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); + + assert!(!is_cast_required(&field1, &field2)); + } + + #[test] + fn test_is_cast_required_with_list_non_default_item() { + let field1 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); + let field2 = DataType::List(FieldRef::from(Field::new( + "element", + DataType::Int32, + false, + ))); + + assert!(is_cast_required(&field1, &field2)); + } + + #[test] + fn test_add_missing_null_fields_with_no_missing_fields() { + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec![Some("a"), None, Some("c")])), + ], + ) + .unwrap(); + let result = cast_record_batch(&batch, schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + assert_eq!( + result.column(1).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + } + + #[test] + fn test_add_missing_null_fields_with_missing_beginning() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field2", + DataType::Utf8, + true, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from(vec![ + Some("a"), + None, + Some("c"), + ]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Utf8, true), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + new_null_array(&DataType::Int32, 3) + .deref() + .as_primitive::() + ); + assert_eq!( + result.column(1).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + } + + #[test] + fn test_add_missing_null_fields_with_missing_end() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + 
schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, true), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from(vec![Some(1), Some(2), Some(3)]) + ); + assert_eq!( + result.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string() + ); + } + + #[test] + fn test_add_missing_null_fields_error_on_missing_non_null() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Utf8, false), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true); + assert!(result.is_err()); + } + + #[test] + fn test_add_missing_null_fields_nested_struct_missing() { + let nested_fields = Fields::from(vec![Field::new("nested1", DataType::Utf8, true)]); + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Struct(nested_fields.clone()), true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StructArray::new( + nested_fields, + vec![Arc::new(StringArray::from(vec![Some("a"), None, Some("c")])) as ArrayRef], + None, + )), + ], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Struct(Fields::from(vec![ + Field::new("nested1", DataType::Utf8, true), + Field::new("nested2", DataType::Utf8, true), + ])), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let struct_column = result.column(1).deref().as_struct(); + assert_eq!(struct_column.num_columns(), 2); + assert_eq!( + struct_column.column(0).deref().as_string(), + &StringArray::from(vec![Some("a"), None, Some("c")]) + ); + assert_eq!( + struct_column.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string() + ); + } + + #[test] + fn test_add_missing_null_fields_nested_struct_missing_non_nullable() { + let nested_fields = Fields::from(vec![Field::new("nested1", DataType::Utf8, false)]); + let schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new("field2", DataType::Struct(nested_fields.clone()), true), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StructArray::new( + nested_fields, + vec![new_null_array(&DataType::Utf8, 3)], + Some(NullBuffer::new_null(3)), + )), + ], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Struct(Fields::from(vec![ + Field::new("nested1", DataType::Utf8, false), + Field::new("nested2", DataType::Utf8, 
true), + ])), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let struct_column = result.column(1).deref().as_struct(); + assert_eq!(struct_column.num_columns(), 2); + let expected: [Option<&str>; 3] = Default::default(); + assert_eq!( + struct_column.column(0).deref().as_string(), + &StringArray::from(Vec::from(expected)) + ); + assert_eq!( + struct_column.column(1).deref().as_string::(), + new_null_array(&DataType::Utf8, 3).deref().as_string(), + ); + } + + #[test] + fn test_add_missing_null_fields_list_missing() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::List(Arc::new(Field::new("nested1", DataType::Utf8, true))), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let list_column = result.column(1).deref().as_list::(); + assert_eq!(list_column.len(), 3); + assert_eq!(list_column.value_offsets(), &[0, 0, 0, 0]); + assert_eq!( + list_column.values().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ) + } + + #[test] + fn test_add_missing_null_fields_map_missing() { + let schema = Arc::new(Schema::new(vec![Field::new( + "field1", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + let new_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Int32, false), + Field::new( + "field2", + DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, true), + Field::new("value", DataType::Utf8, true), + ])), + true, + )), + false, + ), + true, + ), + ])); + let result = cast_record_batch(&batch, new_schema.clone(), false, true).unwrap(); + assert_eq!(result.schema(), new_schema); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.column(0).deref().as_primitive::(), + &PrimitiveArray::::from_iter([1, 2, 3]) + ); + let map_column = result.column(1).deref().as_map(); + assert_eq!(map_column.len(), 3); + assert_eq!(map_column.offsets().as_ref(), &[0; 4]); + assert_eq!( + map_column.keys().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ); + assert_eq!( + map_column.values().deref().as_string::(), + new_empty_array(&DataType::Utf8).deref().as_string() + ); + } +} diff --git a/crates/core/src/operations/cdc.rs b/crates/core/src/operations/cdc.rs new file mode 100644 index 0000000000..b04c794c61 --- /dev/null +++ b/crates/core/src/operations/cdc.rs @@ -0,0 +1,415 @@ +//! +//! The CDC module contains private tools for managing CDC files +//! 
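// [Editor's sketch, not part of this patch] The CDCTracker below derives change rows by
// set difference between the pre- and post-images (via DataFusion `except`). The same idea
// with plain collections, mirroring the 1,2,3 -> 1,12,3 test case further down:
use std::collections::BTreeSet;

fn main() {
    let pre: BTreeSet<i32> = [1, 2, 3].into();
    let post: BTreeSet<i32> = [1, 12, 3].into();
    // Rows only in the pre-image are tagged `update_preimage`,
    // rows only in the post-image are tagged `update_postimage`.
    let preimage: Vec<&i32> = pre.difference(&post).collect();
    let postimage: Vec<&i32> = post.difference(&pre).collect();
    assert_eq!(preimage, vec![&2]);
    assert_eq!(postimage, vec![&12]);
}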
+ +use crate::table::state::DeltaTableState; +use crate::DeltaResult; + +use datafusion::prelude::*; +use datafusion_common::ScalarValue; + +pub const CDC_COLUMN_NAME: &str = "_change_type"; + +/// The CDCTracker is useful for hooking reads/writes in a manner nececessary to create CDC files +/// associated with commits +pub(crate) struct CDCTracker { + pre_dataframe: DataFrame, + post_dataframe: DataFrame, +} + +impl CDCTracker { + /// construct + pub(crate) fn new(pre_dataframe: DataFrame, post_dataframe: DataFrame) -> Self { + Self { + pre_dataframe, + post_dataframe, + } + } + + pub(crate) fn collect(self) -> DeltaResult { + // Collect _all_ the batches for consideration + let pre_df = self.pre_dataframe; + let post_df = self.post_dataframe; + + // There is certainly a better way to do this other than stupidly cloning data for diffing + // purposes, but this is the quickest and easiest way to "diff" the two sets of batches + let preimage = pre_df.clone().except(post_df.clone())?; + let postimage = post_df.except(pre_df)?; + + let preimage = preimage.with_column( + "_change_type", + lit(ScalarValue::Utf8(Some("update_preimage".to_string()))), + )?; + + let postimage = postimage.with_column( + "_change_type", + lit(ScalarValue::Utf8(Some("update_postimage".to_string()))), + )?; + + let final_df = preimage.union(postimage)?; + Ok(final_df) + } +} + +/// +/// Return true if the specified table is capable of writing Change Data files +/// +/// From the Protocol: +/// +/// > For Writer Versions 4 up to 6, all writers must respect the delta.enableChangeDataFeed +/// > configuration flag in the metadata of the table. When delta.enableChangeDataFeed is true, +/// > writers must produce the relevant AddCDCFile's for any operation that changes data, as +/// > specified in Change Data Files. +/// > +/// > For Writer Version 7, all writers must respect the delta.enableChangeDataFeed configuration flag in +/// > the metadata of the table only if the feature changeDataFeed exists in the table protocol's +/// > writerFeatures. +pub(crate) fn should_write_cdc(snapshot: &DeltaTableState) -> DeltaResult { + if let Some(features) = &snapshot.protocol().writer_features { + // Features should only exist at writer version 7 but to avoid cases where + // the Option> can get filled with an empty set, checking for the value + // explicitly + if snapshot.protocol().min_writer_version == 7 + && !features.contains(&crate::kernel::WriterFeatures::ChangeDataFeed) + { + // If the writer feature has not been set, then the table should not have CDC written + // to it. 
Otherwise fallback to the configured table configuration + return Ok(false); + } + } + Ok(snapshot.table_config().enable_change_data_feed()) +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::*; + use crate::kernel::DataType as DeltaDataType; + use crate::kernel::{Action, PrimitiveType, Protocol}; + use crate::operations::DeltaOps; + use crate::{DeltaTable, TableProperty}; + use arrow::array::{ArrayRef, Int32Array, StructArray}; + use arrow::datatypes::{DataType, Field}; + use arrow_array::RecordBatch; + use arrow_schema::Schema; + use datafusion::assert_batches_sorted_eq; + use datafusion::datasource::{MemTable, TableProvider}; + + /// A simple test which validates primitive writer version 1 tables should + /// not write Change Data Files + #[tokio::test] + async fn test_should_write_cdc_basic_table() { + let mut table = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .await + .expect("Failed to make a table"); + table.load().await.expect("Failed to reload table"); + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!(!result, "A default table should not create CDC files"); + } + + /// + /// This test manually creates a table with writer version 4 that has the configuration sets + /// + #[tokio::test] + async fn test_should_write_cdc_table_with_configuration() { + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result, + "A table with the EnableChangeDataFeed should create CDC files" + ); + } + + /// + /// This test creates a writer version 7 table which has a slightly different way of + /// determining whether CDC files should be written or not. 
+ #[tokio::test] + async fn test_should_write_cdc_v7_table_no_writer_feature() { + let actions = vec![Action::Protocol(Protocol::new(1, 7))]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + !result, + "A v7 table must not write CDC files unless the writer feature is set" + ); + } + + /// + /// This test creates a writer version 7 table with a writer table feature enabled for CDC and + /// therefore should write CDC files + #[tokio::test] + async fn test_should_write_cdc_v7_table_with_writer_feature() { + let protocol = Protocol::new(1, 7) + .with_writer_features(vec![crate::kernel::WriterFeatures::ChangeDataFeed]); + let actions = vec![Action::Protocol(protocol)]; + let mut table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + table.load().await.expect("Failed to reload table"); + + let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); + assert!( + result, + "A v7 table must not write CDC files unless the writer feature is set" + ); + } + + #[tokio::test] + async fn test_sanity_check() { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table_provider: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let source_df = ctx.read_table(table_provider).unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)]))], + ) + .unwrap(); + let table_provider_updated: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![updated_batch]]).unwrap()); + let updated_df = ctx.read_table(table_provider_updated).unwrap(); + + let tracker = CDCTracker::new(source_df, updated_df); + + match tracker.collect() { + Ok(df) => { + let batches = &df.collect().await.unwrap(); + let _ = arrow::util::pretty::print_batches(batches); + assert_eq!(batches.len(), 2); + assert_batches_sorted_eq! 
{[ + "+-------+------------------+", + "| value | _change_type |", + "+-------+------------------+", + "| 2 | update_preimage |", + "| 12 | update_postimage |", + "+-------+------------------+", + ], &batches } + } + Err(err) => { + println!("err: {err:#?}"); + panic!("Should have never reached this assertion"); + } + } + } + + #[tokio::test] + async fn test_sanity_check_with_pure_df() { + let nested_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("lat", DataType::Int32, true), + Field::new("long", DataType::Int32, true), + ])); + let schema = Arc::new(Schema::new(vec![ + Field::new("value", DataType::Int32, true), + Field::new( + "nested", + DataType::Struct(nested_schema.fields.clone()), + true, + ), + ])); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let _ = arrow::util::pretty::print_batches(&[batch.clone()]); + let _ = arrow::util::pretty::print_batches(&[updated_batch.clone()]); + + let ctx = SessionContext::new(); + let before = ctx.read_batch(batch).expect("Failed to make DataFrame"); + let after = ctx + .read_batch(updated_batch) + .expect("Failed to make DataFrame"); + + let diff = before + .except(after) + .expect("Failed to except") + .collect() + .await + .expect("Failed to diff"); + assert_eq!(diff.len(), 1); + } + + #[tokio::test] + async fn test_sanity_check_with_struct() { + let ctx = SessionContext::new(); + let nested_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("lat", DataType::Int32, true), + Field::new("long", DataType::Int32, true), + ])); + let schema = Arc::new(Schema::new(vec![ + Field::new("value", DataType::Int32, true), + Field::new( + "nested", + DataType::Struct(nested_schema.fields.clone()), + true, + ), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let table_provider: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let source_df = 
ctx.read_table(table_provider).unwrap(); + + let updated_batch = RecordBatch::try_new( + Arc::clone(&schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])), + Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("id", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("lat", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ( + Arc::new(Field::new("long", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + ), + ])), + ], + ) + .unwrap(); + let table_provider_updated: Arc = + Arc::new(MemTable::try_new(schema.clone(), vec![vec![updated_batch]]).unwrap()); + let updated_df = ctx.read_table(table_provider_updated).unwrap(); + + let tracker = CDCTracker::new(source_df, updated_df); + + match tracker.collect() { + Ok(df) => { + let batches = &df.collect().await.unwrap(); + let _ = arrow::util::pretty::print_batches(batches); + assert_eq!(batches.len(), 2); + assert_batches_sorted_eq! {[ + "+-------+--------------------------+------------------+", + "| value | nested | _change_type |", + "+-------+--------------------------+------------------+", + "| 12 | {id: 2, lat: 2, long: 2} | update_postimage |", + "| 2 | {id: 2, lat: 2, long: 2} | update_preimage |", + "+-------+--------------------------+------------------+", + ], &batches } + } + Err(err) => { + println!("err: {err:#?}"); + panic!("Should have never reached this assertion"); + } + } + } +} diff --git a/crates/core/src/operations/constraints.rs b/crates/core/src/operations/constraints.rs index e5d356f81c..2acf57a03d 100644 --- a/crates/core/src/operations/constraints.rs +++ b/crates/core/src/operations/constraints.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use datafusion::execution::context::SessionState; use datafusion::execution::{SendableRecordBatchStream, TaskContext}; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use datafusion_common::ToDFSchema; +use datafusion_physical_plan::ExecutionPlan; use futures::future::BoxFuture; use futures::StreamExt; @@ -89,6 +89,12 @@ impl std::future::IntoFuture for ConstraintBuilder { let this = self; Box::pin(async move { + if !this.snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles( + "ADD CONSTRAINTS".into(), + )); + } + let name = match this.name { Some(v) => v, None => return Err(DeltaTableError::Generic("No name provided".to_string())), diff --git a/crates/core/src/operations/convert_to_delta.rs b/crates/core/src/operations/convert_to_delta.rs index 2e157c38c0..148b581d8b 100644 --- a/crates/core/src/operations/convert_to_delta.rs +++ b/crates/core/src/operations/convert_to_delta.rs @@ -1,36 +1,32 @@ //! 
Command for converting a Parquet table to a Delta table in place // https://github.com/delta-io/delta/blob/1d5dd774111395b0c4dc1a69c94abc169b1c83b6/spark/src/main/scala/org/apache/spark/sql/delta/commands/ConvertToDeltaCommand.scala +use std::collections::{HashMap, HashSet}; +use std::num::TryFromIntError; +use std::str::{FromStr, Utf8Error}; +use std::sync::Arc; + +use arrow_schema::{ArrowError, Schema as ArrowSchema}; +use futures::future::{self, BoxFuture}; +use futures::TryStreamExt; +use indexmap::IndexMap; +use itertools::Itertools; +use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; +use parquet::errors::ParquetError; +use percent_encoding::percent_decode_str; +use serde_json::{Map, Value}; +use tracing::debug; use crate::operations::get_num_idx_cols_and_stats_columns; use crate::{ - kernel::{Add, DataType, Schema, StructField}, + kernel::{scalars::ScalarExt, Add, DataType, Schema, StructField}, logstore::{LogStore, LogStoreRef}, operations::create::CreateBuilder, protocol::SaveMode, table::builder::ensure_table_uri, - table::config::DeltaConfigKey, + table::config::TableProperty, writer::stats::stats_from_parquet_metadata, DeltaResult, DeltaTable, DeltaTableError, ObjectStoreError, NULL_PARTITION_VALUE_DATA_PATH, }; -use arrow::{datatypes::Schema as ArrowSchema, error::ArrowError}; -use futures::{ - future::{self, BoxFuture}, - TryStreamExt, -}; -use indexmap::IndexMap; -use parquet::{ - arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}, - errors::ParquetError, -}; -use percent_encoding::percent_decode_str; -use serde_json::{Map, Value}; -use std::{ - collections::{HashMap, HashSet}, - num::TryFromIntError, - str::{FromStr, Utf8Error}, - sync::Arc, -}; -use tracing::debug; /// Error converting a Parquet table to a Delta table #[derive(Debug, thiserror::Error)] @@ -52,7 +48,7 @@ enum Error { #[error("The schema of partition columns must be provided to convert a Parquet table to a Delta table")] MissingPartitionSchema, #[error("Partition column provided by the user does not exist in the parquet files")] - PartitionColumnNotExist(HashSet), + PartitionColumnNotExist, #[error("The given location is already a delta table location")] DeltaTableAlready, #[error("Location must be provided to convert a Parquet table to a Delta table")] @@ -104,7 +100,7 @@ pub struct ConvertToDeltaBuilder { log_store: Option, location: Option, storage_options: Option>, - partition_schema: HashSet, + partition_schema: HashMap, partition_strategy: PartitionStrategy, mode: SaveMode, name: Option, @@ -169,7 +165,10 @@ impl ConvertToDeltaBuilder { mut self, partition_schema: impl IntoIterator, ) -> Self { - self.partition_schema = HashSet::from_iter(partition_schema); + self.partition_schema = partition_schema + .into_iter() + .map(|f| (f.name.clone(), f)) + .collect(); self } @@ -213,7 +212,7 @@ impl ConvertToDeltaBuilder { /// Specify a table property in the table configuration pub fn with_configuration_property( mut self, - key: DeltaConfigKey, + key: TableProperty, value: Option>, ) -> Self { self.configuration @@ -240,6 +239,7 @@ impl ConvertToDeltaBuilder { crate::logstore::logstore_for( ensure_table_uri(location)?, self.storage_options.unwrap_or_default(), + None, // TODO: allow runtime to be passed into builder )? 
} else { return Err(Error::MissingLocation); @@ -276,12 +276,7 @@ impl ConvertToDeltaBuilder { let mut arrow_schemas = Vec::new(); let mut actions = Vec::new(); // partition columns that were defined by caller and are expected to apply on this table - let mut expected_partitions: HashMap = self - .partition_schema - .clone() - .into_iter() - .map(|field| (field.name.clone(), field)) - .collect(); + let mut expected_partitions: HashMap = self.partition_schema.clone(); // A HashSet of all unique partition columns in a Parquet table let mut partition_columns = HashSet::new(); // A vector of StructField of all unique partition columns in a Parquet table @@ -317,12 +312,14 @@ impl ConvertToDeltaBuilder { // Safety: we just checked that the key is present in the map let field = partition_schema_fields.get(key).unwrap(); let scalar = if value == NULL_PARTITION_VALUE_DATA_PATH { - Ok(crate::kernel::Scalar::Null(field.data_type().clone())) + Ok(delta_kernel::expressions::Scalar::Null( + field.data_type().clone(), + )) } else { let decoded = percent_decode_str(value).decode_utf8()?; match field.data_type() { DataType::Primitive(p) => p.parse_scalar(decoded.as_ref()), - _ => Err(crate::kernel::Error::Generic(format!( + _ => Err(delta_kernel::Error::Generic(format!( "Exprected primitive type, found: {:?}", field.data_type() ))), @@ -390,25 +387,19 @@ impl ConvertToDeltaBuilder { if !expected_partitions.is_empty() { // Partition column provided by the user does not exist in the parquet files - return Err(Error::PartitionColumnNotExist(self.partition_schema)); + return Err(Error::PartitionColumnNotExist); } // Merge parquet file schemas // This step is needed because timestamp will not be preserved when copying files in S3. We can't use the schema of the latest parqeut file as Delta table's schema - let mut schema_fields = Schema::try_from(&ArrowSchema::try_merge(arrow_schemas)?)? 
- .fields() - .clone(); - schema_fields.append( - &mut partition_schema_fields - .values() - .cloned() - .collect::>(), - ); + let schema = Schema::try_from(&ArrowSchema::try_merge(arrow_schemas)?)?; + let mut schema_fields = schema.fields().collect_vec(); + schema_fields.append(&mut partition_schema_fields.values().collect::>()); // Generate CreateBuilder with corresponding add actions, schemas and operation meta let mut builder = CreateBuilder::new() .with_log_store(log_store) - .with_columns(schema_fields) + .with_columns(schema_fields.into_iter().cloned()) .with_partition_columns(partition_columns.into_iter()) .with_actions(actions) .with_save_mode(self.mode) @@ -447,17 +438,20 @@ impl std::future::IntoFuture for ConvertToDeltaBuilder { #[cfg(test)] mod tests { + use std::fs; + + use delta_kernel::expressions::Scalar; + use itertools::Itertools; + use pretty_assertions::assert_eq; + use tempfile::tempdir; + use super::*; use crate::{ - kernel::{DataType, PrimitiveType, Scalar}, + kernel::{DataType, PrimitiveType}, open_table, storage::StorageOptions, Path, }; - use itertools::Itertools; - use pretty_assertions::assert_eq; - use std::fs; - use tempfile::tempdir; fn schema_field(key: &str, primitive: PrimitiveType, nullable: bool) -> StructField { StructField::new(key.to_string(), DataType::Primitive(primitive), nullable) @@ -484,7 +478,7 @@ mod tests { fn log_store(path: impl Into) -> LogStoreRef { let path: String = path.into(); let location = ensure_table_uri(path).expect("Failed to get the URI from the path"); - crate::logstore::logstore_for(location, StorageOptions::default()) + crate::logstore::logstore_for(location, StorageOptions::default(), None) .expect("Failed to create an object store") } @@ -563,7 +557,8 @@ mod tests { .get_schema() .expect("Failed to get schema") .fields() - .clone(); + .cloned() + .collect_vec(); schema_fields.sort_by(|a, b| a.name().cmp(b.name())); assert_eq!( schema_fields, expected_schema, @@ -603,14 +598,15 @@ mod tests { "part-00000-d22c627d-9655-4153-9527-f8995620fa42-c000.snappy.parquet" ); - let Some(Scalar::Struct(min_values, _)) = action.min_values() else { + let Some(Scalar::Struct(data)) = action.min_values() else { panic!("Missing min values"); }; - assert_eq!(min_values, vec![Scalar::Date(18628), Scalar::Integer(1)]); - let Some(Scalar::Struct(max_values, _)) = action.max_values() else { + assert_eq!(data.values(), vec![Scalar::Date(18628), Scalar::Integer(1)]); + + let Some(Scalar::Struct(data)) = action.max_values() else { panic!("Missing max values"); }; - assert_eq!(max_values, vec![Scalar::Date(18632), Scalar::Integer(5)]); + assert_eq!(data.values(), vec![Scalar::Date(18632), Scalar::Integer(5)]); assert_delta_table( table, diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 53cab30c81..ad0413722e 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -4,9 +4,11 @@ use std::collections::HashMap; use std::sync::Arc; +use delta_kernel::schema::MetadataValue; use futures::future::BoxFuture; use maplit::hashset; use serde_json::Value; +use tracing::log::*; use super::transaction::{CommitBuilder, TableReference, PROTOCOL}; use crate::errors::{DeltaResult, DeltaTableError}; @@ -14,12 +16,9 @@ use crate::kernel::{ Action, DataType, Metadata, Protocol, ReaderFeatures, StructField, StructType, WriterFeatures, }; use crate::logstore::{LogStore, LogStoreRef}; -use crate::operations::set_tbl_properties::{ - apply_properties_to_protocol, 
convert_properties_to_features, -}; use crate::protocol::{DeltaOperation, SaveMode}; use crate::table::builder::ensure_table_uri; -use crate::table::config::DeltaConfigKey; +use crate::table::config::TableProperty; use crate::{DeltaTable, DeltaTableBuilder}; #[derive(thiserror::Error, Debug)] @@ -62,6 +61,7 @@ pub struct CreateBuilder { log_store: Option, configuration: HashMap>, metadata: Option>, + raise_if_key_not_exists: bool, } impl super::Operation<()> for CreateBuilder {} @@ -87,6 +87,7 @@ impl CreateBuilder { log_store: None, configuration: Default::default(), metadata: Default::default(), + raise_if_key_not_exists: true, } } @@ -126,7 +127,24 @@ impl CreateBuilder { ) -> Self { let mut field = StructField::new(name.into(), data_type, nullable); if let Some(meta) = metadata { - field = field.with_metadata(meta); + field = field.with_metadata(meta.iter().map(|(k, v)| { + ( + k, + if let Value::Number(n) = v { + n.as_i64().map_or_else( + || MetadataValue::String(v.to_string()), + |i| { + i32::try_from(i) + .ok() + .map(MetadataValue::Number) + .unwrap_or_else(|| MetadataValue::String(v.to_string())) + }, + ) + } else { + MetadataValue::String(v.to_string()) + }, + ) + })); }; self.columns.push(field); self @@ -176,7 +194,7 @@ impl CreateBuilder { /// Specify a table property in the table configuration pub fn with_configuration_property( mut self, - key: DeltaConfigKey, + key: TableProperty, value: Option>, ) -> Self { self.configuration @@ -196,6 +214,12 @@ impl CreateBuilder { self } + /// Specify whether to raise an error if the table properties in the configuration are not TablePropertys + pub fn with_raise_if_key_not_exists(mut self, raise_if_key_not_exists: bool) -> Self { + self.raise_if_key_not_exists = raise_if_key_not_exists; + self + } + /// Specify additional actions to be added to the commit. /// /// This method is mainly meant for internal use. Manually adding inconsistent @@ -242,8 +266,7 @@ impl CreateBuilder { }; let configuration = self.configuration; - let contains_timestampntz = PROTOCOL.contains_timestampntz(&self.columns); - + let contains_timestampntz = PROTOCOL.contains_timestampntz(self.columns.iter()); // TODO configure more permissive versions based on configuration. Also how should this ideally be handled? // We set the lowest protocol we can, and if subsequent writes use newer features we update metadata? 
@@ -273,16 +296,15 @@ impl CreateBuilder { }) .unwrap_or_else(|| current_protocol); - let protocol = apply_properties_to_protocol( - &protocol, + let protocol = protocol.apply_properties_to_protocol( &configuration .iter() .map(|(k, v)| (k.clone(), v.clone().unwrap())) .collect::>(), - true, + self.raise_if_key_not_exists, )?; - let protocol = convert_properties_to_features(protocol, &configuration); + let protocol = protocol.move_table_properties_into_features(&configuration); let mut metadata = Metadata::try_new( StructType::new(self.columns), @@ -372,7 +394,7 @@ impl std::future::IntoFuture for CreateBuilder { mod tests { use super::*; use crate::operations::DeltaOps; - use crate::table::config::DeltaConfigKey; + use crate::table::config::TableProperty; use crate::writer::test_utils::{get_delta_schema, get_record_batch}; use tempfile::TempDir; @@ -382,7 +404,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -402,7 +424,7 @@ mod tests { .await .unwrap() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -420,7 +442,7 @@ mod tests { ); let table = CreateBuilder::new() .with_location(format!("./{relative_path}")) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -431,7 +453,7 @@ mod tests { let schema = get_delta_schema(); let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -454,7 +476,7 @@ mod tests { }; let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_actions(vec![Action::Protocol(protocol)]) .await .unwrap(); @@ -463,15 +485,15 @@ mod tests { let table = CreateBuilder::new() .with_location("memory://") - .with_columns(schema.fields().clone()) - .with_configuration_property(DeltaConfigKey::AppendOnly, Some("true")) + .with_columns(schema.fields().cloned()) + .with_configuration_property(TableProperty::AppendOnly, Some("true")) .await .unwrap(); let append = table .metadata() .unwrap() .configuration - .get(DeltaConfigKey::AppendOnly.as_ref()) + .get(TableProperty::AppendOnly.as_ref()) .unwrap() .as_ref() .unwrap() @@ -486,7 +508,7 @@ mod tests { let schema = get_delta_schema(); let table = CreateBuilder::new() .with_location(tmp_dir.path().to_str().unwrap()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -497,7 +519,7 @@ mod tests { // Check an error is raised when a table exists at location let table = CreateBuilder::new() .with_log_store(log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::ErrorIfExists) .await; assert!(table.is_err()); @@ -505,7 +527,7 @@ mod tests { // Check current table is returned when ignore option is chosen. 
let table = CreateBuilder::new() .with_log_store(log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Ignore) .await .unwrap(); @@ -514,7 +536,7 @@ mod tests { // Check table is overwritten let table = CreateBuilder::new() .with_log_store(log_store) - .with_columns(schema.fields().iter().cloned()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Overwrite) .await .unwrap(); @@ -535,7 +557,7 @@ mod tests { let mut table = DeltaOps(table) .create() - .with_columns(schema.fields().iter().cloned()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Overwrite) .await .unwrap(); @@ -559,7 +581,7 @@ mod tests { let mut table = DeltaOps(table) .create() - .with_columns(schema.fields().iter().cloned()) + .with_columns(schema.fields().cloned()) .with_save_mode(SaveMode::Overwrite) .with_partition_columns(vec!["id"]) .await @@ -569,4 +591,43 @@ mod tests { // Checks if files got removed after overwrite assert_eq!(table.get_files_count(), 0); } + + #[tokio::test] + async fn test_create_table_metadata_raise_if_key_not_exists() { + let schema = get_delta_schema(); + let config: HashMap> = + vec![("key".to_string(), Some("value".to_string()))] + .into_iter() + .collect(); + + // Fail to create table with unknown Delta key + let table = CreateBuilder::new() + .with_location("memory://") + .with_columns(schema.fields().cloned()) + .with_configuration(config.clone()) + .await; + assert!(table.is_err()); + + // Succeed in creating table with unknown Delta key since we set raise_if_key_not_exists to false + let table = CreateBuilder::new() + .with_location("memory://") + .with_columns(schema.fields().cloned()) + .with_raise_if_key_not_exists(false) + .with_configuration(config) + .await; + assert!(table.is_ok()); + + // Ensure the non-Delta key was set correctly + let value = table + .unwrap() + .metadata() + .unwrap() + .configuration + .get("key") + .unwrap() + .as_ref() + .unwrap() + .clone(); + assert_eq!(String::from("value"), value); + } } diff --git a/crates/core/src/operations/delete.rs b/crates/core/src/operations/delete.rs index bf17ed6085..7dc58b5929 100644 --- a/crates/core/src/operations/delete.rs +++ b/crates/core/src/operations/delete.rs @@ -17,35 +17,47 @@ //! .await?; //! 
```` -use core::panic; -use std::sync::Arc; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; - -use crate::logstore::LogStoreRef; +use async_trait::async_trait; +use datafusion::dataframe::DataFrame; +use datafusion::datasource::provider_as_source; +use datafusion::error::Result as DataFusionResult; use datafusion::execution::context::{SessionContext, SessionState}; -use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::ExecutionPlan; +use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; use datafusion::prelude::Expr; -use datafusion_common::scalar::ScalarValue; -use datafusion_common::DFSchema; +use datafusion_common::ScalarValue; +use datafusion_expr::{lit, Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode}; +use datafusion_physical_plan::metrics::MetricBuilder; +use datafusion_physical_plan::ExecutionPlan; + use futures::future::BoxFuture; +use std::sync::Arc; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + use parquet::file::properties::WriterProperties; use serde::Serialize; +use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; -use super::write::WriterStatsConfig; use crate::delta_datafusion::expr::fmt_expr_to_sql; +use crate::delta_datafusion::logical::MetricObserver; +use crate::delta_datafusion::physical::{find_metric_node, get_metric, MetricObserverExec}; +use crate::delta_datafusion::planner::DeltaPlanner; use crate::delta_datafusion::{ - create_physical_expr_fix, find_files, register_store, DataFusionMixins, DeltaScanBuilder, - DeltaSessionContext, + find_files, register_store, DataFusionMixins, DeltaScanConfigBuilder, DeltaSessionContext, + DeltaTableProvider, }; use crate::errors::DeltaResult; use crate::kernel::{Action, Add, Remove}; -use crate::operations::write::write_execution_plan; +use crate::logstore::LogStoreRef; +use crate::operations::write::{write_execution_plan, write_execution_plan_cdc, WriterStatsConfig}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use crate::DeltaTable; +use crate::{DeltaTable, DeltaTableError}; + +const SOURCE_COUNT_ID: &str = "delete_source_count"; +const SOURCE_COUNT_METRIC: &str = "num_source_rows"; /// Delete Records from the Delta Table. 
/// See this module's documentation for more information @@ -72,15 +84,15 @@ pub struct DeleteMetrics { /// Number of files removed pub num_removed_files: usize, /// Number of rows removed - pub num_deleted_rows: Option, + pub num_deleted_rows: usize, /// Number of rows copied in the process of deleting files - pub num_copied_rows: Option, + pub num_copied_rows: usize, /// Time taken to execute the entire operation - pub execution_time_ms: u128, + pub execution_time_ms: u64, /// Time taken to scan the file for matches - pub scan_time_ms: u128, + pub scan_time_ms: u64, /// Time taken to rewrite the matched files - pub rewrite_time_ms: u128, + pub rewrite_time_ms: u64, } impl super::Operation<()> for DeleteBuilder {} @@ -123,36 +135,81 @@ impl DeleteBuilder { } } +#[derive(Clone)] +struct DeleteMetricExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for DeleteMetricExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DataFusionResult>> { + if let Some(metric_observer) = node.as_any().downcast_ref::() { + if metric_observer.id.eq(SOURCE_COUNT_ID) { + return Ok(Some(MetricObserverExec::try_new( + SOURCE_COUNT_ID.into(), + physical_inputs, + |batch, metrics| { + MetricBuilder::new(metrics) + .global_counter(SOURCE_COUNT_METRIC) + .add(batch.num_rows()); + }, + )?)); + } + } + Ok(None) + } +} + +#[allow(clippy::too_many_arguments)] async fn excute_non_empty_expr( snapshot: &DeltaTableState, log_store: LogStoreRef, state: &SessionState, expression: &Expr, - metrics: &mut DeleteMetrics, rewrite: &[Add], + metrics: &mut DeleteMetrics, writer_properties: Option, -) -> DeltaResult> { + partition_scan: bool, +) -> DeltaResult> { // For each identified file perform a parquet scan + filter + limit (1) + count. // If returned count is not zero then append the file to be rewritten and removed from the log. Otherwise do nothing to the file. + let mut actions: Vec = Vec::new(); + let table_partition_cols = snapshot.metadata().partition_columns.clone(); - let input_schema = snapshot.input_schema()?; - let input_dfschema: DFSchema = input_schema.clone().as_ref().clone().try_into()?; + let delete_planner = DeltaPlanner:: { + extension_planner: DeleteMetricExtensionPlanner {}, + }; - let table_partition_cols = snapshot.metadata().partition_columns.clone(); + let state = SessionStateBuilder::new_from_existing(state.clone()) + .with_query_planner(Arc::new(delete_planner)) + .build(); - let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), state) - .with_files(rewrite) - .build() - .await?; - let scan = Arc::new(scan); + let scan_config = DeltaScanConfigBuilder::default() + .with_file_column(false) + .with_schema(snapshot.input_schema()?) + .build(snapshot)?; + + let target_provider = Arc::new( + DeltaTableProvider::try_new(snapshot.clone(), log_store.clone(), scan_config.clone())? 
+ .with_files(rewrite.to_vec()), + ); + let target_provider = provider_as_source(target_provider); + let source = LogicalPlanBuilder::scan("target", target_provider.clone(), None)?.build()?; - // Apply the negation of the filter and rewrite files - let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + let source = LogicalPlan::Extension(Extension { + node: Arc::new(MetricObserver { + id: "delete_source_count".into(), + input: source, + enable_pushdown: false, + }), + }); - let predicate_expr = - create_physical_expr_fix(negated_expression, &input_dfschema, state.execution_props())?; - let filter: Arc = - Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + let df = DataFrame::new(state.clone(), source); let writer_stats_config = WriterStatsConfig::new( snapshot.table_config().num_indexed_cols(), @@ -162,35 +219,70 @@ async fn excute_non_empty_expr( .map(|v| v.iter().map(|v| v.to_string()).collect::>()), ); - let add_actions = write_execution_plan( - Some(snapshot), - state.clone(), - filter.clone(), - table_partition_cols.clone(), - log_store.object_store(), - Some(snapshot.table_config().target_file_size() as usize), - None, - writer_properties, - false, - None, - writer_stats_config, - ) - .await? - .into_iter() - .map(|a| match a { - Action::Add(a) => a, - _ => panic!("Expected Add action"), - }) - .collect::>(); - - let read_records = scan.parquet_scan.metrics().and_then(|m| m.output_rows()); - let filter_records = filter.metrics().and_then(|m| m.output_rows()); - metrics.num_copied_rows = filter_records; - metrics.num_deleted_rows = read_records - .zip(filter_records) - .map(|(read, filter)| read - filter); - - Ok(add_actions) + if !partition_scan { + // Apply the negation of the filter and rewrite files + let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + + let filter = df + .clone() + .filter(negated_expression)? + .create_physical_plan() + .await?; + + let add_actions: Vec = write_execution_plan( + Some(snapshot), + state.clone(), + filter.clone(), + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties.clone(), + writer_stats_config.clone(), + None, + ) + .await?; + + actions.extend(add_actions); + + let source_count = find_metric_node(SOURCE_COUNT_ID, &filter).ok_or_else(|| { + DeltaTableError::Generic("Unable to locate expected metric node".into()) + })?; + let source_count_metrics = source_count.metrics().unwrap(); + let read_records = get_metric(&source_count_metrics, SOURCE_COUNT_METRIC); + let filter_records = filter.metrics().and_then(|m| m.output_rows()).unwrap_or(0); + + metrics.num_copied_rows = filter_records; + metrics.num_deleted_rows = read_records - filter_records; + } + + // CDC logic, simply filters data with predicate and adds the _change_type="delete" as literal column + if let Ok(true) = should_write_cdc(snapshot) { + // Create CDC scan + let change_type_lit = lit(ScalarValue::Utf8(Some("delete".to_string()))); + let cdc_filter = df + .filter(expression.clone())? + .with_column("_change_type", change_type_lit)? 
+ .create_physical_plan() + .await?; + + let cdc_actions = write_execution_plan_cdc( + Some(snapshot), + state.clone(), + cdc_filter, + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + writer_stats_config, + None, + ) + .await?; + actions.extend(cdc_actions) + } + + Ok(actions) } async fn execute( @@ -201,30 +293,33 @@ async fn execute( writer_properties: Option, mut commit_properties: CommitProperties, ) -> DeltaResult<(DeltaTableState, DeleteMetrics)> { + if !&snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("DELETE".into())); + } + let exec_start = Instant::now(); let mut metrics = DeleteMetrics::default(); let scan_start = Instant::now(); let candidates = find_files(&snapshot, log_store.clone(), &state, predicate.clone()).await?; - metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_millis(); + metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_millis() as u64; let predicate = predicate.unwrap_or(Expr::Literal(ScalarValue::Boolean(Some(true)))); - let add = if candidates.partition_scan { - Vec::new() - } else { + let mut actions = { let write_start = Instant::now(); let add = excute_non_empty_expr( &snapshot, log_store.clone(), &state, &predicate, - &mut metrics, &candidates.candidates, + &mut metrics, writer_properties, + candidates.partition_scan, ) .await?; - metrics.rewrite_time_ms = Instant::now().duration_since(write_start).as_millis(); + metrics.rewrite_time_ms = Instant::now().duration_since(write_start).as_millis() as u64; add }; let remove = candidates.candidates; @@ -234,7 +329,6 @@ async fn execute( .unwrap() .as_millis() as i64; - let mut actions: Vec = add.into_iter().map(Action::Add).collect(); metrics.num_removed_files = remove.len(); metrics.num_added_files = actions.len(); @@ -253,7 +347,7 @@ async fn execute( })) } - metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_millis(); + metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_millis() as u64; commit_properties .app_metadata @@ -328,6 +422,9 @@ impl std::future::IntoFuture for DeleteBuilder { #[cfg(test)] mod tests { + use crate::delta_datafusion::cdf::DeltaCdfScan; + use crate::kernel::DataType as DeltaDataType; + use crate::operations::collect_sendable_stream; use crate::operations::DeltaOps; use crate::protocol::*; use crate::writer::test_utils::datafusion::get_data; @@ -335,17 +432,21 @@ mod tests { use crate::writer::test_utils::{ get_arrow_schema, get_delta_schema, get_record_batch, setup_table_with_configuration, }; - use crate::DeltaConfigKey; use crate::DeltaTable; + use crate::TableProperty; use arrow::array::Int32Array; use arrow::datatypes::{Field, Schema}; use arrow::record_batch::RecordBatch; use arrow_array::ArrayRef; + use arrow_array::StringArray; use arrow_array::StructArray; use arrow_buffer::NullBuffer; + use arrow_schema::DataType; use arrow_schema::Fields; use datafusion::assert_batches_sorted_eq; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; + use delta_kernel::schema::PrimitiveType; use serde_json::json; use std::sync::Arc; @@ -354,7 +455,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -364,7 +465,7 @@ mod tests { #[tokio::test] async fn 
test_delete_when_delta_table_is_append_only() { - let table = setup_table_with_configuration(DeltaConfigKey::AppendOnly, Some("true")).await; + let table = setup_table_with_configuration(TableProperty::AppendOnly, Some("true")).await; let batch = get_record_batch(None, false); // append some data let table = write_batch(table, batch).await; @@ -409,8 +510,8 @@ mod tests { assert_eq!(table.get_files_count(), 0); assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; @@ -420,16 +521,13 @@ mod tests { // serde_json::to_value(&metrics).unwrap() // ); - // rewrite is not required - assert_eq!(metrics.rewrite_time_ms, 0); - // Deletes with no changes to state must not commit let (table, metrics) = DeltaOps(table).delete().await.unwrap(); assert_eq!(table.version(), 2); assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 0); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); } #[tokio::test] @@ -500,8 +598,8 @@ mod tests { assert_eq!(metrics.num_added_files, 1); assert_eq!(metrics.num_removed_files, 1); assert!(metrics.scan_time_ms > 0); - assert_eq!(metrics.num_deleted_rows, Some(1)); - assert_eq!(metrics.num_copied_rows, Some(3)); + assert_eq!(metrics.num_deleted_rows, 1); + assert_eq!(metrics.num_copied_rows, 3); let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; @@ -655,10 +753,9 @@ mod tests { assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, None); - assert_eq!(metrics.num_copied_rows, None); + assert_eq!(metrics.num_deleted_rows, 0); + assert_eq!(metrics.num_copied_rows, 0); assert!(metrics.scan_time_ms > 0); - assert_eq!(metrics.rewrite_time_ms, 0); let expected = vec![ "+----+-------+------------+", @@ -717,8 +814,8 @@ mod tests { assert_eq!(metrics.num_added_files, 0); assert_eq!(metrics.num_removed_files, 1); - assert_eq!(metrics.num_deleted_rows, Some(1)); - assert_eq!(metrics.num_copied_rows, Some(0)); + assert_eq!(metrics.num_deleted_rows, 1); + assert_eq!(metrics.num_copied_rows, 0); assert!(metrics.scan_time_ms > 0); let expected = [ @@ -801,4 +898,174 @@ mod tests { .await; assert!(res.is_err()); } + + #[tokio::test] + async fn test_delete_cdc_enabled() { + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .delete() + .with_predicate(col("value").eq(lit(2))) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + 
let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! {[ + "+-------+--------------+-----------------+", + "| value | _change_type | _commit_version |", + "+-------+--------------+-----------------+", + "| 1 | insert | 1 |", + "| 2 | delete | 2 |", + "| 2 | insert | 1 |", + "| 3 | insert | 1 |", + "+-------+--------------+-----------------+", + ], &batches } + } + + #[tokio::test] + async fn test_delete_cdc_enabled_partitioned() { + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "year", + DeltaDataType::Primitive(PrimitiveType::String), + true, + None, + ) + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_partition_columns(vec!["year"]) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![ + Field::new("year", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020"), + Some("2024"), + ])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .delete() + .with_predicate(col("value").eq(lit(2))) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! 
{[ + "+-------+--------------+-----------------+------+", + "| value | _change_type | _commit_version | year |", + "+-------+--------------+-----------------+------+", + "| 1 | insert | 1 | 2020 |", + "| 2 | delete | 2 | 2020 |", + "| 2 | insert | 1 | 2020 |", + "| 3 | insert | 1 | 2024 |", + "+-------+--------------+-----------------+------+", + ], &batches } + } + + async fn collect_batches( + num_partitions: usize, + stream: DeltaCdfScan, + ctx: SessionContext, + ) -> Result, Box> { + let mut batches = vec![]; + for p in 0..num_partitions { + let data: Vec = + collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?; + batches.extend_from_slice(&data); + } + Ok(batches) + } } diff --git a/crates/core/src/operations/filesystem_check.rs b/crates/core/src/operations/filesystem_check.rs index 44fa84d29a..6129c1cde3 100644 --- a/crates/core/src/operations/filesystem_check.rs +++ b/crates/core/src/operations/filesystem_check.rs @@ -24,6 +24,7 @@ use object_store::ObjectStore; use serde::Serialize; use url::{ParseError, Url}; +use super::transaction::{CommitBuilder, CommitProperties}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; @@ -31,9 +32,6 @@ use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::DeltaTable; -use super::transaction::CommitBuilder; -use super::transaction::CommitProperties; - /// Audit the Delta Table's active files with the underlying file system. /// See this module's documentation for more information #[derive(Debug)] @@ -102,7 +100,7 @@ impl FileSystemCheckBuilder { async fn create_fsck_plan(&self) -> DeltaResult { let mut files_relative: HashMap = - HashMap::with_capacity(self.snapshot.file_actions()?.len()); + HashMap::with_capacity(self.snapshot.files_count()); let log_store = self.log_store.clone(); for active in self.snapshot.file_actions_iter()? 
{ diff --git a/crates/core/src/operations/load.rs b/crates/core/src/operations/load.rs index 4bf439cd0d..930b5d48ec 100644 --- a/crates/core/src/operations/load.rs +++ b/crates/core/src/operations/load.rs @@ -51,6 +51,9 @@ impl std::future::IntoFuture for LoadBuilder { Box::pin(async move { PROTOCOL.can_read_from(&this.snapshot.snapshot)?; + if !this.snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("reading".into())); + } let table = DeltaTable::new_with_state(this.log_store, this.snapshot); let schema = table.snapshot()?.arrow_schema()?; diff --git a/crates/core/src/operations/load_cdf.rs b/crates/core/src/operations/load_cdf.rs index 4f3a4bdbd6..ad2986de80 100644 --- a/crates/core/src/operations/load_cdf.rs +++ b/crates/core/src/operations/load_cdf.rs @@ -3,24 +3,28 @@ use std::sync::Arc; use std::time::SystemTime; +use arrow_array::RecordBatch; use arrow_schema::{ArrowError, Field}; use chrono::{DateTime, Utc}; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::physical_plan::FileScanConfig; -use datafusion::physical_plan::union::UnionExec; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use datafusion_common::{ScalarValue, Statistics}; +use datafusion_physical_expr::expressions; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::union::UnionExec; +use datafusion_physical_plan::ExecutionPlan; use tracing::log; -use crate::delta_datafusion::cdf::*; use crate::delta_datafusion::{register_store, DataFusionMixins}; use crate::errors::DeltaResult; use crate::kernel::{Action, Add, AddCDCFile, CommitInfo}; use crate::logstore::{get_actions, LogStoreRef}; use crate::table::state::DeltaTableState; use crate::DeltaTableError; +use crate::{delta_datafusion::cdf::*, kernel::Remove}; /// Builder for create a read of change data feeds for delta tables #[derive(Clone)] @@ -29,6 +33,8 @@ pub struct CdfLoadBuilder { snapshot: DeltaTableState, /// Delta object store for handling data files log_store: LogStoreRef, + /// Columns to project + columns: Option>, /// Version to read from starting_version: i64, /// Version to stop reading at @@ -47,6 +53,7 @@ impl CdfLoadBuilder { Self { snapshot, log_store, + columns: None, starting_version: 0, ending_version: None, starting_timestamp: None, @@ -85,13 +92,23 @@ impl CdfLoadBuilder { self } + /// Columns to select + pub fn with_columns(mut self, columns: Vec) -> Self { + self.columns = Some(columns); + self + } + /// This is a rust version of https://github.com/delta-io/delta/blob/master/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala#L418 /// Which iterates through versions of the delta table collects the relevant actions / commit info and returns those /// groupings for later use. The scala implementation has a lot more edge case handling and read schema checking (and just error checking in general) /// than I have right now. I plan to extend the checks once we have a stable state of the initial implementation. 
async fn determine_files_to_read( &self, - ) -> DeltaResult<(Vec>, Vec>)> { + ) -> DeltaResult<( + Vec>, + Vec>, + Vec>, + )> { let start = self.starting_version; let end = self .ending_version @@ -113,8 +130,9 @@ impl CdfLoadBuilder { ); log::debug!("starting version = {}, ending version = {:?}", start, end); - let mut change_files = vec![]; - let mut add_files = vec![]; + let mut change_files: Vec> = vec![]; + let mut add_files: Vec> = vec![]; + let mut remove_files: Vec> = vec![]; for version in start..=end { let snapshot_bytes = self @@ -128,6 +146,8 @@ impl CdfLoadBuilder { let mut cdc_actions = vec![]; if self.starting_timestamp.is_some() || self.ending_timestamp.is_some() { + // TODO: fallback on other actions for timestamps because CommitInfo action is optional + // theoretically. let version_commit = version_actions .iter() .find(|a| matches!(a, Action::CommitInfo(_))); @@ -188,6 +208,14 @@ impl CdfLoadBuilder { }) .collect::>(); + let remove_actions = version_actions + .iter() + .filter_map(|r| match r { + Action::Remove(r) if r.data_change => Some(r.clone()), + _ => None, + }) + .collect::>(); + if !add_actions.is_empty() { log::debug!( "Located {} cdf actions for version: {}", @@ -196,10 +224,19 @@ impl CdfLoadBuilder { ); add_files.push(CdcDataSpec::new(version, ts, add_actions)); } + + if !remove_actions.is_empty() { + log::debug!( + "Located {} cdf actions for version: {}", + remove_actions.len(), + version + ); + remove_files.push(CdcDataSpec::new(version, ts, remove_actions)); + } } } - Ok((change_files, add_files)) + Ok((change_files, add_files, remove_files)) } #[inline] @@ -207,20 +244,24 @@ impl CdfLoadBuilder { Some(ScalarValue::Utf8(Some(String::from("insert")))) } + fn get_remove_action_type() -> Option { + Some(ScalarValue::Utf8(Some(String::from("delete")))) + } + /// Executes the scan pub async fn build(&self) -> DeltaResult { - let (cdc, add) = self.determine_files_to_read().await?; + let (cdc, add, remove) = self.determine_files_to_read().await?; register_store( self.log_store.clone(), self.ctx.state().runtime_env().clone(), ); let partition_values = self.snapshot.metadata().partition_columns.clone(); - let schema = self.snapshot.arrow_schema()?; - let schema_fields: Vec = self + let schema = self.snapshot.input_schema()?; + let schema_fields: Vec> = self .snapshot - .arrow_schema()? - .all_fields() + .input_schema()? + .fields() .into_iter() .filter(|f| !partition_values.contains(f.name())) .cloned() @@ -234,16 +275,16 @@ impl CdfLoadBuilder { // Setup for the Read Schemas of each kind of file, CDC files include commit action type so they need a slightly // different schema than standard add file reads let cdc_file_schema = create_cdc_schema(schema_fields.clone(), true); - let add_file_schema = create_cdc_schema(schema_fields, false); + let add_remove_file_schema = create_cdc_schema(schema_fields, false); // Set up the mapping of partition columns to be projected into the final output batch // cdc for example has timestamp, version, and any table partitions mapped here. 
// add on the other hand has action type, timestamp, version and any additional table partitions because adds do // not include their actions let mut cdc_partition_cols = CDC_PARTITION_SCHEMA.clone(); - let mut add_partition_cols = ADD_PARTITION_SCHEMA.clone(); + let mut add_remove_partition_cols = ADD_PARTITION_SCHEMA.clone(); cdc_partition_cols.extend_from_slice(&this_partition_values); - add_partition_cols.extend_from_slice(&this_partition_values); + add_remove_partition_cols.extend_from_slice(&this_partition_values); // Set up the partition to physical file mapping, this is a mostly unmodified version of what is done in load let cdc_file_groups = @@ -254,9 +295,14 @@ impl CdfLoadBuilder { &partition_values, Self::get_add_action_type(), )?; + let remove_file_groups = create_partition_values( + schema.clone(), + remove, + &partition_values, + Self::get_remove_action_type(), + )?; - // Create the parquet scans for each associated type of file. I am not sure when we would use removes yet, but - // they would be here if / when they are necessary + // Create the parquet scans for each associated type of file. let cdc_scan = ParquetFormat::new() .create_physical_plan( &self.ctx.state(), @@ -279,12 +325,29 @@ impl CdfLoadBuilder { &self.ctx.state(), FileScanConfig { object_store_url: self.log_store.object_store_url(), - file_schema: add_file_schema.clone(), + file_schema: add_remove_file_schema.clone(), file_groups: add_file_groups.into_values().collect(), - statistics: Statistics::new_unknown(&add_file_schema), + statistics: Statistics::new_unknown(&add_remove_file_schema.clone()), + projection: None, + limit: None, + table_partition_cols: add_remove_partition_cols.clone(), + output_ordering: vec![], + }, + None, + ) + .await?; + + let remove_scan = ParquetFormat::new() + .create_physical_plan( + &self.ctx.state(), + FileScanConfig { + object_store_url: self.log_store.object_store_url(), + file_schema: add_remove_file_schema.clone(), + file_groups: remove_file_groups.into_values().collect(), + statistics: Statistics::new_unknown(&add_remove_file_schema), projection: None, limit: None, - table_partition_cols: add_partition_cols, + table_partition_cols: add_remove_partition_cols, output_ordering: vec![], }, None, @@ -293,41 +356,60 @@ impl CdfLoadBuilder { // The output batches are then unioned to create a single output. Coalesce partitions is only here for the time // being for development. I plan to parallelize the reads once the base idea is correct. 
- let union_scan: Arc = Arc::new(UnionExec::new(vec![cdc_scan, add_scan])); + let mut union_scan: Arc = + Arc::new(UnionExec::new(vec![cdc_scan, add_scan, remove_scan])); + + if let Some(columns) = &self.columns { + let expressions: Vec<(Arc, String)> = union_scan + .schema() + .fields() + .into_iter() + .enumerate() + .map(|(idx, field)| -> (Arc, String) { + let field_name = field.name(); + let expr = Arc::new(expressions::Column::new(field_name, idx)); + (expr, field_name.to_owned()) + }) + .filter(|(_, field_name)| columns.contains(field_name)) + .collect(); + union_scan = Arc::new(ProjectionExec::try_new(expressions, union_scan)?); + } Ok(DeltaCdfScan::new(union_scan)) } } +#[allow(unused)] +/// Helper function to collect batches associated with reading CDF data +pub(crate) async fn collect_batches( + num_partitions: usize, + stream: DeltaCdfScan, + ctx: SessionContext, +) -> Result, Box> { + let mut batches = vec![]; + for p in 0..num_partitions { + let data: Vec = + crate::operations::collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?; + batches.extend_from_slice(&data); + } + Ok(batches) +} + #[cfg(test)] -mod tests { +pub(crate) mod tests { use super::*; - use std::error::Error; use std::str::FromStr; - use arrow_array::RecordBatch; + use arrow_array::{Int32Array, RecordBatch, StringArray}; + use arrow_schema::Schema; use chrono::NaiveDateTime; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use datafusion_common::assert_batches_sorted_eq; + use itertools::Itertools; - use crate::delta_datafusion::cdf::DeltaCdfScan; - use crate::operations::collect_sendable_stream; + use crate::test_utils::TestSchemas; use crate::writer::test_utils::TestResult; - use crate::DeltaOps; - - async fn collect_batches( - num_partitions: usize, - stream: DeltaCdfScan, - ctx: SessionContext, - ) -> Result, Box> { - let mut batches = vec![]; - for p in 0..num_partitions { - let data: Vec = - collect_sendable_stream(stream.execute(p, ctx.task_ctx())?).await?; - batches.extend_from_slice(&data); - } - Ok(batches) - } + use crate::{DeltaOps, DeltaTable, TableProperty}; #[tokio::test] async fn test_load_local() -> TestResult { @@ -513,4 +595,107 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_use_remove_actions_for_deletions() -> TestResult { + let delta_schema = TestSchemas::simple(); + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(delta_schema.fields().cloned()) + .with_partition_columns(["id"]) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::try_from(delta_schema)?); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("1"), Some("2"), Some("3")])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StringArray::from(vec![ + Some("yes"), + Some("yes"), + Some("no"), + ])), + ], + ) + .unwrap(); + + let second_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("3")])), + Arc::new(Int32Array::from(vec![Some(10)])), + Arc::new(StringArray::from(vec![Some("yes")])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let table = DeltaOps(table) + .write([second_batch]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .await + .unwrap(); + 
assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let cdf_scan = DeltaOps(table.clone()) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + cdf_scan + .properties() + .output_partitioning() + .partition_count(), + cdf_scan, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(4)).collect(); + + assert_batches_sorted_eq! {[ + "+-------+----------+--------------+-----------------+----+", + "| value | modified | _change_type | _commit_version | id |", + "+-------+----------+--------------+-----------------+----+", + "| 1 | yes | delete | 2 | 1 |", + "| 1 | yes | insert | 1 | 1 |", + "| 10 | yes | insert | 2 | 3 |", + "| 2 | yes | delete | 2 | 2 |", + "| 2 | yes | insert | 1 | 2 |", + "| 3 | no | delete | 2 | 3 |", + "| 3 | no | insert | 1 | 3 |", + "+-------+----------+--------------+-----------------+----+", + ], &batches } + + let snapshot_bytes = table + .log_store + .read_commit_entry(2) + .await? + .expect("failed to get snapshot bytes"); + let version_actions = get_actions(2, snapshot_bytes).await?; + + let cdc_actions = version_actions + .iter() + .filter(|action| matches!(action, &&Action::Cdc(_))) + .collect_vec(); + assert!(cdc_actions.is_empty()); + Ok(()) + } } diff --git a/crates/core/src/operations/merge/barrier.rs b/crates/core/src/operations/merge/barrier.rs index 7d18843af7..9084d721b7 100644 --- a/crates/core/src/operations/merge/barrier.rs +++ b/crates/core/src/operations/merge/barrier.rs @@ -18,12 +18,12 @@ use std::{ use arrow_array::{builder::UInt64Builder, ArrayRef, RecordBatch}; use arrow_schema::SchemaRef; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, -}; use datafusion_common::{DataFusionError, Result as DataFusionResult}; use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion_physical_expr::{Distribution, PhysicalExpr}; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, +}; use futures::{Stream, StreamExt}; use crate::{ @@ -67,6 +67,10 @@ impl MergeBarrierExec { } impl ExecutionPlan for MergeBarrierExec { + fn name(&self) -> &str { + Self::static_name() + } + fn as_any(&self) -> &dyn std::any::Any { self } @@ -83,14 +87,14 @@ impl ExecutionPlan for MergeBarrierExec { vec![Distribution::HashPartitioned(vec![self.expr.clone()]); 1] } - fn children(&self) -> Vec> { - vec![self.input.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.input] } fn with_new_children( - self: std::sync::Arc, - children: Vec>, - ) -> datafusion_common::Result> { + self: Arc, + children: Vec>, + ) -> datafusion_common::Result> { if children.len() != 1 { return Err(DataFusionError::Plan( "MergeBarrierExec wrong number of children".to_string(), @@ -106,7 +110,7 @@ impl ExecutionPlan for MergeBarrierExec { fn execute( &self, partition: usize, - context: std::sync::Arc, + context: Arc, ) -> datafusion_common::Result { let input = self.input.execute(partition, context)?; Ok(Box::pin(MergeBarrierStream::new( @@ -422,22 +426,33 @@ impl UserDefinedLogicalNodeCore for MergeBarrier { exprs: &[datafusion_expr::Expr], inputs: &[datafusion_expr::LogicalPlan], ) -> Self { - MergeBarrier { + 
self.with_exprs_and_inputs(exprs.to_vec(), inputs.to_vec()) + .unwrap() + } + + fn with_exprs_and_inputs( + &self, + exprs: Vec, + inputs: Vec, + ) -> DataFusionResult { + Ok(MergeBarrier { input: inputs[0].clone(), file_column: self.file_column.clone(), expr: exprs[0].clone(), - } + }) } } -pub(crate) fn find_barrier_node(parent: &Arc) -> Option> { - //! Used to locate the physical Barrier Node after the planner converts the logical node - if parent.as_any().downcast_ref::().is_some() { +pub(crate) fn find_node( + parent: &Arc, +) -> Option> { + //! Used to locate a Node:: after the planner converts the logical node + if parent.as_any().downcast_ref::().is_some() { return Some(parent.to_owned()); } for child in &parent.children() { - let res = find_barrier_node(child); + let res = find_node::(child); if res.is_some() { return res; } diff --git a/crates/core/src/operations/merge/filter.rs b/crates/core/src/operations/merge/filter.rs new file mode 100644 index 0000000000..0745c55830 --- /dev/null +++ b/crates/core/src/operations/merge/filter.rs @@ -0,0 +1,943 @@ +//! Utility functions to determine early filters for file/partition pruning +use datafusion::functions_aggregate::expr_fn::{max, min}; +use std::collections::HashMap; + +use datafusion::execution::context::SessionState; +use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_common::{ScalarValue, TableReference}; +use datafusion_expr::expr::{InList, Placeholder}; +use datafusion_expr::{Aggregate, BinaryExpr, LogicalPlan, Operator}; +use datafusion_expr::{Between, Expr}; + +use either::{Left, Right}; + +use itertools::Itertools; + +use crate::delta_datafusion::execute_plan_to_batch; +use crate::table::state::DeltaTableState; +use crate::DeltaResult; + +#[derive(Debug)] +enum ReferenceTableCheck { + HasReference(String), + NoReference, + Unknown, +} +impl ReferenceTableCheck { + fn has_reference(&self) -> bool { + matches!(self, ReferenceTableCheck::HasReference(_)) + } +} + +fn references_table(expr: &Expr, table: &TableReference) -> ReferenceTableCheck { + let res = match expr { + Expr::Alias(alias) => references_table(&alias.expr, table), + Expr::Column(col) => col + .relation + .as_ref() + .map(|rel| { + if rel == table { + ReferenceTableCheck::HasReference(col.name.to_owned()) + } else { + ReferenceTableCheck::NoReference + } + }) + .unwrap_or(ReferenceTableCheck::NoReference), + Expr::Negative(neg) => references_table(neg, table), + Expr::Cast(cast) => references_table(&cast.expr, table), + Expr::TryCast(try_cast) => references_table(&try_cast.expr, table), + Expr::ScalarFunction(func) => { + if func.args.len() == 1 { + references_table(&func.args[0], table) + } else { + ReferenceTableCheck::Unknown + } + } + Expr::IsNull(inner) => references_table(inner, table), + Expr::Literal(_) => ReferenceTableCheck::NoReference, + _ => ReferenceTableCheck::Unknown, + }; + res +} + +fn construct_placeholder( + binary: BinaryExpr, + source_left: bool, + is_partition_column: bool, + column_name: String, + placeholders: &mut Vec, +) -> Option { + if is_partition_column { + let placeholder_name = format!("{column_name}_{}", placeholders.len()); + let placeholder = Expr::Placeholder(Placeholder { + id: placeholder_name.clone(), + data_type: None, + }); + + let (left, right, source_expr): (Box, Box, Expr) = if source_left { + (placeholder.into(), binary.clone().right, *binary.left) + } else { + (binary.clone().left, placeholder.into(), *binary.right) + }; + + let replaced = Expr::BinaryExpr(BinaryExpr { + left, + op: 
binary.op, + right, + }); + + placeholders.push(PredicatePlaceholder { + expr: source_expr, + alias: placeholder_name, + is_aggregate: false, + }); + + Some(replaced) + } else { + match binary.op { + Operator::Eq => { + let name_min = format!("{column_name}_{}_min", placeholders.len()); + let placeholder_min = Expr::Placeholder(Placeholder { + id: name_min.clone(), + data_type: None, + }); + let name_max = format!("{column_name}_{}_max", placeholders.len()); + let placeholder_max = Expr::Placeholder(Placeholder { + id: name_max.clone(), + data_type: None, + }); + let (source_expr, target_expr) = if source_left { + (*binary.left, *binary.right) + } else { + (*binary.right, *binary.left) + }; + let replaced = Expr::Between(Between { + expr: target_expr.into(), + negated: false, + low: placeholder_min.into(), + high: placeholder_max.into(), + }); + + placeholders.push(PredicatePlaceholder { + expr: min(source_expr.clone()), + alias: name_min, + is_aggregate: true, + }); + placeholders.push(PredicatePlaceholder { + expr: max(source_expr), + alias: name_max, + is_aggregate: true, + }); + Some(replaced) + } + _ => None, + } + } +} + +fn replace_placeholders(expr: Expr, placeholders: &HashMap) -> Expr { + expr.transform(&|expr| match expr { + Expr::Placeholder(Placeholder { id, .. }) => { + let value = placeholders[&id].clone(); + // Replace the placeholder with the value + Ok(Transformed::yes(Expr::Literal(value))) + } + _ => Ok(Transformed::no(expr)), + }) + .unwrap() + .data +} + +pub(crate) struct PredicatePlaceholder { + pub expr: Expr, + pub alias: String, + pub is_aggregate: bool, +} + +/// Takes the predicate provided and does three things: +/// +/// 1. for any relations between a source column and a partition target column, +/// replace source with a placeholder matching the name of the partition +/// columns +/// +/// 2. for any is equal relations between a source column and a non-partition target column, +/// replace source with is between expression with min(source_column) and max(source_column) placeholders +/// +/// 3. for any other relation with a source column, remove them. +/// +/// For example, for the predicate: +/// +/// `source.date = target.date and source.id = target.id and frob > 42` +/// +/// where `date` is a partition column, would result in the expr: +/// +/// `$date_0 = target.date and target.id between $id_1_min and $id_1_max and frob > 42` +/// +/// This leaves us with a predicate that we can push into delta scan after expanding it out to +/// a conjunction between the distinct partitions in the source input. 
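+///
+/// A doc-only sketch of the non-partition case, mirroring the unit tests below
+/// (the concrete table and column names are illustrative, not an API contract):
+///
+/// ```ignore
+/// let source = TableReference::parse_str("source");
+/// let target = TableReference::parse_str("target");
+/// let mut placeholders = Vec::new();
+///
+/// // source.id = target.id, where `id` is not a partition column
+/// let predicate = col(Column::new(source.clone().into(), "id"))
+///     .eq(col(Column::new(target.clone().into(), "id")));
+///
+/// let generalized =
+///     generalize_filter(predicate, &vec![], &source, &target, &mut placeholders);
+/// // `generalized` is `Some(target.id BETWEEN $id_0_min AND $id_0_max)`, while
+/// // `placeholders` now carries `min(source.id)` / `max(source.id)`, which
+/// // `try_construct_early_filter` evaluates against the source data before pushing
+/// // the expanded filter into the delta scan.
+/// ```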
+/// +pub(crate) fn generalize_filter( + predicate: Expr, + partition_columns: &Vec, + source_name: &TableReference, + target_name: &TableReference, + placeholders: &mut Vec, +) -> Option { + match predicate { + Expr::BinaryExpr(binary) => { + if references_table(&binary.right, source_name).has_reference() { + if let ReferenceTableCheck::HasReference(left_target) = + references_table(&binary.left, target_name) + { + return construct_placeholder( + binary, + false, + partition_columns.contains(&left_target), + left_target, + placeholders, + ); + } + return None; + } + if references_table(&binary.left, source_name).has_reference() { + if let ReferenceTableCheck::HasReference(right_target) = + references_table(&binary.right, target_name) + { + return construct_placeholder( + binary, + true, + partition_columns.contains(&right_target), + right_target, + placeholders, + ); + } + return None; + } + + let left = generalize_filter( + *binary.left, + partition_columns, + source_name, + target_name, + placeholders, + ); + let right = generalize_filter( + *binary.right, + partition_columns, + source_name, + target_name, + placeholders, + ); + + match (left, right) { + (None, None) => None, + (None, Some(one_side)) | (Some(one_side), None) => { + // in the case of an AND clause, it's safe to generalize the filter down to just one side of the AND. + // this is because this filter will be more permissive than the actual predicate, so we know that + // we will catch all data that could be matched by the predicate. For OR this is not the case - we + // could potentially eliminate one side of the predicate and the filter would only match half the + // cases that would have satisfied the match predicate. + match binary.op { + Operator::And => Some(one_side), + Operator::Or => None, + _ => None, + } + } + (Some(l), Some(r)) => Expr::BinaryExpr(BinaryExpr { + left: l.into(), + op: binary.op, + right: r.into(), + }) + .into(), + } + } + Expr::InList(in_list) => { + let compare_expr = match generalize_filter( + *in_list.expr, + partition_columns, + source_name, + target_name, + placeholders, + ) { + Some(expr) => expr, + None => return None, // Return early + }; + + let mut list_expr = Vec::new(); + for item in in_list.list.into_iter() { + match item { + // If it's a literal just immediately push it in list_expr so we can avoid the unnecessary generalizing + Expr::Literal(_) => list_expr.push(item), + _ => { + if let Some(item) = generalize_filter( + item.clone(), + partition_columns, + source_name, + target_name, + placeholders, + ) { + list_expr.push(item) + } + } + } + } + if !list_expr.is_empty() { + Expr::InList(InList { + expr: compare_expr.into(), + list: list_expr, + negated: in_list.negated, + }) + .into() + } else { + None + } + } + other => match references_table(&other, source_name) { + ReferenceTableCheck::HasReference(col) => { + let placeholder_name = format!("{col}_{}", placeholders.len()); + + let placeholder = Expr::Placeholder(Placeholder { + id: placeholder_name.clone(), + data_type: None, + }); + + placeholders.push(PredicatePlaceholder { + expr: other, + alias: placeholder_name, + is_aggregate: true, + }); + Some(placeholder) + } + ReferenceTableCheck::NoReference => Some(other), + ReferenceTableCheck::Unknown => None, + }, + } +} + +pub(crate) async fn try_construct_early_filter( + join_predicate: Expr, + table_snapshot: &DeltaTableState, + session_state: &SessionState, + source: &LogicalPlan, + source_name: &TableReference, + target_name: &TableReference, +) -> DeltaResult> { + let 
table_metadata = table_snapshot.metadata(); + let partition_columns = &table_metadata.partition_columns; + + let mut placeholders = Vec::default(); + + match generalize_filter( + join_predicate, + partition_columns, + source_name, + target_name, + &mut placeholders, + ) { + None => Ok(None), + Some(filter) => { + if placeholders.is_empty() { + // if we haven't recognised any source predicates in the join predicate, return our filter with static only predicates + Ok(Some(filter)) + } else { + // if we have some filters, which depend on the source df, then collect the placeholders values from the source data + // We aggregate the distinct values for partitions with the group_columns and stats(min, max) for dynamic filter as agg_columns + // Can be translated into `SELECT partition1 as part1_0, min(id) as id_1_min, max(id) as id_1_max FROM source GROUP BY partition1` + let (agg_columns, group_columns) = placeholders.into_iter().partition_map(|p| { + if p.is_aggregate { + Left(p.expr.alias(p.alias)) + } else { + Right(p.expr.alias(p.alias)) + } + }); + let distinct_partitions = LogicalPlan::Aggregate(Aggregate::try_new( + source.clone().into(), + group_columns, + agg_columns, + )?); + let execution_plan = session_state + .create_physical_plan(&distinct_partitions) + .await?; + let items = execute_plan_to_batch(session_state, execution_plan).await?; + let placeholder_names = items + .schema() + .fields() + .iter() + .map(|f| f.name().to_owned()) + .collect_vec(); + let expr = (0..items.num_rows()) + .map(|i| { + let replacements = placeholder_names + .iter() + .map(|placeholder| { + let col = items.column_by_name(placeholder).unwrap(); + let value = ScalarValue::try_from_array(col, i)?; + DeltaResult::Ok((placeholder.to_owned(), value)) + }) + .try_collect()?; + Ok(replace_placeholders(filter.clone(), &replacements)) + }) + .collect::>>()? 
+ .into_iter() + .reduce(Expr::or); + Ok(expr) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::operations::merge::tests::setup_table; + use crate::operations::merge::try_construct_early_filter; + use crate::writer::test_utils::get_arrow_schema; + + use arrow::record_batch::RecordBatch; + + use datafusion::datasource::provider_as_source; + + use datafusion::prelude::*; + use datafusion_common::Column; + use datafusion_common::ScalarValue; + use datafusion_common::TableReference; + use datafusion_expr::col; + + use datafusion_expr::Expr; + use datafusion_expr::LogicalPlanBuilder; + use datafusion_expr::Operator; + + use std::sync::Arc; + + #[tokio::test] + async fn test_try_construct_early_filter_with_partitions_expands() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["id"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C", "X"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-02", + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let split_pred = { + fn split(expr: Expr, parts: &mut Vec<(String, String)>) { + match expr { + Expr::BinaryExpr(ex) if ex.op == Operator::Or => { + split(*ex.left, parts); + split(*ex.right, parts); + } + Expr::BinaryExpr(ex) if ex.op == Operator::Eq => { + let col = match *ex.right { + Expr::Column(col) => col.name, + ex => panic!("expected column in pred, got {ex}!"), + }; + + let value = match *ex.left { + Expr::Literal(ScalarValue::Utf8(Some(value))) => value, + ex => panic!("expected value in predicate, got {ex}!"), + }; + + parts.push((col, value)) + } + + expr => panic!("expected either = or OR, got {expr}"), + } + } + + let mut parts = vec![]; + split(pred.unwrap(), &mut parts); + parts.sort(); + parts + }; + + let expected_pred_parts = [ + ("id".to_owned(), "B".to_owned()), + ("id".to_owned(), "C".to_owned()), + ("id".to_owned(), "X".to_owned()), + ]; + + assert_eq!(split_pred, expected_pred_parts); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_range() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + 
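+        // The table is partitioned by `modified`, so the equality join on the
+        // non-partition `id` column is expected to generalize into a BETWEEN over
+        // the source's min/max `id` values rather than a partition placeholder.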
let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("B".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_partition_and_range() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })) + .and( + col(Column { + relation: Some(source_name.clone()), + name: "modified".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + })), + ); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("B".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ) + .and( + Expr::Literal(ScalarValue::Utf8(Some("2023-07-04".to_string()))).eq(col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + })), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_is_in_literals() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-05", + "2023-07-05", + ])), + ], + ) + .unwrap(); + let source_df = 
ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source_plan = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source_df.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })) + .and(col("modified".to_owned()).in_list( + vec![lit("2023-07-05"), lit("2023-07-06"), lit("2023-07-07")], + false, + )); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source_plan, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("A".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ) + .and( + col(Column { + relation: None, + name: "modified".to_owned(), + }) + .in_list( + vec![ + Expr::Literal(ScalarValue::Utf8(Some("2023-07-05".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("2023-07-06".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("2023-07-07".to_string()))), + ], + false, + ), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_is_in_columns() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-05", + "2023-07-05", + ])), + ], + ) + .unwrap(); + let source_df = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source_plan = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source_df.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })) + .and(col("modified".to_owned()).in_list( + vec![ + col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }), + col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + }), + ], + false, + )); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source_plan, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("A".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ) + .and( + col(Column { + relation: None, + name: "modified".to_owned(), + }) + .in_list( + vec![ + col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }), + col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + }), + ], + 
false, + ), + ); + assert_eq!(pred.unwrap(), filter); + } + + #[tokio::test] + async fn test_try_construct_early_filter_with_is_in_ident_and_cols() { + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 0); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), + Arc::new(arrow::array::StringArray::from(vec![ + "2023-07-04", + "2023-07-05", + "2023-07-05", + ])), + ], + ) + .unwrap(); + let source_df = ctx.read_batch(batch).unwrap(); + + let source_name = TableReference::parse_str("source"); + let target_name = TableReference::parse_str("target"); + + let source_plan = LogicalPlanBuilder::scan( + source_name.clone(), + provider_as_source(source_df.into_view()), + None, + ) + .unwrap() + .build() + .unwrap(); + + let join_predicate = col(Column { + relation: Some(source_name.clone()), + name: "id".to_owned(), + }) + .eq(col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + })) + .and(ident("source.id").in_list( + vec![ + col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }), + col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + }), + ], + false, + )); + + let pred = try_construct_early_filter( + join_predicate, + table.snapshot().unwrap(), + &ctx.state(), + &source_plan, + &source_name, + &target_name, + ) + .await + .unwrap(); + + assert!(pred.is_some()); + + let filter = col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }) + .between( + Expr::Literal(ScalarValue::Utf8(Some("A".to_string()))), + Expr::Literal(ScalarValue::Utf8(Some("C".to_string()))), + ) + .and(ident("source.id").in_list( + vec![ + col(Column { + relation: Some(target_name.clone()), + name: "id".to_owned(), + }), + col(Column { + relation: Some(target_name.clone()), + name: "modified".to_owned(), + }), + ], + false, + )); + assert_eq!(pred.unwrap(), filter); + } +} diff --git a/crates/core/src/operations/merge/mod.rs b/crates/core/src/operations/merge/mod.rs index ddbe113d16..7f87d30d35 100644 --- a/crates/core/src/operations/merge/mod.rs +++ b/crates/core/src/operations/merge/mod.rs @@ -29,35 +29,36 @@ //! 
```` use std::collections::HashMap; +use std::fmt::Debug; use std::sync::Arc; use std::time::Instant; use async_trait::async_trait; use datafusion::datasource::provider_as_source; use datafusion::error::Result as DataFusionResult; -use datafusion::execution::context::{QueryPlanner, SessionConfig}; +use datafusion::execution::context::SessionConfig; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::logical_expr::build_join_schema; -use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; +use datafusion::physical_plan::metrics::MetricBuilder; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; use datafusion::{ execution::context::SessionState, - physical_plan::{ - metrics::{MetricBuilder, MetricsSet}, - ExecutionPlan, - }, + physical_plan::ExecutionPlan, prelude::{DataFrame, SessionContext}, }; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Column, DFSchema, ScalarValue, TableReference}; -use datafusion_expr::expr::Placeholder; use datafusion_expr::{col, conditional_expressions::CaseBuilder, lit, when, Expr, JoinType}; use datafusion_expr::{ - BinaryExpr, Distinct, Extension, LogicalPlan, LogicalPlanBuilder, Operator, Projection, - UserDefinedLogicalNode, UNNAMED_TABLE, + Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode, UNNAMED_TABLE, }; + +use filter::try_construct_early_filter; use futures::future::BoxFuture; use itertools::Itertools; use parquet::file::properties::WriterProperties; use serde::Serialize; +use tracing::log::*; use self::barrier::{MergeBarrier, MergeBarrierExec}; @@ -65,21 +66,24 @@ use super::datafusion_utils::{into_expr, maybe_into_expr, Expression}; use super::transaction::{CommitProperties, PROTOCOL}; use crate::delta_datafusion::expr::{fmt_expr_to_sql, parse_predicate_expression}; use crate::delta_datafusion::logical::MetricObserver; -use crate::delta_datafusion::physical::{find_metric_node, MetricObserverExec}; +use crate::delta_datafusion::physical::{find_metric_node, get_metric, MetricObserverExec}; +use crate::delta_datafusion::planner::DeltaPlanner; use crate::delta_datafusion::{ - execute_plan_to_batch, register_store, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionConfig, - DeltaTableProvider, + register_store, DataFusionMixins, DeltaColumn, DeltaScan, DeltaScanConfigBuilder, + DeltaSessionConfig, DeltaTableProvider, }; use crate::kernel::Action; use crate::logstore::LogStoreRef; -use crate::operations::merge::barrier::find_barrier_node; +use crate::operations::cdc::*; +use crate::operations::merge::barrier::find_node; use crate::operations::transaction::CommitBuilder; -use crate::operations::write::{write_execution_plan, WriterStatsConfig}; +use crate::operations::write::{write_execution_plan, write_execution_plan_cdc, WriterStatsConfig}; use crate::protocol::{DeltaOperation, MergePredicate}; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableError}; mod barrier; +mod filter; const SOURCE_COLUMN: &str = "__delta_rs_source"; const TARGET_COLUMN: &str = "__delta_rs_target"; @@ -502,7 +506,7 @@ impl MergeOperation { relation: Some(TableReference::Bare { table }), name, } => { - if table.eq(alias) { + if table.as_ref() == alias { Column { relation: Some(r), name, @@ -562,6 +566,10 @@ pub struct MergeMetrics { pub num_target_rows_copied: usize, /// Total number of rows written out pub num_output_rows: usize, + /// Amount of files considered during table scan + pub 
num_target_files_scanned: usize, + /// Amount of files not considered (pruned) during table scan + pub num_target_files_skipped_during_scan: usize, /// Number of files added to the sink(target) pub num_target_files_added: usize, /// Number of files removed from the sink(target) @@ -573,7 +581,7 @@ pub struct MergeMetrics { /// Time taken to rewrite the matched files pub rewrite_time_ms: u64, } - +#[derive(Clone)] struct MergeMetricExtensionPlanner {} #[async_trait] @@ -666,288 +674,47 @@ impl ExtensionPlanner for MergeMetricExtensionPlanner { } } -/// Takes the predicate provided and does two things: -/// -/// 1. for any relations between a source column and a target column, if the target column is a -/// partition column, then replace source with a placeholder matching the name of the partition -/// columns -/// -/// 2. for any other relation with a source column, remove them. -/// -/// For example, for the predicate: -/// -/// `source.date = target.date and source.id = target.id and frob > 42` -/// -/// where `date` is a partition column, would result in the expr: -/// -/// `$date = target.date and frob > 42` -/// -/// This leaves us with a predicate that we can push into delta scan after expanding it out to -/// a conjunction between the distinct partitions in the source input. -/// -/// TODO: A further improvement here might be for non-partition columns to be replaced with min/max -/// checks, so the above example could become: -/// -/// `$date = target.date and target.id between 12345 and 99999 and frob > 42` -fn generalize_filter( - predicate: Expr, - partition_columns: &Vec, - source_name: &TableReference, - target_name: &TableReference, - placeholders: &mut HashMap, -) -> Option { - #[derive(Debug)] - enum ReferenceTableCheck { - HasReference(String), - NoReference, - Unknown, - } - impl ReferenceTableCheck { - fn has_reference(&self) -> bool { - matches!(self, ReferenceTableCheck::HasReference(_)) - } - } - fn references_table(expr: &Expr, table: &TableReference) -> ReferenceTableCheck { - let res = match expr { - Expr::Alias(alias) => references_table(&alias.expr, table), - Expr::Column(col) => col - .relation - .as_ref() - .map(|rel| { - if rel == table { - ReferenceTableCheck::HasReference(col.name.to_owned()) - } else { - ReferenceTableCheck::NoReference - } - }) - .unwrap_or(ReferenceTableCheck::NoReference), - Expr::Negative(neg) => references_table(neg, table), - Expr::Cast(cast) => references_table(&cast.expr, table), - Expr::TryCast(try_cast) => references_table(&try_cast.expr, table), - Expr::ScalarFunction(func) => { - if func.args.len() == 1 { - references_table(&func.args[0], table) - } else { - ReferenceTableCheck::Unknown - } - } - Expr::IsNull(inner) => references_table(inner, table), - Expr::Literal(_) => ReferenceTableCheck::NoReference, - _ => ReferenceTableCheck::Unknown, - }; - res - } - - match predicate { - Expr::BinaryExpr(binary) => { - if references_table(&binary.right, source_name).has_reference() { - if let ReferenceTableCheck::HasReference(left_target) = - references_table(&binary.left, target_name) - { - if partition_columns.contains(&left_target) { - let placeholder_name = format!("{left_target}_{}", placeholders.len()); - - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { - id: placeholder_name.clone(), - data_type: None, - }); - let replaced = Expr::BinaryExpr(BinaryExpr { - left: binary.left, - op: binary.op, - right: placeholder.into(), - }); - - placeholders.insert(placeholder_name, *binary.right); - - return 
Some(replaced); - } - } - return None; - } - if references_table(&binary.left, source_name).has_reference() { - if let ReferenceTableCheck::HasReference(right_target) = - references_table(&binary.right, target_name) - { - if partition_columns.contains(&right_target) { - let placeholder_name = format!("{right_target}_{}", placeholders.len()); - - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { - id: placeholder_name.clone(), - data_type: None, - }); - let replaced = Expr::BinaryExpr(BinaryExpr { - right: binary.right, - op: binary.op, - left: placeholder.into(), - }); - - placeholders.insert(placeholder_name, *binary.left); - - return Some(replaced); - } - } - return None; - } - - let left = generalize_filter( - *binary.left, - partition_columns, - source_name, - target_name, - placeholders, - ); - let right = generalize_filter( - *binary.right, - partition_columns, - source_name, - target_name, - placeholders, - ); - - match (left, right) { - (None, None) => None, - (None, Some(one_side)) | (Some(one_side), None) => { - // in the case of an AND clause, it's safe to generalize the filter down to just one side of the AND. - // this is because this filter will be more permissive than the actual predicate, so we know that - // we will catch all data that could be matched by the predicate. For OR this is not the case - we - // could potentially eliminate one side of the predicate and the filter would only match half the - // cases that would have satisfied the match predicate. - match binary.op { - Operator::And => Some(one_side), - Operator::Or => None, - _ => None, - } - } - (Some(l), Some(r)) => Expr::BinaryExpr(BinaryExpr { - left: l.into(), - op: binary.op, - right: r.into(), - }) - .into(), - } - } - other => match references_table(&other, source_name) { - ReferenceTableCheck::HasReference(col) => { - let placeholder_name = format!("{col}_{}", placeholders.len()); - - let placeholder = Expr::Placeholder(datafusion_expr::expr::Placeholder { - id: placeholder_name.clone(), - data_type: None, - }); - - placeholders.insert(placeholder_name, other); - - Some(placeholder) - } - ReferenceTableCheck::NoReference => Some(other), - ReferenceTableCheck::Unknown => None, - }, - } -} - -fn replace_placeholders(expr: Expr, placeholders: &HashMap) -> Expr { - expr.transform(&|expr| match expr { - Expr::Placeholder(Placeholder { id, .. 
}) => { - let value = placeholders[&id].clone(); - // Replace the placeholder with the value - Ok(Transformed::yes(Expr::Literal(value))) - } - _ => Ok(Transformed::no(expr)), - }) - .unwrap() - .data -} - -async fn try_construct_early_filter( - join_predicate: Expr, - table_snapshot: &DeltaTableState, - session_state: &SessionState, - source: &LogicalPlan, - source_name: &TableReference<'_>, - target_name: &TableReference<'_>, -) -> DeltaResult> { - let table_metadata = table_snapshot.metadata(); - let partition_columns = &table_metadata.partition_columns; - - let mut placeholders = HashMap::default(); - - match generalize_filter( - join_predicate, - partition_columns, - source_name, - target_name, - &mut placeholders, - ) { - None => Ok(None), - Some(filter) => { - if placeholders.is_empty() { - // if we haven't recognised any partition-based predicates in the join predicate, return our reduced filter - Ok(Some(filter)) - } else { - // if we have some recognised partitions, then discover the distinct set of partitions in the source data and - // make a new filter, which expands out the placeholders for each distinct partition (and then OR these together) - let distinct_partitions = LogicalPlan::Distinct(Distinct::All( - LogicalPlan::Projection(Projection::try_new( - placeholders - .into_iter() - .map(|(alias, expr)| expr.alias(alias)) - .collect_vec(), - source.clone().into(), - )?) - .into(), - )); - let execution_plan = session_state - .create_physical_plan(&distinct_partitions) - .await?; - let items = execute_plan_to_batch(session_state, execution_plan).await?; - let placeholder_names = items - .schema() - .fields() - .iter() - .map(|f| f.name().to_owned()) - .collect_vec(); - let expr = (0..items.num_rows()) - .map(|i| { - let replacements = placeholder_names - .iter() - .map(|placeholder| { - let col = items.column_by_name(placeholder).unwrap(); - let value = ScalarValue::try_from_array(col, i)?; - DeltaResult::Ok((placeholder.to_owned(), value)) - }) - .try_collect()?; - Ok(replace_placeholders(filter.clone(), &replacements)) - }) - .collect::>>()? 
- .into_iter() - .reduce(Expr::or); - Ok(expr) - } - } - } -} - #[allow(clippy::too_many_arguments)] async fn execute( predicate: Expression, source: DataFrame, log_store: LogStoreRef, snapshot: DeltaTableState, - state: SessionState, + _state: SessionState, writer_properties: Option, mut commit_properties: CommitProperties, - safe_cast: bool, + _safe_cast: bool, source_alias: Option, target_alias: Option, match_operations: Vec, not_match_target_operations: Vec, not_match_source_operations: Vec, ) -> DeltaResult<(DeltaTableState, MergeMetrics)> { + if !snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("MERGE".into())); + } + let mut metrics = MergeMetrics::default(); let exec_start = Instant::now(); + // Determining whether we should write change data once so that computation of change data can + // be disabled in the common case(s) + let should_cdc = should_write_cdc(&snapshot)?; + // Change data may be collected and then written out at the completion of the merge + let mut change_data = vec![]; + + if should_cdc { + debug!("Executing a merge and I should write CDC!"); + } let current_metadata = snapshot.metadata(); - let state = state.with_query_planner(Arc::new(MergePlanner {})); + let merge_planner = DeltaPlanner:: { + extension_planner: MergeMetricExtensionPlanner {}, + }; + + let state = SessionStateBuilder::new() + .with_default_features() + .with_query_planner(Arc::new(merge_planner)) + .build(); // TODO: Given the join predicate, remove any expression that involve the // source table and keep expressions that only involve the target table. @@ -987,6 +754,7 @@ async fn execute( let scan_config = DeltaScanConfigBuilder::default() .with_file_column(true) .with_parquet_pushdown(false) + .with_schema(snapshot.input_schema()?) .build(&snapshot)?; let target_provider = Arc::new(DeltaTableProvider::try_new( @@ -1002,6 +770,7 @@ async fn execute( let source_schema = source.schema(); let target_schema = target.schema(); let join_schema_df = build_join_schema(source_schema, target_schema, &JoinType::Full)?; + let predicate = match predicate { Expression::DataFusion(expr) => expr, Expression::String(s) => parse_predicate_expression(&join_schema_df, s, &state)?, @@ -1045,7 +814,7 @@ async fn execute( None => LogicalPlanBuilder::scan(target_name.clone(), target_provider, None)?.build()?, }; - let source = DataFrame::new(state.clone(), source); + let source = DataFrame::new(state.clone(), source.clone()); let source = source.with_column(SOURCE_COLUMN, lit(true))?; // Not match operations imply a full scan of the target table is required @@ -1324,9 +1093,9 @@ async fn execute( let plan = projection.into_unoptimized_plan(); let mut fields: Vec = plan .schema() - .fields() + .columns() .iter() - .map(|f| col(f.qualified_column())) + .map(|f| col(f.clone())) .collect(); fields.extend(new_columns.into_iter().map(|(name, ex)| ex.alias(name))); @@ -1338,7 +1107,7 @@ async fn execute( let merge_barrier = LogicalPlan::Extension(Extension { node: Arc::new(MergeBarrier { - input: new_columns, + input: new_columns.clone(), expr: distrbute_expr, file_column, }), @@ -1353,19 +1122,70 @@ async fn execute( }); let operation_count = DataFrame::new(state.clone(), operation_count); + + if should_cdc { + // Create a dataframe containing the CDC deletes which are present at this point + change_data.push( + operation_count + .clone() + .filter(col(DELETE_COLUMN))? + .select(write_projection.clone())? 
+ .with_column(crate::operations::cdc::CDC_COLUMN_NAME, lit("delete"))?, + ); + } + let filtered = operation_count.filter(col(DELETE_COLUMN).is_false())?; - let project = filtered.select(write_projection)?; - let merge_final = &project.into_unoptimized_plan(); + if should_cdc { + debug!("The merge should triggere a CDC tracking, computing pre/insert/postimage datasets"); + let cdc_projection = filtered.clone().filter(col(OPERATION_COLUMN).not_eq( + // This is a copy operation, but I'm not sure how to turn that enum into an int + lit(5), + ))?; + + change_data.push( + cdc_projection + .clone() + .filter( + col(SOURCE_COLUMN) + .is_true() + .and(col(TARGET_COLUMN).is_null()), + )? + .select(write_projection.clone())? + .with_column(CDC_COLUMN_NAME, lit("insert"))?, + ); + let before = cdc_projection + .clone() + .filter(col(crate::delta_datafusion::PATH_COLUMN).is_not_null())? + .select( + target_schema + .columns() + .iter() + .filter(|c| c.name != crate::delta_datafusion::PATH_COLUMN) + .map(|c| Expr::Column(c.clone())) + .collect_vec(), + )?; + + let after = cdc_projection + .clone() + .filter(col(TARGET_COLUMN).is_true())? + .select(write_projection.clone())?; + + let tracker = CDCTracker::new(before, after); + change_data.push(tracker.collect()?); + } + + let project = filtered.clone().select(write_projection)?; + let merge_final = &project.into_unoptimized_plan(); let write = state.create_physical_plan(merge_final).await?; let err = || DeltaTableError::Generic("Unable to locate expected metric node".into()); let source_count = find_metric_node(SOURCE_COUNT_ID, &write).ok_or_else(err)?; let op_count = find_metric_node(OUTPUT_COUNT_ID, &write).ok_or_else(err)?; - let barrier = find_barrier_node(&write).ok_or_else(err)?; + let barrier = find_node::(&write).ok_or_else(err)?; + let scan_count = find_node::(&write).ok_or_else(err)?; - // write projected records let table_partition_cols = current_metadata.partition_columns.clone(); let writer_stats_config = WriterStatsConfig::new( @@ -1377,7 +1197,7 @@ async fn execute( ); let rewrite_start = Instant::now(); - let add_actions = write_execution_plan( + let mut add_actions = write_execution_plan( Some(&snapshot), state.clone(), write, @@ -1385,13 +1205,38 @@ async fn execute( log_store.object_store(), Some(snapshot.table_config().target_file_size() as usize), None, - writer_properties, - safe_cast, + writer_properties.clone(), + writer_stats_config.clone(), None, - writer_stats_config, ) .await?; + if should_cdc && !change_data.is_empty() { + let mut df = change_data + .pop() + .expect("change_data should never be empty"); + // Accumulate all the changes together into a single data frame to produce the necessary + // change data files + for change in change_data { + df = df.union(change)?; + } + add_actions.extend( + write_execution_plan_cdc( + Some(&snapshot), + state.clone(), + df.create_physical_plan().await?, + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + writer_stats_config, + None, + ) + .await?, + ); + } + metrics.rewrite_time_ms = Instant::now().duration_since(rewrite_start).as_millis() as u64; let mut actions: Vec = add_actions.clone(); @@ -1415,9 +1260,7 @@ async fn execute( let source_count_metrics = source_count.metrics().unwrap(); let target_count_metrics = op_count.metrics().unwrap(); - fn get_metric(metrics: &MetricsSet, name: &str) -> usize { - metrics.sum_by_name(name).map(|m| m.as_usize()).unwrap_or(0) - } + let 
scan_count_metrics = scan_count.metrics().unwrap(); metrics.num_source_rows = get_metric(&source_count_metrics, SOURCE_COUNT_METRIC); metrics.num_target_rows_inserted = get_metric(&target_count_metrics, TARGET_INSERTED_METRIC); @@ -1427,7 +1270,8 @@ async fn execute( metrics.num_output_rows = metrics.num_target_rows_inserted + metrics.num_target_rows_updated + metrics.num_target_rows_copied; - + metrics.num_target_files_scanned = get_metric(&scan_count_metrics, "files_scanned"); + metrics.num_target_files_skipped_during_scan = get_metric(&scan_count_metrics, "files_pruned"); metrics.execution_time_ms = Instant::now().duration_since(exec_start).as_millis() as u64; let app_metadata = &mut commit_properties.app_metadata; @@ -1484,25 +1328,6 @@ fn remove_table_alias(expr: Expr, table_alias: &str) -> Expr { .data } -// TODO: Abstract MergePlanner into DeltaPlanner to support other delta operations in the future. -struct MergePlanner {} - -#[async_trait] -impl QueryPlanner for MergePlanner { - async fn create_physical_plan( - &self, - logical_plan: &LogicalPlan, - session_state: &SessionState, - ) -> DataFusionResult> { - let planner = Arc::new(Box::new(DefaultPhysicalPlanner::with_extension_planners( - vec![Arc::new(MergeMetricExtensionPlanner {})], - ))); - planner - .create_physical_plan(logical_plan, session_state) - .await - } -} - impl std::future::IntoFuture for MergeBuilder { type Output = DeltaResult<(DeltaTable, MergeMetrics)>; type IntoFuture = BoxFuture<'static, Self::Output>; @@ -1553,48 +1378,43 @@ mod tests { use crate::kernel::DataType; use crate::kernel::PrimitiveType; use crate::kernel::StructField; - use crate::operations::merge::generalize_filter; - use crate::operations::merge::try_construct_early_filter; + use crate::operations::load_cdf::collect_batches; + use crate::operations::merge::filter::generalize_filter; use crate::operations::DeltaOps; use crate::protocol::*; use crate::writer::test_utils::datafusion::get_data; use crate::writer::test_utils::get_arrow_schema; use crate::writer::test_utils::get_delta_schema; use crate::writer::test_utils::setup_table_with_configuration; - use crate::DeltaConfigKey; use crate::DeltaTable; + use crate::TableProperty; use arrow::datatypes::Schema as ArrowSchema; use arrow::record_batch::RecordBatch; use arrow_schema::DataType as ArrowDataType; use arrow_schema::Field; use datafusion::assert_batches_sorted_eq; - use datafusion::datasource::provider_as_source; - use datafusion::prelude::DataFrame; - use datafusion::prelude::SessionContext; + use datafusion::physical_plan::ExecutionPlan; + use datafusion::prelude::*; use datafusion_common::Column; - use datafusion_common::ScalarValue; use datafusion_common::TableReference; use datafusion_expr::col; use datafusion_expr::expr::Placeholder; use datafusion_expr::lit; use datafusion_expr::Expr; - use datafusion_expr::LogicalPlanBuilder; - use datafusion_expr::Operator; use itertools::Itertools; use regex::Regex; use serde_json::json; - use std::collections::HashMap; use std::ops::Neg; use std::sync::Arc; use super::MergeMetrics; - async fn setup_table(partitions: Option>) -> DeltaTable { + pub(crate) async fn setup_table(partitions: Option>) -> DeltaTable { let table_schema = get_delta_schema(); let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -1606,7 +1426,7 @@ mod tests { #[tokio::test] async fn 
test_merge_when_delta_table_is_append_only() { let schema = get_arrow_schema(&None); - let table = setup_table_with_configuration(DeltaConfigKey::AppendOnly, Some("true")).await; + let table = setup_table_with_configuration(TableProperty::AppendOnly, Some("true")).await; // append some data let table = write_data(table, &schema).await; // merge @@ -2063,7 +1883,10 @@ mod tests { let commit_info = table.history(None).await.unwrap(); let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], "modified = '2021-02-02'"); + assert_eq!( + parameters["predicate"], + "id BETWEEN 'B' AND 'C' AND modified = '2021-02-02'" + ); assert_eq!( parameters["mergePredicate"], "target.id = source.id AND target.modified = '2021-02-02'" @@ -2149,6 +1972,115 @@ mod tests { assert_batches_sorted_eq!(&expected, &actual); } + #[tokio::test] + async fn test_merge_partitions_with_in() { + /* Validate the join predicate works with table partitions */ + let schema = get_arrow_schema(&None); + let table = setup_table(Some(vec!["modified"])).await; + + let table = write_data(table, &schema).await; + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 2); + + let ctx = SessionContext::new(); + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(arrow::array::StringArray::from(vec!["B", "C", "X"])), + Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-02", + "2023-07-04", + "2023-07-04", + ])), + ], + ) + .unwrap(); + let source = ctx.read_batch(batch).unwrap(); + + let (table, metrics) = DeltaOps(table) + .merge( + source, + col("target.id") + .eq(col("source.id")) + .and(col("target.id").in_list( + vec![ + col("source.id"), + col("source.modified"), + col("source.value"), + ], + false, + )) + .and(col("target.modified").in_list(vec![lit("2021-02-02")], false)), + ) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("value", col("source.value")) + .update("modified", col("source.modified")) + }) + .unwrap() + .when_not_matched_by_source_update(|update| { + update + .predicate(col("target.value").eq(lit(1))) + .update("value", col("target.value") + lit(1)) + }) + .unwrap() + .when_not_matched_by_source_update(|update| { + update + .predicate(col("target.modified").eq(lit("2021-02-01"))) + .update("value", col("target.value") - lit(1)) + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", col("source.id")) + .set("value", col("source.value")) + .set("modified", col("source.modified")) + }) + .unwrap() + .await + .unwrap(); + + assert_eq!(table.version(), 2); + assert!(table.get_files_count() >= 3); + assert!(metrics.num_target_files_added >= 3); + assert_eq!(metrics.num_target_files_removed, 2); + assert_eq!(metrics.num_target_rows_copied, 1); + assert_eq!(metrics.num_target_rows_updated, 3); + assert_eq!(metrics.num_target_rows_inserted, 2); + assert_eq!(metrics.num_target_rows_deleted, 0); + assert_eq!(metrics.num_output_rows, 6); + assert_eq!(metrics.num_source_rows, 3); + + let commit_info = table.history(None).await.unwrap(); + let last_commit = &commit_info[0]; + let parameters = last_commit.operation_parameters.clone().unwrap(); + assert!(!parameters.contains_key("predicate")); + assert_eq!( + parameters["mergePredicate"], + "target.id = source.id AND \ + target.id IN (source.id, source.modified, source.value) AND \ + target.modified IN 
('2021-02-02')" + ); + + let expected = vec![ + "+----+-------+------------+", + "| id | value | modified |", + "+----+-------+------------+", + "| A | 2 | 2021-02-01 |", + "| B | 9 | 2021-02-01 |", + "| B | 10 | 2021-02-02 |", + "| C | 20 | 2023-07-04 |", + "| D | 100 | 2021-02-02 |", + "| X | 30 | 2023-07-04 |", + "+----+-------+------------+", + ]; + let actual = get_data(&table).await; + assert_batches_sorted_eq!(&expected, &actual); + } + #[tokio::test] async fn test_merge_delete_matched() { // Validate behaviours of match delete @@ -2204,7 +2136,7 @@ mod tests { extra_info["operationMetrics"], serde_json::to_value(&metrics).unwrap() ); - assert!(!parameters.contains_key("predicate")); + assert_eq!(parameters["predicate"], "id BETWEEN 'B' AND 'X'"); assert_eq!(parameters["mergePredicate"], json!("target.id = source.id")); assert_eq!( parameters["matchedPredicates"], @@ -2486,7 +2418,10 @@ mod tests { let last_commit = &commit_info[0]; let parameters = last_commit.operation_parameters.clone().unwrap(); - assert_eq!(parameters["predicate"], json!("modified = '2021-02-02'")); + assert_eq!( + parameters["predicate"], + json!("id BETWEEN 'B' AND 'X' AND modified = '2021-02-02'") + ); let expected = vec![ "+----+-------+------------+", @@ -2590,7 +2525,7 @@ mod tests { let parsed_filter = col(Column::new(source.clone().into(), "id")) .eq(col(Column::new(target.clone().into(), "id"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2622,7 +2557,7 @@ mod tests { let parsed_filter = (source_id.clone().eq(target_id.clone())) .or(source_id.clone().is_null().and(target_id.clone().is_null())); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2645,12 +2580,12 @@ mod tests { }) .and(target_id.clone().is_null())); - assert!(placeholders.len() == 2); + assert_eq!(placeholders.len(), 2); - let captured_expressions = placeholders.values().collect_vec(); + let captured_expressions = placeholders.into_iter().map(|p| p.expr).collect_vec(); - assert!(captured_expressions.contains(&&source_id)); - assert!(captured_expressions.contains(&&source_id.is_null())); + assert!(captured_expressions.contains(&source_id)); + assert!(captured_expressions.contains(&source_id.is_null())); assert_eq!(generalized, expected_filter); } @@ -2666,7 +2601,7 @@ mod tests { .neg() .eq(col(Column::new(target.clone().into(), "id"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2686,12 +2621,13 @@ mod tests { assert_eq!(generalized, expected_filter); assert_eq!(placeholders.len(), 1); - - let placeholder_expr = &placeholders["id_0"]; + let placeholder_expr = placeholders.first().unwrap(); let expected_placeholder = col(Column::new(source.clone().into(), "id")).neg(); - assert_eq!(placeholder_expr, &expected_placeholder); + assert_eq!(placeholder_expr.expr, expected_placeholder); + assert_eq!(placeholder_expr.alias, "id_0"); + assert!(!placeholder_expr.is_aggregate); } #[tokio::test] @@ -2704,7 +2640,7 @@ mod tests { .eq(col(Column::new(target.clone().into(), "id"))) .and(col(Column::new(target.clone().into(), "id")).eq(lit("C"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2727,15 +2663,14 @@ mod tests { } #[tokio::test] - async fn 
test_generalize_filter_keeps_only_static_target_references() { + async fn test_generalize_filter_with_dynamic_target_range_references() { let source = TableReference::parse_str("source"); let target = TableReference::parse_str("target"); let parsed_filter = col(Column::new(source.clone().into(), "id")) - .eq(col(Column::new(target.clone().into(), "id"))) - .and(col(Column::new(target.clone().into(), "id")).eq(lit("C"))); + .eq(col(Column::new(target.clone().into(), "id"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2745,8 +2680,16 @@ mod tests { &mut placeholders, ) .unwrap(); - - let expected_filter = col(Column::new(target.clone().into(), "id")).eq(lit("C")); + let expected_filter_l = Expr::Placeholder(Placeholder { + id: "id_0_min".to_owned(), + data_type: None, + }); + let expected_filter_h = Expr::Placeholder(Placeholder { + id: "id_0_max".to_owned(), + data_type: None, + }); + let expected_filter = col(Column::new(target.clone().into(), "id")) + .between(expected_filter_l, expected_filter_h); assert_eq!(generalized, expected_filter); } @@ -2760,7 +2703,7 @@ mod tests { .eq(col(Column::new(target.clone().into(), "id"))) .and(col(Column::new(source.clone().into(), "id")).eq(lit("C"))); - let mut placeholders = HashMap::default(); + let mut placeholders = Vec::default(); let generalized = generalize_filter( parsed_filter, @@ -2780,104 +2723,6 @@ mod tests { assert_eq!(generalized, expected_filter); } - #[tokio::test] - async fn test_try_construct_early_filter_with_partitions_expands() { - let schema = get_arrow_schema(&None); - let table = setup_table(Some(vec!["id"])).await; - - assert_eq!(table.version(), 0); - assert_eq!(table.get_files_count(), 0); - - let ctx = SessionContext::new(); - let batch = RecordBatch::try_new( - Arc::clone(&schema), - vec![ - Arc::new(arrow::array::StringArray::from(vec!["B", "C", "X"])), - Arc::new(arrow::array::Int32Array::from(vec![10, 20, 30])), - Arc::new(arrow::array::StringArray::from(vec![ - "2021-02-02", - "2023-07-04", - "2023-07-04", - ])), - ], - ) - .unwrap(); - let source = ctx.read_batch(batch).unwrap(); - - let source_name = TableReference::parse_str("source"); - let target_name = TableReference::parse_str("target"); - - let source = LogicalPlanBuilder::scan( - source_name.clone(), - provider_as_source(source.into_view()), - None, - ) - .unwrap() - .build() - .unwrap(); - - let join_predicate = col(Column { - relation: Some(source_name.clone()), - name: "id".to_owned(), - }) - .eq(col(Column { - relation: Some(target_name.clone()), - name: "id".to_owned(), - })); - - let pred = try_construct_early_filter( - join_predicate, - table.snapshot().unwrap(), - &ctx.state(), - &source, - &source_name, - &target_name, - ) - .await - .unwrap(); - - assert!(pred.is_some()); - - let split_pred = { - fn split(expr: Expr, parts: &mut Vec<(String, String)>) { - match expr { - Expr::BinaryExpr(ex) if ex.op == Operator::Or => { - split(*ex.left, parts); - split(*ex.right, parts); - } - Expr::BinaryExpr(ex) if ex.op == Operator::Eq => { - let col = match *ex.right { - Expr::Column(col) => col.name, - ex => panic!("expected column in pred, got {ex}!"), - }; - - let value = match *ex.left { - Expr::Literal(ScalarValue::Utf8(Some(value))) => value, - ex => panic!("expected value in predicate, got {ex}!"), - }; - - parts.push((col, value)) - } - - expr => panic!("expected either = or OR, got {expr}"), - } - } - - let mut parts = vec![]; - split(pred.unwrap(), &mut 
parts); - parts.sort(); - parts - }; - - let expected_pred_parts = [ - ("id".to_owned(), "B".to_owned()), - ("id".to_owned(), "C".to_owned()), - ("id".to_owned(), "X".to_owned()), - ]; - - assert_eq!(split_pred, expected_pred_parts); - } - #[tokio::test] async fn test_merge_pushdowns() { //See https://github.com/delta-io/delta-rs/issues/2158 @@ -3200,4 +3045,228 @@ mod tests { let actual = get_data(&table).await; assert_batches_sorted_eq!(&expected, &actual); } + + #[tokio::test] + async fn test_merge_cdc_disabled() { + let (table, source) = setup().await; + + let (table, metrics) = DeltaOps(table) + .merge(source, col("target.id").eq(col("source.id"))) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("value", col("source.value")) + .update("modified", col("source.modified")) + }) + .unwrap() + .when_not_matched_by_source_update(|update| { + update + .predicate(col("target.value").eq(lit(1))) + .update("value", col("target.value") + lit(1)) + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", col("source.id")) + .set("value", col("source.value")) + .set("modified", col("source.modified")) + }) + .unwrap() + .await + .unwrap(); + + assert_merge(table.clone(), metrics).await; + + // Just checking that the data wasn't actually written instead! + if let Ok(files) = crate::storage::utils::flatten_list_stream( + &table.object_store(), + Some(&object_store::path::Path::from("_change_data")), + ) + .await + { + assert_eq!( + 0, + files.len(), + "This test should not find any written CDC files! {files:#?}" + ); + } + } + + #[tokio::test] + async fn test_merge_cdc_enabled_simple() { + // Manually creating the desired table with the right minimum CDC features + use crate::kernel::Protocol; + use crate::operations::merge::Action; + + let schema = get_delta_schema(); + + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(schema.fields().cloned()) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = get_arrow_schema(&None); + let table = write_data(table, &schema).await; + + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 1); + let source = merge_source(schema); + + let (table, metrics) = DeltaOps(table) + .merge(source, col("target.id").eq(col("source.id"))) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("value", col("source.value")) + .update("modified", col("source.modified")) + }) + .unwrap() + .when_not_matched_by_source_update(|update| { + update + .predicate(col("target.value").eq(lit(1))) + .update("value", col("target.value") + lit(1)) + }) + .unwrap() + .when_not_matched_insert(|insert| { + insert + .set("id", col("source.id")) + .set("value", col("source.value")) + .set("modified", col("source.modified")) + }) + .unwrap() + .await + .unwrap(); + + assert_merge(table.clone(), metrics).await; + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + let _ = arrow::util::pretty::print_batches(&batches); 
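+        // `_commit_timestamp` (column index 5 of the CDF read: id, value, modified,
+        // _change_type, _commit_version, _commit_timestamp) is wall-clock time assigned at
+        // commit, so it is non-deterministic across runs; it is dropped below before the
+        // batches are compared.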
+ + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(5)).collect(); + + assert_batches_sorted_eq! {[ + "+----+-------+------------+------------------+-----------------+", + "| id | value | modified | _change_type | _commit_version |", + "+----+-------+------------+------------------+-----------------+", + "| A | 1 | 2021-02-01 | update_preimage | 2 |", + "| A | 2 | 2021-02-01 | update_postimage | 2 |", + "| B | 10 | 2021-02-01 | update_preimage | 2 |", + "| B | 10 | 2021-02-02 | update_postimage | 2 |", + "| C | 10 | 2021-02-02 | update_preimage | 2 |", + "| C | 20 | 2023-07-04 | update_postimage | 2 |", + "| X | 30 | 2023-07-04 | insert | 2 |", + "| A | 1 | 2021-02-01 | insert | 1 |", + "| B | 10 | 2021-02-01 | insert | 1 |", + "| C | 10 | 2021-02-02 | insert | 1 |", + "| D | 100 | 2021-02-02 | insert | 1 |", + "+----+-------+------------+------------------+-----------------+", + ], &batches } + } + + #[tokio::test] + async fn test_merge_cdc_enabled_delete() { + // Manually creating the desired table with the right minimum CDC features + use crate::kernel::Protocol; + use crate::operations::merge::Action; + + let schema = get_delta_schema(); + + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(schema.fields().cloned()) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = get_arrow_schema(&None); + let table = write_data(table, &schema).await; + + assert_eq!(table.version(), 1); + assert_eq!(table.get_files_count(), 1); + let source = merge_source(schema); + + let (table, _metrics) = DeltaOps(table) + .merge(source, col("target.id").eq(col("source.id"))) + .with_source_alias("source") + .with_target_alias("target") + .when_not_matched_by_source_delete(|delete| { + delete.predicate(col("target.modified").gt(lit("2021-02-01"))) + }) + .unwrap() + .await + .unwrap(); + + let expected = vec![ + "+----+-------+------------+", + "| id | value | modified |", + "+----+-------+------------+", + "| A | 1 | 2021-02-01 |", + "| B | 10 | 2021-02-01 |", + "| C | 10 | 2021-02-02 |", + "+----+-------+------------+", + ]; + let actual = get_data(&table).await; + assert_batches_sorted_eq!(&expected, &actual); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + let _ = arrow::util::pretty::print_batches(&batches); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(5)).collect(); + + assert_batches_sorted_eq! 
{[ + "+----+-------+------------+--------------+-----------------+", + "| id | value | modified | _change_type | _commit_version |", + "+----+-------+------------+--------------+-----------------+", + "| D | 100 | 2021-02-02 | delete | 2 |", + "| A | 1 | 2021-02-01 | insert | 1 |", + "| B | 10 | 2021-02-01 | insert | 1 |", + "| C | 10 | 2021-02-02 | insert | 1 |", + "| D | 100 | 2021-02-02 | insert | 1 |", + "+----+-------+------------+--------------+-----------------+", + ], &batches } + } } diff --git a/crates/core/src/operations/mod.rs b/crates/core/src/operations/mod.rs index 7923431d45..c71141d277 100644 --- a/crates/core/src/operations/mod.rs +++ b/crates/core/src/operations/mod.rs @@ -6,15 +6,33 @@ //! the operations' behaviors and will return an updated table potentially in conjunction //! with a [data stream][datafusion::physical_plan::SendableRecordBatchStream], //! if the operation returns data as well. +use std::collections::HashMap; + +use add_feature::AddTableFeatureBuilder; +#[cfg(feature = "datafusion")] +use arrow_array::RecordBatch; +#[cfg(feature = "datafusion")] +pub use datafusion_physical_plan::common::collect as collect_sendable_stream; +use self::add_column::AddColumnBuilder; use self::create::CreateBuilder; use self::filesystem_check::FileSystemCheckBuilder; +use self::optimize::OptimizeBuilder; +use self::restore::RestoreBuilder; +use self::set_tbl_properties::SetTablePropertiesBuilder; use self::vacuum::VacuumBuilder; +#[cfg(feature = "datafusion")] +use self::{ + constraints::ConstraintBuilder, datafusion_utils::Expression, delete::DeleteBuilder, + drop_constraints::DropConstraintBuilder, load::LoadBuilder, load_cdf::CdfLoadBuilder, + merge::MergeBuilder, update::UpdateBuilder, write::WriteBuilder, +}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::table::builder::DeltaTableBuilder; use crate::DeltaTable; -use std::collections::HashMap; +pub mod add_column; +pub mod add_feature; pub mod cast; pub mod convert_to_delta; pub mod create; @@ -25,20 +43,8 @@ pub mod restore; pub mod transaction; pub mod vacuum; -#[cfg(feature = "datafusion")] -use self::{ - constraints::ConstraintBuilder, datafusion_utils::Expression, delete::DeleteBuilder, - drop_constraints::DropConstraintBuilder, load::LoadBuilder, load_cdf::CdfLoadBuilder, - merge::MergeBuilder, update::UpdateBuilder, write::WriteBuilder, -}; -#[cfg(feature = "datafusion")] -pub use ::datafusion::physical_plan::common::collect as collect_sendable_stream; -#[cfg(feature = "datafusion")] -use arrow::record_batch::RecordBatch; -use optimize::OptimizeBuilder; -use restore::RestoreBuilder; -use set_tbl_properties::SetTablePropertiesBuilder; - +#[cfg(all(feature = "cdf", feature = "datafusion"))] +mod cdc; #[cfg(feature = "datafusion")] pub mod constraints; #[cfg(feature = "datafusion")] @@ -56,6 +62,7 @@ pub mod update; pub mod write; pub mod writer; +#[allow(unused)] /// The [Operation] trait defines common behaviors that all operations builders /// should have consistent pub(crate) trait Operation: std::future::IntoFuture {} @@ -215,6 +222,12 @@ impl DeltaOps { ConstraintBuilder::new(self.0.log_store, self.0.state.unwrap()) } + /// Enable a table feature for a table + #[must_use] + pub fn add_feature(self) -> AddTableFeatureBuilder { + AddTableFeatureBuilder::new(self.0.log_store, self.0.state.unwrap()) + } + /// Drops constraints from a table #[cfg(feature = "datafusion")] #[must_use] @@ -226,6 +239,11 @@ impl DeltaOps { pub fn set_tbl_properties(self) -> SetTablePropertiesBuilder { 
SetTablePropertiesBuilder::new(self.0.log_store, self.0.state.unwrap()) } + + /// Add new columns + pub fn add_columns(self) -> AddColumnBuilder { + AddColumnBuilder::new(self.0.log_store, self.0.state.unwrap()) + } } impl From for DeltaOps { @@ -273,6 +291,22 @@ pub fn get_num_idx_cols_and_stats_columns( ) } +/// Get the target_file_size from the table configuration in the sates +/// If table_config does not exist (only can occur in the first write action) it takes +/// the configuration that was passed to the writerBuilder. +pub(crate) fn get_target_file_size( + config: &Option>, + configuration: &HashMap>, +) -> i64 { + match &config { + Some(conf) => conf.target_file_size(), + _ => configuration + .get("delta.targetFileSize") + .and_then(|v| v.clone().map(|v| v.parse::().unwrap())) + .unwrap_or(crate::table::config::DEFAULT_TARGET_FILE_SIZE), + } +} + #[cfg(feature = "datafusion")] mod datafusion_utils { use datafusion::execution::context::SessionState; @@ -282,6 +316,7 @@ mod datafusion_utils { use crate::{delta_datafusion::expr::parse_predicate_expression, DeltaResult}; /// Used to represent user input of either a Datafusion expression or string expression + #[derive(Debug)] pub enum Expression { /// Datafusion Expression DataFusion(Expr), diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index 10cbb6a22a..cf096d56d1 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -25,8 +25,9 @@ use std::fmt; use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use arrow::datatypes::SchemaRef as ArrowSchemaRef; use arrow_array::RecordBatch; +use arrow_schema::SchemaRef as ArrowSchemaRef; +use delta_kernel::expressions::Scalar; use futures::future::BoxFuture; use futures::stream::BoxStream; use futures::{Future, StreamExt, TryStreamExt}; @@ -38,12 +39,13 @@ use parquet::basic::{Compression, ZstdLevel}; use parquet::errors::ParquetError; use parquet::file::properties::WriterProperties; use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer}; -use tracing::debug; +use tracing::*; +use url::Url; use super::transaction::PROTOCOL; use super::writer::{PartitionWriter, PartitionWriterConfig}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, PartitionsExt, Remove, Scalar}; +use crate::kernel::{scalars::ScalarExt, Action, PartitionsExt, Remove}; use crate::logstore::LogStoreRef; use crate::operations::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES}; use crate::protocol::DeltaOperation; @@ -136,6 +138,7 @@ impl fmt::Display for MetricDetails { } } +#[derive(Debug)] /// Metrics for a single partition pub struct PartialMetrics { /// Number of optimized files added @@ -202,9 +205,9 @@ pub struct OptimizeBuilder<'a> { commit_properties: CommitProperties, /// Whether to preserve insertion order within files (default false) preserve_insertion_order: bool, - /// Max number of concurrent tasks (default is number of cpus) + /// Maximum number of concurrent tasks (default is number of cpus) max_concurrent_tasks: usize, - /// Maximum number of bytes that are allowed to spill to disk + /// Maximum number of bytes allowed in memory before spilling to disk max_spill_size: usize, /// Optimize type optimize_type: OptimizeType, @@ -225,7 +228,7 @@ impl<'a> OptimizeBuilder<'a> { commit_properties: CommitProperties::default(), preserve_insertion_order: false, max_concurrent_tasks: num_cpus::get(), - max_spill_size: 20 * 1024 * 1024 * 
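The fallback described in the `get_target_file_size` doc comment above can be restated as a small standalone sketch (this is not the delta-rs signature, and the default value is assumed for illustration only): the table's own configuration wins, otherwise a `delta.targetFileSize` entry passed to the writer is used, otherwise a built-in default.

```rust
use std::collections::HashMap;

// Assumed default for the sketch; the real crate defines its own constant.
const DEFAULT_TARGET_FILE_SIZE: i64 = 104_857_600;

fn resolve_target_file_size(
    table_config_value: Option<i64>,
    writer_configuration: &HashMap<String, Option<String>>,
) -> i64 {
    table_config_value
        .or_else(|| {
            writer_configuration
                .get("delta.targetFileSize")
                .and_then(|v| v.as_ref().and_then(|v| v.parse::<i64>().ok()))
        })
        .unwrap_or(DEFAULT_TARGET_FILE_SIZE)
}

fn main() {
    let conf = HashMap::from([(
        "delta.targetFileSize".to_string(),
        Some("134217728".to_string()),
    )]);
    // No table config yet (first write): fall back to the writer configuration.
    assert_eq!(resolve_target_file_size(None, &conf), 134_217_728);
    // Table config exists: it takes precedence.
    assert_eq!(resolve_target_file_size(Some(32_000_000), &conf), 32_000_000);
}
```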
2014, // 20 GB. + max_spill_size: 20 * 1024 * 1024 * 1024, // 20 GB. optimize_type: OptimizeType::Compact, min_commit_interval: None, } @@ -295,6 +298,9 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> { Box::pin(async move { PROTOCOL.can_write_to(&this.snapshot.snapshot)?; + if !&this.snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("OPTIMIZE".into())); + } let writer_properties = this.writer_properties.unwrap_or_else(|| { WriterProperties::builder() @@ -341,6 +347,7 @@ impl From for DeltaOperation { } } +/// Generate an appropriate remove action for the optimization task fn create_remove( path: &str, partitions: &IndexMap, @@ -602,12 +609,26 @@ impl MergePlan { use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{Expr, ScalarUDF}; - let locations = files + // This code is ... not ideal. Essentially `read_parquet` expects Strings that it will then + // parse as URLs and then pass back to the object store (x_x). This can cause problems when + // paths in object storage have special characters like spaces, etc. + // + // This [str::replace] i kind of a hack to address + // + let locations: Vec = files .iter() - .map(|file| format!("delta-rs:///{}", file.location)) - .collect_vec(); + .map(|om| { + format!( + "delta-rs:///{}", + str::replace(om.location.as_ref(), "%", "%25") + ) + }) + .collect(); + debug!("Reading z-order with locations are: {locations:?}"); + let df = context .ctx + // TODO: should read options have the partition columns .read_parquet(locations, ParquetReadOptions::default()) .await?; @@ -708,6 +729,7 @@ impl MergePlan { bins.len() <= num_cpus::get(), )); + debug!("Starting zorder with the columns: {zorder_columns:?} {bins:?}"); #[cfg(feature = "datafusion")] let exec_context = Arc::new(zorder::ZOrderExecContext::new( zorder_columns, @@ -715,6 +737,7 @@ impl MergePlan { max_spill_size, )?); let task_parameters = self.task_parameters.clone(); + let log_store = log_store.clone(); futures::stream::iter(bins) .map(move |(_, (partition, files))| { @@ -887,9 +910,7 @@ impl MergeBin { self.size_bytes += meta.size as i64; self.files.push(meta); } -} -impl MergeBin { fn iter(&self) -> impl Iterator { self.files.iter() } @@ -1001,7 +1022,6 @@ fn build_zorder_plan( let field_names = snapshot .schema() .fields() - .iter() .map(|field| field.name().to_string()) .collect_vec(); let unknown_columns = zorder_columns @@ -1033,6 +1053,7 @@ fn build_zorder_plan( .or_insert_with(|| (partition_values, MergeBin::new())) .1 .add(object_meta); + error!("partition_files inside the zorder plan: {partition_files:?}"); } let operation = OptimizeOperations::ZOrder(zorder_columns, partition_files); @@ -1226,7 +1247,6 @@ pub(super) mod zorder { let runtime = Arc::new(RuntimeEnv::new(config)?); runtime.register_object_store(&Url::parse("delta-rs://").unwrap(), object_store); - use url::Url; let ctx = SessionContext::new_with_config_rt(SessionConfig::default(), runtime); ctx.register_udf(ScalarUDF::from(datafusion::ZOrderUDF)); Ok(Self { columns, ctx }) @@ -1266,6 +1286,7 @@ pub(super) mod zorder { fn zorder_key_datafusion( columns: &[ColumnarValue], ) -> Result { + debug!("zorder_key_datafusion: {columns:#?}"); let length = columns .iter() .map(|col| match col { @@ -1420,6 +1441,94 @@ pub(super) mod zorder { .await; assert!(res.is_ok()); } + + /// Issue + #[tokio::test] + async fn test_zorder_space_in_partition_value() { + use arrow_schema::Schema as ArrowSchema; + let _ = pretty_env_logger::try_init(); + let schema = 
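The comment above the `read_parquet` call explains that DataFusion parses the passed locations as URLs, so a literal `%` in an object-store path has to be re-encoded before hand-off or it would be decoded a second time. A self-contained sketch of that idea (the helper name is made up for illustration and is not part of delta-rs):

```rust
// Re-encode '%' so that a path that already contains percent-encoded characters
// (e.g. a partition value with a space) survives DataFusion's URL parsing intact.
fn to_read_parquet_location(location: &str) -> String {
    format!("delta-rs:///{}", location.replace('%', "%25"))
}

fn main() {
    let loc = "country=Dominican%20Republic/part-00000.parquet";
    assert_eq!(
        to_read_parquet_location(loc),
        "delta-rs:///country=Dominican%2520Republic/part-00000.parquet"
    );
    println!("{}", to_read_parquet_location(loc));
}
```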
Arc::new(ArrowSchema::new(vec![ + Field::new("modified", DataType::Utf8, true), + Field::new("country", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-01", + "2021-02-01", + "2021-02-02", + "2021-02-02", + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "Germany", + "China", + "Canada", + "Dominican Republic", + ])), + Arc::new(arrow::array::Int32Array::from(vec![1, 10, 20, 100])), + //Arc::new(arrow::array::StringArray::from(vec!["Dominican Republic"])), + //Arc::new(arrow::array::Int32Array::from(vec![100])), + ], + ) + .unwrap(); + // write some data + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_partition_columns(vec!["country"]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .await + .unwrap(); + + let res = crate::DeltaOps(table) + .optimize() + .with_type(OptimizeType::ZOrder(vec!["modified".into()])) + .await; + assert!(res.is_ok(), "Failed to optimize: {res:#?}"); + } + + #[tokio::test] + async fn test_zorder_space_in_partition_value_garbage() { + use arrow_schema::Schema as ArrowSchema; + let _ = pretty_env_logger::try_init(); + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("modified", DataType::Utf8, true), + Field::new("country", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::StringArray::from(vec![ + "2021-02-01", + "2021-02-01", + "2021-02-02", + "2021-02-02", + ])), + Arc::new(arrow::array::StringArray::from(vec![ + "Germany", "China", "Canada", "USA$$!", + ])), + Arc::new(arrow::array::Int32Array::from(vec![1, 10, 20, 100])), + ], + ) + .unwrap(); + // write some data + let table = crate::DeltaOps::new_in_memory() + .write(vec![batch.clone()]) + .with_partition_columns(vec!["country"]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .await + .unwrap(); + + let res = crate::DeltaOps(table) + .optimize() + .with_type(OptimizeType::ZOrder(vec!["modified".into()])) + .await; + assert!(res.is_ok(), "Failed to optimize: {res:#?}"); + } } } @@ -1575,5 +1684,30 @@ pub(super) mod zorder { assert_eq!(data.value_data().len(), 3 * 16 * 3); assert!(data.iter().all(|x| x.unwrap().len() == 3 * 16)); } + + #[tokio::test] + async fn works_on_spark_table() { + use crate::DeltaOps; + use tempfile::TempDir; + // Create a temporary directory + let tmp_dir = TempDir::new().expect("Failed to make temp dir"); + let table_name = "delta-1.2.1-only-struct-stats"; + + // Copy recursively from the test data directory to the temporary directory + let source_path = format!("../test/tests/data/{table_name}"); + fs_extra::dir::copy(source_path, tmp_dir.path(), &Default::default()).unwrap(); + + // Run optimize + let (_, metrics) = + DeltaOps::try_from_uri(tmp_dir.path().join(table_name).to_str().unwrap()) + .await + .unwrap() + .optimize() + .await + .unwrap(); + + // Verify it worked + assert_eq!(metrics.num_files_added, 1); + } } } diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index e2ab9741bc..498edc67c0 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -4,14 +4,14 @@ //! 1) Read the latest state snapshot of the table. //! 2) Read table state for version or datetime to restore //! 
3) Compute files available in state for restoring (files were removed by some commit) -//! but missed in the latest. Add these files into commit as AddFile action. +//! but missed in the latest. Add these files into commit as AddFile action. //! 4) Compute files available in the latest state snapshot (files were added after version to restore) -//! but missed in the state to restore. Add these files into commit as RemoveFile action. +//! but missed in the state to restore. Add these files into commit as RemoveFile action. //! 5) If ignore_missing_files option is false (default value) check availability of AddFile -//! in file system. +//! in file system. //! 6) Commit Protocol, all RemoveFile and AddFile actions -//! into delta log using `LogStore::write_commit_entry` (commit will be failed in case of parallel transaction) -//! TODO: comment is outdated +//! into delta log using `LogStore::write_commit_entry` (commit will be failed in case of parallel transaction) +//! TODO: comment is outdated //! 7) If table was modified in parallel then ignore restore and raise exception. //! //! # Example @@ -272,14 +272,19 @@ async fn execute( .await?; let commit_version = snapshot.version() + 1; - let commit = prepared_commit.path(); - match log_store.write_commit_entry(commit_version, commit).await { + let commit_bytes = prepared_commit.commit_or_bytes(); + match log_store + .write_commit_entry(commit_version, commit_bytes.clone()) + .await + { Ok(_) => {} Err(err @ TransactionError::VersionAlreadyExists(_)) => { return Err(err.into()); } Err(err) => { - log_store.abort_commit_entry(commit_version, commit).await?; + log_store + .abort_commit_entry(commit_version, commit_bytes.clone()) + .await?; return Err(err.into()); } } diff --git a/crates/core/src/operations/set_tbl_properties.rs b/crates/core/src/operations/set_tbl_properties.rs index e0c4ea2e9a..b3ca7607ac 100644 --- a/crates/core/src/operations/set_tbl_properties.rs +++ b/crates/core/src/operations/set_tbl_properties.rs @@ -1,18 +1,16 @@ //! 
Set table properties on a table -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use futures::future::BoxFuture; -use maplit::hashset; use super::transaction::{CommitBuilder, CommitProperties}; -use crate::kernel::{Action, Protocol, ReaderFeatures, WriterFeatures}; +use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use crate::DeltaConfigKey; +use crate::DeltaResult; use crate::DeltaTable; -use crate::{DeltaResult, DeltaTableError}; /// Remove constraints from the table pub struct SetTablePropertiesBuilder { @@ -59,203 +57,6 @@ impl SetTablePropertiesBuilder { } } -/// Will apply the properties to the protocol by either bumping the version or setting -/// features -pub fn apply_properties_to_protocol( - current_protocol: &Protocol, - new_properties: &HashMap, - raise_if_not_exists: bool, -) -> DeltaResult { - let mut parsed_properties: HashMap = HashMap::new(); - - for (key, value) in new_properties { - if let Ok(parsed_key) = key.parse::() { - parsed_properties.insert(parsed_key, value.to_string()); - } else if raise_if_not_exists { - return Err(DeltaTableError::Generic(format!( - "Error parsing property '{}':'{}'", - key, value - ))); - } - } - - let mut new_protocol = current_protocol.clone(); - - // Check and update delta.minReaderVersion - if let Some(min_reader_version) = parsed_properties.get(&DeltaConfigKey::MinReaderVersion) { - let new_min_reader_version = min_reader_version.parse::(); - match new_min_reader_version { - Ok(version) => match version { - 1..=3 => { - if version > new_protocol.min_reader_version { - new_protocol.min_reader_version = version - } - } - _ => { - return Err(DeltaTableError::Generic(format!( - "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", - min_reader_version - ))) - } - }, - Err(_) => { - return Err(DeltaTableError::Generic(format!( - "delta.minReaderVersion = '{}' is invalid, valid values are ['1','2','3']", - min_reader_version - ))) - } - } - } - - // Check and update delta.minWriterVersion - if let Some(min_writer_version) = parsed_properties.get(&DeltaConfigKey::MinWriterVersion) { - let new_min_writer_version = min_writer_version.parse::(); - match new_min_writer_version { - Ok(version) => match version { - 2..=7 => { - if version > new_protocol.min_writer_version { - new_protocol.min_writer_version = version - } - } - _ => { - return Err(DeltaTableError::Generic(format!( - "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", - min_writer_version - ))) - } - }, - Err(_) => { - return Err(DeltaTableError::Generic(format!( - "delta.minWriterVersion = '{}' is invalid, valid values are ['2','3','4','5','6','7']", - min_writer_version - ))) - } - } - } - - // Check enableChangeDataFeed and bump protocol or add writerFeature if writer versions is >=7 - if let Some(enable_cdf) = parsed_properties.get(&DeltaConfigKey::EnableChangeDataFeed) { - let if_enable_cdf = enable_cdf.to_ascii_lowercase().parse::(); - match if_enable_cdf { - Ok(true) => { - if new_protocol.min_writer_version >= 7 { - match new_protocol.writer_features { - Some(mut features) => { - features.insert(WriterFeatures::ChangeDataFeed); - new_protocol.writer_features = Some(features); - } - None => { - new_protocol.writer_features = - Some(hashset! 
{WriterFeatures::ChangeDataFeed}) - } - } - } else if new_protocol.min_writer_version <= 3 { - new_protocol.min_writer_version = 4 - } - } - Ok(false) => {} - _ => { - return Err(DeltaTableError::Generic(format!( - "delta.enableChangeDataFeed = '{}' is invalid, valid values are ['true']", - enable_cdf - ))) - } - } - } - - if let Some(enable_dv) = parsed_properties.get(&DeltaConfigKey::EnableDeletionVectors) { - let if_enable_dv = enable_dv.to_ascii_lowercase().parse::(); - match if_enable_dv { - Ok(true) => { - let writer_features = match new_protocol.writer_features { - Some(mut features) => { - features.insert(WriterFeatures::DeletionVectors); - features - } - None => hashset! {WriterFeatures::DeletionVectors}, - }; - let reader_features = match new_protocol.reader_features { - Some(mut features) => { - features.insert(ReaderFeatures::DeletionVectors); - features - } - None => hashset! {ReaderFeatures::DeletionVectors}, - }; - new_protocol.min_reader_version = 3; - new_protocol.min_writer_version = 7; - new_protocol.writer_features = Some(writer_features); - new_protocol.reader_features = Some(reader_features); - } - Ok(false) => {} - _ => { - return Err(DeltaTableError::Generic(format!( - "delta.enableDeletionVectors = '{}' is invalid, valid values are ['true']", - enable_dv - ))) - } - } - } - - Ok(new_protocol) -} - -/// Converts existing properties into features if the reader_version is >=3 or writer_version >=3 -/// only converts features that are "true" -pub fn convert_properties_to_features( - mut new_protocol: Protocol, - configuration: &HashMap>, -) -> Protocol { - if new_protocol.min_writer_version >= 7 { - let mut converted_writer_features = configuration - .iter() - .filter(|(_, value)| { - value.as_ref().map_or(false, |v| { - v.to_ascii_lowercase().parse::().is_ok_and(|v| v) - }) - }) - .collect::>>() - .keys() - .map(|key| (*key).clone().into()) - .filter(|v| !matches!(v, WriterFeatures::Other(_))) - .collect::>(); - - if configuration - .keys() - .any(|v| v.contains("delta.constraints.")) - { - converted_writer_features.insert(WriterFeatures::CheckConstraints); - } - - match new_protocol.writer_features { - Some(mut features) => { - features.extend(converted_writer_features); - new_protocol.writer_features = Some(features); - } - None => new_protocol.writer_features = Some(converted_writer_features), - } - } - if new_protocol.min_reader_version >= 3 { - let converted_reader_features = configuration - .iter() - .filter(|(_, value)| { - value.as_ref().map_or(false, |v| { - v.to_ascii_lowercase().parse::().is_ok_and(|v| v) - }) - }) - .map(|(key, _)| (*key).clone().into()) - .filter(|v| !matches!(v, ReaderFeatures::Other(_))) - .collect::>(); - match new_protocol.reader_features { - Some(mut features) => { - features.extend(converted_reader_features); - new_protocol.reader_features = Some(features); - } - None => new_protocol.reader_features = Some(converted_reader_features), - } - } - new_protocol -} - impl std::future::IntoFuture for SetTablePropertiesBuilder { type Output = DeltaResult; @@ -270,11 +71,9 @@ impl std::future::IntoFuture for SetTablePropertiesBuilder { let current_protocol = this.snapshot.protocol(); let properties = this.properties; - let new_protocol = apply_properties_to_protocol( - current_protocol, - &properties, - this.raise_if_not_exists, - )?; + let new_protocol = current_protocol + .clone() + .apply_properties_to_protocol(&properties, this.raise_if_not_exists)?; metadata.configuration.extend( properties @@ -285,7 +84,7 @@ impl 
std::future::IntoFuture for SetTablePropertiesBuilder { ); let final_protocol = - convert_properties_to_features(new_protocol, &metadata.configuration); + new_protocol.move_table_properties_into_features(&metadata.configuration); let operation = DeltaOperation::SetTableProperties { properties }; diff --git a/crates/core/src/operations/transaction/conflict_checker.rs b/crates/core/src/operations/transaction/conflict_checker.rs index d44c704b53..d163ba2f9b 100644 --- a/crates/core/src/operations/transaction/conflict_checker.rs +++ b/crates/core/src/operations/transaction/conflict_checker.rs @@ -645,28 +645,30 @@ pub(super) fn can_downgrade_to_snapshot_isolation<'a>( #[cfg(test)] #[allow(unused)] mod tests { - use super::super::test_utils as tu; - use super::super::test_utils::init_table_actions; - use super::*; - use crate::kernel::Action; + use std::collections::HashMap; + #[cfg(feature = "datafusion")] use datafusion_expr::{col, lit}; use serde_json::json; - fn get_stats(min: i64, max: i64) -> Option { - let data = json!({ - "numRecords": 18, - "minValues": { - "value": min - }, - "maxValues": { - "value": max - }, - "nullCount": { - "value": 0 - } - }); - Some(data.to_string()) + use super::*; + use crate::kernel::Action; + use crate::test_utils::{ActionFactory, TestSchemas}; + + fn simple_add(data_change: bool, min: &str, max: &str) -> Add { + ActionFactory::add( + TestSchemas::simple(), + HashMap::from_iter([("value", (min, max))]), + Default::default(), + true, + ) + } + + fn init_table_actions() -> Vec { + vec![ + ActionFactory::protocol(None, None, None::>, None::>).into(), + ActionFactory::metadata(TestSchemas::simple(), None::>, None).into(), + ] } #[test] @@ -676,7 +678,8 @@ mod tests { predicate: None, target_size: 0, }; - let add = tu::create_add_action("p", false, None); + let add = + ActionFactory::add(TestSchemas::simple(), HashMap::new(), Vec::new(), true).into(); let res = can_downgrade_to_snapshot_isolation(&[add], &operation, &isolation); assert!(!res) } @@ -697,7 +700,7 @@ mod tests { ) -> Result<(), CommitConflictError> { use crate::table::state::DeltaTableState; - let setup_actions = setup.unwrap_or_else(|| init_table_actions(None)); + let setup_actions = setup.unwrap_or_else(init_table_actions); let state = DeltaTableState::from_actions(setup_actions).unwrap(); let snapshot = state.snapshot(); let transaction_info = TransactionInfo::new(snapshot, reads, &actions, read_whole_table); @@ -715,22 +718,23 @@ mod tests { async fn test_allowed_concurrent_actions() { // append - append // append file to table while a concurrent writer also appends a file - let file1 = tu::create_add_action("file1", true, get_stats(1, 10)); - let file2 = tu::create_add_action("file2", true, get_stats(1, 10)); + let file1 = simple_add(true, "1", "10").into(); + let file2 = simple_add(true, "1", "10").into(); + let result = execute_test(None, None, vec![file1], vec![file2], false); assert!(result.is_ok()); // disjoint delete - read // the concurrent transaction deletes a file that the current transaction did NOT read - let file_not_read = tu::create_add_action("file_not_read", true, get_stats(1, 10)); - let file_read = tu::create_add_action("file_read", true, get_stats(100, 10000)); - let mut setup_actions = init_table_actions(None); - setup_actions.push(file_not_read); + let file_not_read = simple_add(true, "1", "10"); + let file_read = simple_add(true, "100", "10000").into(); + let mut setup_actions = init_table_actions(); + setup_actions.push(file_not_read.clone().into()); 
setup_actions.push(file_read); let result = execute_test( Some(setup_actions), Some(col("value").gt(lit::(10))), - vec![tu::create_remove_action("file_not_read", true)], + vec![ActionFactory::remove(&file_not_read, true).into()], vec![], false, ); @@ -738,9 +742,9 @@ mod tests { // disjoint add - read // concurrently add file, that the current transaction would not have read - let file_added = tu::create_add_action("file_added", true, get_stats(1, 10)); - let file_read = tu::create_add_action("file_read", true, get_stats(100, 10000)); - let mut setup_actions = init_table_actions(None); + let file_added = simple_add(true, "1", "10").into(); + let file_read = simple_add(true, "100", "10000").into(); + let mut setup_actions = init_table_actions(); setup_actions.push(file_read); let result = execute_test( Some(setup_actions), @@ -774,7 +778,8 @@ mod tests { async fn test_disallowed_concurrent_actions() { // delete - delete // remove file from table that has previously been removed - let removed_file = tu::create_remove_action("removed_file", true); + let removed_file = simple_add(true, "1", "10"); + let removed_file: Action = ActionFactory::remove(&removed_file, true).into(); let result = execute_test( None, None, @@ -789,9 +794,8 @@ mod tests { // add / read + write // a file is concurrently added that should have been read by the current transaction - let file_added = tu::create_add_action("file_added", true, get_stats(1, 10)); - let file_should_have_read = - tu::create_add_action("file_should_have_read", true, get_stats(1, 10)); + let file_added = simple_add(true, "1", "10").into(); + let file_should_have_read = simple_add(true, "1", "10").into(); let result = execute_test( None, Some(col("value").lt_eq(lit::(10))), @@ -803,13 +807,13 @@ mod tests { // delete / read // transaction reads a file that is removed by concurrent transaction - let file_read = tu::create_add_action("file_read", true, get_stats(1, 10)); - let mut setup_actions = init_table_actions(None); - setup_actions.push(file_read); + let file_read = simple_add(true, "1", "10"); + let mut setup_actions = init_table_actions(); + setup_actions.push(file_read.clone().into()); let result = execute_test( Some(setup_actions), Some(col("value").lt_eq(lit::(10))), - vec![tu::create_remove_action("file_read", true)], + vec![ActionFactory::remove(&file_read, true).into()], vec![], false, ); @@ -823,7 +827,7 @@ mod tests { let result = execute_test( None, None, - vec![tu::create_metadata_action(None, None)], + vec![ActionFactory::metadata(TestSchemas::simple(), None::>, None).into()], vec![], false, ); @@ -834,8 +838,8 @@ mod tests { let result = execute_test( None, None, - vec![tu::create_protocol_action(None, None)], - vec![tu::create_protocol_action(None, None)], + vec![ActionFactory::protocol(None, None, None::>, None::>).into()], + vec![ActionFactory::protocol(None, None, None::>, None::>).into()], false, ); assert!(matches!( @@ -846,10 +850,10 @@ mod tests { // taint whole table // `read_whole_table` should disallow any concurrent change, even if the change // is disjoint with the earlier filter - let file_part1 = tu::create_add_action("file_part1", true, get_stats(1, 10)); - let file_part2 = tu::create_add_action("file_part2", true, get_stats(11, 100)); - let file_part3 = tu::create_add_action("file_part3", true, get_stats(101, 1000)); - let mut setup_actions = init_table_actions(None); + let file_part1 = simple_add(true, "1", "10").into(); + let file_part2 = simple_add(true, "11", "100").into(); + let file_part3 = simple_add(true, 
"101", "1000").into(); + let mut setup_actions = init_table_actions(); setup_actions.push(file_part1); let result = execute_test( Some(setup_actions), @@ -863,14 +867,14 @@ mod tests { // taint whole table + concurrent remove // `read_whole_table` should disallow any concurrent remove actions - let file_part1 = tu::create_add_action("file_part1", true, get_stats(1, 10)); - let file_part2 = tu::create_add_action("file_part2", true, get_stats(11, 100)); - let mut setup_actions = init_table_actions(None); - setup_actions.push(file_part1); + let file_part1 = simple_add(true, "1", "10"); + let file_part2 = simple_add(true, "11", "100").into(); + let mut setup_actions = init_table_actions(); + setup_actions.push(file_part1.clone().into()); let result = execute_test( Some(setup_actions), None, - vec![tu::create_remove_action("file_part1", true)], + vec![ActionFactory::remove(&file_part1, true).into()], vec![file_part2], true, ); diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index 31cbc3a33b..69027cc4b7 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -73,28 +73,31 @@ //! │ │ //! └───────────────────────────────┘ //! +use std::collections::HashMap; +use bytes::Bytes; use chrono::Utc; use conflict_checker::ConflictChecker; use futures::future::BoxFuture; use object_store::path::Path; -use object_store::{Error as ObjectStoreError, ObjectStore}; +use object_store::Error as ObjectStoreError; use serde_json::Value; -use std::collections::HashMap; -use self::conflict_checker::{CommitConflictError, TransactionInfo, WinningCommitSummary}; -use crate::checkpoints::create_checkpoint_for; +use self::conflict_checker::{TransactionInfo, WinningCommitSummary}; +use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for}; use crate::errors::DeltaTableError; use crate::kernel::{ Action, CommitInfo, EagerSnapshot, Metadata, Protocol, ReaderFeatures, Transaction, WriterFeatures, }; -use crate::logstore::LogStoreRef; +use crate::logstore::{CommitOrBytes, LogStoreRef}; use crate::protocol::DeltaOperation; +use crate::storage::ObjectStoreRef; use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; use crate::{crate_version, DeltaResult}; +pub use self::conflict_checker::CommitConflictError; pub use self::protocol::INSTANCE as PROTOCOL; #[cfg(test)] @@ -103,8 +106,6 @@ mod conflict_checker; mod protocol; #[cfg(feature = "datafusion")] mod state; -#[cfg(test)] -pub(crate) mod test_utils; const DELTA_LOG_FOLDER: &str = "_delta_log"; pub(crate) const DEFAULT_RETRIES: usize = 15; @@ -309,6 +310,8 @@ impl CommitData { /// Properties for post commit hook. 
pub struct PostCommitHookProperties { create_checkpoint: bool, + /// Override the EnableExpiredLogCleanUp setting, if None config setting is used + cleanup_expired_logs: Option, } #[derive(Clone, Debug)] @@ -319,6 +322,7 @@ pub struct CommitProperties { pub(crate) app_transaction: Vec, max_retries: usize, create_checkpoint: bool, + cleanup_expired_logs: Option, } impl Default for CommitProperties { @@ -328,6 +332,7 @@ impl Default for CommitProperties { app_transaction: Vec::new(), max_retries: DEFAULT_RETRIES, create_checkpoint: true, + cleanup_expired_logs: None, } } } @@ -342,6 +347,12 @@ impl CommitProperties { self } + /// Specify maximum number of times to retry the transaction before failing to commit + pub fn with_max_retries(mut self, max_retries: usize) -> Self { + self.max_retries = max_retries; + self + } + /// Specify if it should create a checkpoint when the commit interval condition is met pub fn with_create_checkpoint(mut self, create_checkpoint: bool) -> Self { self.create_checkpoint = create_checkpoint; @@ -359,6 +370,12 @@ impl CommitProperties { self.app_transaction = txn; self } + + /// Specify if it should clean up the logs when the logRetentionDuration interval is met + pub fn with_cleanup_expired_logs(mut self, cleanup_expired_logs: Option) -> Self { + self.cleanup_expired_logs = cleanup_expired_logs; + self + } } impl From for CommitBuilder { @@ -368,6 +385,7 @@ impl From for CommitBuilder { app_metadata: value.app_metadata, post_commit_hook: Some(PostCommitHookProperties { create_checkpoint: value.create_checkpoint, + cleanup_expired_logs: value.cleanup_expired_logs, }), app_transaction: value.app_transaction, ..Default::default() @@ -467,20 +485,34 @@ impl<'a> PreCommit<'a> { pub fn into_prepared_commit_future(self) -> BoxFuture<'a, DeltaResult>> { let this = self; + // Write delta log entry as temporary file to storage. For the actual commit, + // the temporary file is moved (atomic rename) to the delta log folder within `commit` function. + async fn write_tmp_commit( + log_entry: Bytes, + store: ObjectStoreRef, + ) -> DeltaResult { + let token = uuid::Uuid::new_v4().to_string(); + let path = Path::from_iter([DELTA_LOG_FOLDER, &format!("_commit_{token}.json.tmp")]); + store.put(&path, log_entry.into()).await?; + Ok(CommitOrBytes::TmpCommit(path)) + } + Box::pin(async move { if let Some(table_reference) = this.table_data { PROTOCOL.can_commit(table_reference, &this.data.actions, &this.data.operation)?; } - - // Write delta log entry as temporary file to storage. For the actual commit, - // the temporary file is moved (atomic rename) to the delta log folder within `commit` function. let log_entry = this.data.get_bytes()?; - let token = uuid::Uuid::new_v4().to_string(); - let path = Path::from_iter([DELTA_LOG_FOLDER, &format!("_commit_{token}.json.tmp")]); - this.log_store.object_store().put(&path, log_entry).await?; + + // With the DefaultLogStore, we just pass the bytes around, since we use conditionalPuts + // Other stores will use tmp_commits + let commit_or_bytes = if this.log_store.name() == "DefaultLogStore" { + CommitOrBytes::LogBytes(log_entry) + } else { + write_tmp_commit(log_entry, this.log_store.object_store()).await? 
+ }; Ok(PreparedCommit { - path, + commit_or_bytes, log_store: this.log_store, table_data: this.table_data, max_retries: this.max_retries, @@ -491,9 +523,9 @@ impl<'a> PreCommit<'a> { } } -/// Represents a inflight commit with a temporary commit marker on the log store +/// Represents a inflight commit pub struct PreparedCommit<'a> { - path: Path, + commit_or_bytes: CommitOrBytes, log_store: LogStoreRef, data: CommitData, table_data: Option<&'a dyn TableReference>, @@ -503,8 +535,8 @@ pub struct PreparedCommit<'a> { impl<'a> PreparedCommit<'a> { /// The temporary commit file created - pub fn path(&self) -> &Path { - &self.path + pub fn commit_or_bytes(&self) -> &CommitOrBytes { + &self.commit_or_bytes } } @@ -516,14 +548,17 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { let this = self; Box::pin(async move { - let tmp_commit = &this.path; + let commit_or_bytes = this.commit_or_bytes; if this.table_data.is_none() { - this.log_store.write_commit_entry(0, tmp_commit).await?; + this.log_store + .write_commit_entry(0, commit_or_bytes.clone()) + .await?; return Ok(PostCommit { version: 0, data: this.data, create_checkpoint: false, + cleanup_expired_logs: None, log_store: this.log_store, table_data: this.table_data, }); @@ -536,7 +571,11 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { let mut attempt_number = 1; while attempt_number <= this.max_retries { let version = read_snapshot.version() + attempt_number as i64; - match this.log_store.write_commit_entry(version, tmp_commit).await { + match this + .log_store + .write_commit_entry(version, commit_or_bytes.clone()) + .await + { Ok(()) => { return Ok(PostCommit { version, @@ -545,6 +584,10 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { .post_commit .map(|v| v.create_checkpoint) .unwrap_or_default(), + cleanup_expired_logs: this + .post_commit + .map(|v| v.cleanup_expired_logs) + .unwrap_or_default(), log_store: this.log_store, table_data: this.table_data, }); @@ -573,7 +616,7 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { } Err(err) => { this.log_store - .abort_commit_entry(version, tmp_commit) + .abort_commit_entry(version, commit_or_bytes) .await?; return Err(TransactionError::CommitConflict(err).into()); } @@ -581,7 +624,7 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { } Err(err) => { this.log_store - .abort_commit_entry(version, tmp_commit) + .abort_commit_entry(version, commit_or_bytes) .await?; return Err(err.into()); } @@ -600,6 +643,7 @@ pub struct PostCommit<'a> { /// The data that was comitted to the log store pub data: CommitData, create_checkpoint: bool, + cleanup_expired_logs: Option, log_store: LogStoreRef, table_data: Option<&'a dyn TableReference>, } @@ -625,6 +669,21 @@ impl<'a> PostCommit<'a> { self.create_checkpoint(&state, &self.log_store, self.version) .await?; } + let cleanup_logs = if let Some(cleanup_logs) = self.cleanup_expired_logs { + cleanup_logs + } else { + state.table_config().enable_expired_log_cleanup() + }; + + if cleanup_logs { + cleanup_expired_logs_for( + self.version, + self.log_store.as_ref(), + Utc::now().timestamp_millis() + - state.table_config().log_retention_duration().as_millis() as i64, + ) + .await?; + } Ok(state) } else { let state = DeltaTableState::try_new( @@ -699,7 +758,7 @@ mod tests { logstore::{default_logstore::DefaultLogStore, LogStore}, storage::commit_uri_from_version, }; - use object_store::memory::InMemory; + use object_store::{memory::InMemory, ObjectStore, PutPayload}; use url::Url; #[test] @@ -721,16 +780,19 @@ mod 
tests { options: HashMap::new().into(), }, ); - let tmp_path = Path::from("_delta_log/tmp"); let version_path = Path::from("_delta_log/00000000000000000000.json"); - store.put(&tmp_path, bytes::Bytes::new()).await.unwrap(); - store.put(&version_path, bytes::Bytes::new()).await.unwrap(); + store.put(&version_path, PutPayload::new()).await.unwrap(); - let res = log_store.write_commit_entry(0, &tmp_path).await; + let res = log_store + .write_commit_entry(0, CommitOrBytes::LogBytes(PutPayload::new().into())) + .await; // fails if file version already exists assert!(res.is_err()); // succeeds for next version - log_store.write_commit_entry(1, &tmp_path).await.unwrap(); + log_store + .write_commit_entry(1, CommitOrBytes::LogBytes(PutPayload::new().into())) + .await + .unwrap(); } } diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index c5d9cdf650..b9ea7d65aa 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -2,6 +2,7 @@ use std::collections::HashSet; use lazy_static::lazy_static; use once_cell::sync::Lazy; +use tracing::log::*; use super::{TableReference, TransactionError}; use crate::kernel::{ @@ -80,20 +81,19 @@ impl ProtocolChecker { } /// checks if table contains timestamp_ntz in any field including nested fields. - pub fn contains_timestampntz(&self, fields: &[StructField]) -> bool { - fn check_vec_fields(fields: &[StructField]) -> bool { - fields.iter().any(|f| _check_type(f.data_type())) - } - + pub fn contains_timestampntz<'a>( + &self, + mut fields: impl Iterator, + ) -> bool { fn _check_type(dtype: &DataType) -> bool { match dtype { - &DataType::TIMESTAMPNTZ => true, + &DataType::TIMESTAMP_NTZ => true, DataType::Array(inner) => _check_type(inner.element_type()), - DataType::Struct(inner) => check_vec_fields(inner.fields()), + DataType::Struct(inner) => inner.fields().any(|f| _check_type(f.data_type())), _ => false, } } - check_vec_fields(fields) + fields.any(|f| _check_type(f.data_type())) } /// Check can write_timestamp_ntz @@ -148,17 +148,33 @@ impl ProtocolChecker { pub fn can_write_to(&self, snapshot: &dyn TableReference) -> Result<(), TransactionError> { // NOTE: writers must always support all required reader features self.can_read_from(snapshot)?; + let min_writer_version = snapshot.protocol().min_writer_version; + + let required_features: Option<&HashSet> = match min_writer_version { + 0 | 1 => None, + 2 => Some(&WRITER_V2), + 3 => Some(&WRITER_V3), + 4 => Some(&WRITER_V4), + 5 => Some(&WRITER_V5), + 6 => Some(&WRITER_V6), + _ => snapshot.protocol().writer_features.as_ref(), + }; - let required_features: Option<&HashSet> = - match snapshot.protocol().min_writer_version { - 0 | 1 => None, - 2 => Some(&WRITER_V2), - 3 => Some(&WRITER_V3), - 4 => Some(&WRITER_V4), - 5 => Some(&WRITER_V5), - 6 => Some(&WRITER_V6), - _ => snapshot.protocol().writer_features.as_ref(), - }; + if (4..7).contains(&min_writer_version) { + debug!("min_writer_version is less 4-6, checking for unsupported table features"); + if let Ok(schema) = snapshot.metadata().schema() { + for field in schema.fields() { + if field.metadata.contains_key( + crate::kernel::ColumnMetadataKey::GenerationExpression.as_ref(), + ) { + error!("The table contains `delta.generationExpression` settings on columns which mean this table cannot be currently written to by delta-rs"); + return Err(TransactionError::UnsupportedWriterFeatures(vec![ + WriterFeatures::GeneratedColumns, + ])); + } + } 
+ } + } if let Some(features) = required_features { let mut diff = features.difference(&self.writer_features).peekable(); @@ -228,6 +244,11 @@ pub static INSTANCE: Lazy = Lazy::new(|| { let mut writer_features = HashSet::new(); writer_features.insert(WriterFeatures::AppendOnly); writer_features.insert(WriterFeatures::TimestampWithoutTimezone); + #[cfg(feature = "cdf")] + { + writer_features.insert(WriterFeatures::ChangeDataFeed); + writer_features.insert(WriterFeatures::GeneratedColumns); + } #[cfg(feature = "datafusion")] { writer_features.insert(WriterFeatures::Invariants); @@ -243,13 +264,19 @@ pub static INSTANCE: Lazy = Lazy::new(|| { #[cfg(test)] mod tests { - use super::super::test_utils::create_metadata_action; + use std::collections::HashMap; + use super::*; - use crate::kernel::{Action, Add, Protocol, Remove}; + use crate::kernel::DataType as DeltaDataType; + use crate::kernel::{Action, Add, Metadata, PrimitiveType, Protocol, Remove}; use crate::protocol::SaveMode; use crate::table::state::DeltaTableState; - use crate::DeltaConfigKey; - use std::collections::HashMap; + use crate::test_utils::{ActionFactory, TestSchemas}; + use crate::TableProperty; + + fn metadata_action(configuration: Option>>) -> Metadata { + ActionFactory::metadata(TestSchemas::simple(), None::>, configuration) + } #[test] fn test_can_commit_append_only() { @@ -300,13 +327,11 @@ mod tests { writer_features: Some(feat.into_iter().collect()), ..Default::default() }), - create_metadata_action( - None, - Some(HashMap::from([( - DeltaConfigKey::AppendOnly.as_ref().to_string(), - Some(append.to_string()), - )])), - ), + metadata_action(Some(HashMap::from([( + TableProperty::AppendOnly.as_ref().to_string(), + Some(append.to_string()), + )]))) + .into(), ] }; @@ -400,7 +425,7 @@ mod tests { min_writer_version: 1, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_1 = DeltaTableState::from_actions(actions).unwrap(); let eager_1 = snapshot_1.snapshot(); @@ -414,7 +439,7 @@ mod tests { min_writer_version: 1, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_2 = DeltaTableState::from_actions(actions).unwrap(); let eager_2 = snapshot_2.snapshot(); @@ -431,7 +456,7 @@ mod tests { min_writer_version: 2, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_3 = DeltaTableState::from_actions(actions).unwrap(); let eager_3 = snapshot_3.snapshot(); @@ -451,7 +476,7 @@ mod tests { min_writer_version: 3, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_4 = DeltaTableState::from_actions(actions).unwrap(); let eager_4 = snapshot_4.snapshot(); @@ -474,7 +499,7 @@ mod tests { min_writer_version: 4, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); let eager_5 = snapshot_5.snapshot(); @@ -500,7 +525,7 @@ mod tests { min_writer_version: 5, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + metadata_action(None).into(), ]; let snapshot_6 = DeltaTableState::from_actions(actions).unwrap(); let eager_6 = snapshot_6.snapshot(); @@ -529,7 +554,7 @@ mod tests { min_writer_version: 6, ..Default::default() }), - create_metadata_action(None, Some(HashMap::new())), + 
metadata_action(None).into(), ]; let snapshot_7 = DeltaTableState::from_actions(actions).unwrap(); let eager_7 = snapshot_7.snapshot(); @@ -554,4 +579,63 @@ mod tests { assert!(checker_7.can_read_from(eager_7).is_ok()); assert!(checker_7.can_write_to(eager_7).is_ok()); } + + #[tokio::test] + async fn test_minwriter_v4_with_cdf() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![ + Action::Protocol( + Protocol::new(2, 4) + .with_writer_features(vec![crate::kernel::WriterFeatures::ChangeDataFeed]), + ), + metadata_action(None).into(), + ]; + let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); + let eager_5 = snapshot_5.snapshot(); + assert!(checker_5.can_write_to(eager_5).is_ok()); + } + + /// Technically we do not yet support generated columns, but it is okay to "accept" writing to + /// a column with minWriterVersion=4 and the generated columns feature as long as the + /// `delta.generationExpression` isn't actually defined the write is still allowed + #[tokio::test] + async fn test_minwriter_v4_with_generated_columns() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![ + Action::Protocol( + Protocol::new(2, 4) + .with_writer_features(vec![crate::kernel::WriterFeatures::GeneratedColumns]), + ), + metadata_action(None).into(), + ]; + let snapshot_5 = DeltaTableState::from_actions(actions).unwrap(); + let eager_5 = snapshot_5.snapshot(); + assert!(checker_5.can_write_to(eager_5).is_ok()); + } + + #[tokio::test] + async fn test_minwriter_v4_with_generated_columns_and_expressions() { + let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); + let actions = vec![Action::Protocol(Protocol::new(2, 4))]; + + let table: crate::DeltaTable = crate::DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + Some(HashMap::from([( + "delta.generationExpression".into(), + "x IS TRUE".into(), + )])), + ) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .expect("failed to make a version 4 table with EnableChangeDataFeed"); + let eager_5 = table + .snapshot() + .expect("Failed to get snapshot from test table"); + assert!(checker_5.can_write_to(eager_5).is_err()); + } } diff --git a/crates/core/src/operations/transaction/state.rs b/crates/core/src/operations/transaction/state.rs index e979cda363..56769c8c62 100644 --- a/crates/core/src/operations/transaction/state.rs +++ b/crates/core/src/operations/transaction/state.rs @@ -1,85 +1,18 @@ use std::collections::HashSet; -use std::sync::Arc; -use arrow::array::{ArrayRef, BooleanArray}; -use arrow::datatypes::{ - DataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, -}; +use arrow_array::{ArrayRef, BooleanArray}; +use arrow_schema::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}; +use datafusion::execution::context::SessionContext; use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use datafusion_common::scalar::ScalarValue; -use datafusion_common::Column; +use datafusion_common::{Column, ToDFSchema}; use datafusion_expr::Expr; -use itertools::Itertools; -use object_store::ObjectStore; -use parquet::arrow::arrow_reader::ArrowReaderOptions; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; - -use crate::delta_datafusion::{ - get_null_of_arrow_type, logical_expr_to_physical_expr, 
to_correct_scalar_value, - DataFusionMixins, -}; + +use crate::delta_datafusion::{get_null_of_arrow_type, to_correct_scalar_value}; use crate::errors::DeltaResult; use crate::kernel::{Add, EagerSnapshot}; use crate::table::state::DeltaTableState; -impl DeltaTableState { - /// Get the physical table schema. - /// - /// This will construct a schema derived from the parquet schema of the latest data file, - /// and fields for partition columns from the schema defined in table meta data. - pub async fn physical_arrow_schema( - &self, - object_store: Arc, - ) -> DeltaResult { - self.snapshot.physical_arrow_schema(object_store).await - } -} - -impl EagerSnapshot { - /// Get the physical table schema. - /// - /// This will construct a schema derived from the parquet schema of the latest data file, - /// and fields for partition columns from the schema defined in table meta data. - pub async fn physical_arrow_schema( - &self, - object_store: Arc, - ) -> DeltaResult { - if let Some(add) = self.file_actions()?.max_by_key(|obj| obj.modification_time) { - let file_meta = add.try_into()?; - let file_reader = ParquetObjectReader::new(object_store, file_meta); - let file_schema = ParquetRecordBatchStreamBuilder::new_with_options( - file_reader, - ArrowReaderOptions::new().with_skip_arrow_metadata(true), - ) - .await? - .build()? - .schema() - .clone(); - - let table_schema = Arc::new(ArrowSchema::new( - self.arrow_schema()? - .fields - .clone() - .into_iter() - .map(|field| { - // field is an &Arc - let owned_field: ArrowField = field.as_ref().clone(); - file_schema - .field_with_name(field.name()) - // yielded with &Field - .cloned() - .unwrap_or(owned_field) - }) - .collect::>(), - )); - - Ok(table_schema) - } else { - self.arrow_schema() - } - } -} - pub struct AddContainer<'a> { inner: &'a Vec, partition_columns: &'a Vec, @@ -104,7 +37,7 @@ impl<'a> AddContainer<'a> { let (_, field) = self.schema.column_with_name(&column.name)?; // See issue 1214. Binary type does not support natural order which is required for Datafusion to prune - if field.data_type() == &DataType::Binary { + if field.data_type() == &ArrowDataType::Binary { return None; } @@ -153,7 +86,9 @@ impl<'a> AddContainer<'a> { /// so evaluating expressions is inexact. However, excluded files are guaranteed (for a correct log) /// to not contain matches by the predicate expression. pub fn predicate_matches(&self, predicate: Expr) -> DeltaResult> { - let expr = logical_expr_to_physical_expr(predicate, &self.schema); + //let expr = logical_expr_to_physical_expr(predicate, &self.schema); + let expr = SessionContext::new() + .create_physical_expr(predicate, &self.schema.clone().to_dfschema()?)?; let pruning_predicate = PruningPredicate::try_new(expr, self.schema.clone())?; Ok(self .inner @@ -249,25 +184,19 @@ impl PruningStatistics for EagerSnapshot { /// return the minimum values for the named column, if known. /// Note: the returned array must contain `num_containers()` rows fn min_values(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?.collect_vec(); - let partition_columns = &self.metadata().partition_columns; - let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); - container.min_values(column) + self.log_data().min_values(column) } /// return the maximum values for the named column, if known. /// Note: the returned array must contain `num_containers()` rows. 
fn max_values(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?.collect_vec(); - let partition_columns = &self.metadata().partition_columns; - let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); - container.max_values(column) + self.log_data().max_values(column) } /// return the number of containers (e.g. row groups) being /// pruned with these statistics fn num_containers(&self) -> usize { - self.files_count() + self.log_data().num_containers() } /// return the number of null values for the named column as an @@ -275,10 +204,7 @@ impl PruningStatistics for EagerSnapshot { /// /// Note: the returned array must contain `num_containers()` rows. fn null_counts(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?.collect_vec(); - let partition_columns = &self.metadata().partition_columns; - let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); - container.null_counts(column) + self.log_data().null_counts(column) } /// return the number of rows for the named column in each container @@ -286,56 +212,64 @@ impl PruningStatistics for EagerSnapshot { /// /// Note: the returned array must contain `num_containers()` rows fn row_counts(&self, column: &Column) -> Option { - let files = self.file_actions().ok()?.collect_vec(); - let partition_columns = &self.metadata().partition_columns; - let container = AddContainer::new(&files, partition_columns, self.arrow_schema().ok()?); - container.row_counts(column) + self.log_data().row_counts(column) } // This function is required since DataFusion 35.0, but is implemented as a no-op // https://github.com/apache/arrow-datafusion/blob/ec6abece2dcfa68007b87c69eefa6b0d7333f628/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs#L550 - fn contained(&self, _column: &Column, _value: &HashSet) -> Option { - None + fn contained(&self, column: &Column, value: &HashSet) -> Option { + self.log_data().contained(column, value) } } impl PruningStatistics for DeltaTableState { fn min_values(&self, column: &Column) -> Option { - self.snapshot.min_values(column) + self.snapshot.log_data().min_values(column) } fn max_values(&self, column: &Column) -> Option { - self.snapshot.max_values(column) + self.snapshot.log_data().max_values(column) } fn num_containers(&self) -> usize { - self.snapshot.num_containers() + self.snapshot.log_data().num_containers() } fn null_counts(&self, column: &Column) -> Option { - self.snapshot.null_counts(column) + self.snapshot.log_data().null_counts(column) } fn row_counts(&self, column: &Column) -> Option { - self.snapshot.row_counts(column) + self.snapshot.log_data().row_counts(column) } fn contained(&self, column: &Column, values: &HashSet) -> Option { - self.snapshot.contained(column, values) + self.snapshot.log_data().contained(column, values) } } #[cfg(test)] mod tests { - use super::*; - use crate::delta_datafusion::DataFusionFileMixins; - use crate::operations::transaction::test_utils::{create_add_action, init_table_actions}; + use std::collections::HashMap; + use datafusion::prelude::SessionContext; use datafusion_expr::{col, lit}; + use super::*; + use crate::delta_datafusion::{files_matching_predicate, DataFusionMixins}; + use crate::kernel::Action; + use crate::test_utils::{ActionFactory, TestSchemas}; + + fn init_table_actions() -> Vec { + vec![ + ActionFactory::protocol(None, None, None::>, None::>).into(), + ActionFactory::metadata(TestSchemas::simple(), None::>, None).into(), + ] + } + #[test] fn 
test_parse_predicate_expression() { - let snapshot = DeltaTableState::from_actions(init_table_actions(None)).unwrap(); + let snapshot = DeltaTableState::from_actions(init_table_actions()).unwrap(); let session = SessionContext::new(); let state = session.state(); @@ -362,15 +296,29 @@ mod tests { #[test] fn test_files_matching_predicate() { - let mut actions = init_table_actions(None); - actions.push(create_add_action("excluded", true, Some("{\"numRecords\":10,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":10},\"nullCount\":{\"value\":0}}".into()))); - actions.push(create_add_action("included-1", true, Some("{\"numRecords\":10,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":100},\"nullCount\":{\"value\":0}}".into()))); - actions.push(create_add_action("included-2", true, Some("{\"numRecords\":10,\"minValues\":{\"value\":-10},\"maxValues\":{\"value\":3},\"nullCount\":{\"value\":0}}".into()))); + let mut actions = init_table_actions(); + + actions.push(Action::Add(ActionFactory::add( + TestSchemas::simple(), + HashMap::from_iter([("value", ("1", "10"))]), + Default::default(), + true, + ))); + actions.push(Action::Add(ActionFactory::add( + TestSchemas::simple(), + HashMap::from_iter([("value", ("1", "100"))]), + Default::default(), + true, + ))); + actions.push(Action::Add(ActionFactory::add( + TestSchemas::simple(), + HashMap::from_iter([("value", ("-10", "3"))]), + Default::default(), + true, + ))); let state = DeltaTableState::from_actions(actions).unwrap(); - let files = state - .snapshot - .files_matching_predicate(&[]) + let files = files_matching_predicate(&state.snapshot, &[]) .unwrap() .collect::>(); assert_eq!(files.len(), 3); @@ -379,12 +327,9 @@ mod tests { .gt(lit::(10)) .or(col("value").lt_eq(lit::(0))); - let files = state - .snapshot - .files_matching_predicate(&[predictate]) + let files = files_matching_predicate(&state.snapshot, &[predictate]) .unwrap() .collect::>(); assert_eq!(files.len(), 2); - assert!(files.iter().all(|add| add.path.contains("included"))); } } diff --git a/crates/core/src/operations/transaction/test_utils.rs b/crates/core/src/operations/transaction/test_utils.rs deleted file mode 100644 index ada5ded056..0000000000 --- a/crates/core/src/operations/transaction/test_utils.rs +++ /dev/null @@ -1,171 +0,0 @@ -#![allow(unused)] -use std::collections::HashMap; - -use super::CommitBuilder; -use crate::kernel::{ - Action, Add, CommitInfo, DataType, Metadata, PrimitiveType, Protocol, Remove, StructField, - StructType, -}; -use crate::operations::transaction::PROTOCOL; -use crate::protocol::{DeltaOperation, SaveMode}; -use crate::table::state::DeltaTableState; -use crate::{DeltaTable, DeltaTableBuilder}; - -pub fn create_add_action( - path: impl Into, - data_change: bool, - stats: Option, -) -> Action { - Action::Add(Add { - path: path.into(), - size: 100, - data_change, - stats, - modification_time: -1, - partition_values: Default::default(), - stats_parsed: None, - base_row_id: None, - default_row_commit_version: None, - tags: None, - deletion_vector: None, - clustering_provider: None, - }) -} - -pub fn create_remove_action(path: impl Into, data_change: bool) -> Action { - Action::Remove(Remove { - path: path.into(), - data_change, - size: None, - deletion_timestamp: None, - deletion_vector: None, - partition_values: Default::default(), - extended_file_metadata: None, - base_row_id: None, - default_row_commit_version: None, - tags: None, - }) -} - -pub fn create_protocol_action(max_reader: Option, max_writer: Option) -> Action { - let protocol = 
Protocol { - min_reader_version: max_reader.unwrap_or(PROTOCOL.default_reader_version()), - min_writer_version: max_writer.unwrap_or(PROTOCOL.default_writer_version()), - writer_features: None, - reader_features: None, - }; - Action::Protocol(protocol) -} - -pub fn create_metadata_action( - parttiton_columns: Option>, - configuration: Option>>, -) -> Action { - let table_schema = StructType::new(vec![ - StructField::new( - "id".to_string(), - DataType::Primitive(PrimitiveType::String), - true, - ), - StructField::new( - "value".to_string(), - DataType::Primitive(PrimitiveType::Integer), - true, - ), - StructField::new( - "modified".to_string(), - DataType::Primitive(PrimitiveType::String), - true, - ), - ]); - Action::Metadata( - Metadata::try_new( - table_schema, - parttiton_columns.unwrap_or_default(), - configuration.unwrap_or_default(), - ) - .unwrap(), - ) -} - -pub fn init_table_actions(configuration: Option>>) -> Vec { - let raw = r#" - { - "timestamp": 1670892998177, - "operation": "WRITE", - "operationParameters": { - "mode": "Append", - "partitionBy": "[\"c1\",\"c2\"]" - }, - "isolationLevel": "Serializable", - "isBlindAppend": true, - "operationMetrics": { - "numFiles": "3", - "numOutputRows": "3", - "numOutputBytes": "1356" - }, - "engineInfo": "Apache-Spark/3.3.1 Delta-Lake/2.2.0", - "txnId": "046a258f-45e3-4657-b0bf-abfb0f76681c" - }"#; - - let commit_info = serde_json::from_str::(raw).unwrap(); - vec![ - Action::CommitInfo(commit_info), - create_protocol_action(None, None), - create_metadata_action(None, configuration), - ] -} - -pub async fn create_initialized_table( - partition_cols: &[String], - configuration: Option>>, -) -> DeltaTable { - let log_store = DeltaTableBuilder::from_uri("memory://") - .build_storage() - .unwrap(); - let table_schema = StructType::new(vec![ - StructField::new( - "id".to_string(), - DataType::Primitive(PrimitiveType::String), - true, - ), - StructField::new( - "value".to_string(), - DataType::Primitive(PrimitiveType::Integer), - true, - ), - StructField::new( - "modified".to_string(), - DataType::Primitive(PrimitiveType::String), - true, - ), - ]); - let state = DeltaTableState::from_actions(init_table_actions(None)).unwrap(); - let operation = DeltaOperation::Create { - mode: SaveMode::ErrorIfExists, - location: "location".into(), - protocol: Protocol { - min_reader_version: 1, - min_writer_version: 1, - writer_features: None, - reader_features: None, - }, - metadata: Metadata { - id: uuid::Uuid::new_v4().to_string(), - name: None, - description: None, - format: Default::default(), - schema_string: serde_json::to_string(&table_schema).unwrap(), - partition_columns: partition_cols.to_vec(), - configuration: configuration.unwrap_or_default(), - created_time: Some(chrono::Utc::now().timestamp_millis()), - }, - }; - let actions = init_table_actions(None); - CommitBuilder::default() - .with_actions(actions) - .build(None, log_store.clone(), operation) - .await - .unwrap(); - DeltaTable::new_with_state(log_store, state) -} diff --git a/crates/core/src/operations/update.rs b/crates/core/src/operations/update.rs index 9a088c6ae9..61dc4b2f46 100644 --- a/crates/core/src/operations/update.rs +++ b/crates/core/src/operations/update.rs @@ -19,45 +19,62 @@ //! 
```` use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, sync::Arc, time::{Instant, SystemTime, UNIX_EPOCH}, }; -use arrow::datatypes::Schema as ArrowSchema; -use arrow_schema::Field; +use async_trait::async_trait; +use datafusion::error::Result as DataFusionResult; use datafusion::{ + dataframe::DataFrame, + datasource::provider_as_source, execution::context::SessionState, - physical_plan::{metrics::MetricBuilder, projection::ProjectionExec, ExecutionPlan}, + execution::session_state::SessionStateBuilder, + physical_plan::{metrics::MetricBuilder, ExecutionPlan}, + physical_planner::{ExtensionPlanner, PhysicalPlanner}, prelude::SessionContext, }; -use datafusion_common::{Column, DFSchema, ScalarValue}; -use datafusion_expr::{case, col, lit, when, Expr}; -use datafusion_physical_expr::{ - expressions::{self}, - PhysicalExpr, +use datafusion_common::{Column, ScalarValue}; +use datafusion_expr::{ + case, col, lit, when, Expr, Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode, }; use futures::future::BoxFuture; use parquet::file::properties::WriterProperties; use serde::Serialize; +use tracing::log::*; -use super::write::write_execution_plan; +use super::write::{write_execution_plan, write_execution_plan_cdc}; use super::{ datafusion_utils::Expression, transaction::{CommitBuilder, CommitProperties}, }; use super::{transaction::PROTOCOL, write::WriterStatsConfig}; -use crate::delta_datafusion::{ - create_physical_expr_fix, expr::fmt_expr_to_sql, physical::MetricObserverExec, - DataFusionMixins, DeltaColumn, DeltaSessionContext, -}; -use crate::delta_datafusion::{find_files, register_store, DeltaScanBuilder}; +use crate::delta_datafusion::{find_files, planner::DeltaPlanner, register_store}; use crate::kernel::{Action, Remove}; use crate::logstore::LogStoreRef; +use crate::operations::cdc::*; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; +use crate::{ + delta_datafusion::{ + expr::fmt_expr_to_sql, + logical::MetricObserver, + physical::{find_metric_node, get_metric, MetricObserverExec}, + DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, + DeltaTableProvider, + }, + DeltaTableError, +}; use crate::{DeltaResult, DeltaTable}; +/// Custom column name used for marking internal [RecordBatch] rows as updated +pub(crate) const UPDATE_PREDICATE_COLNAME: &str = "__delta_rs_update_predicate"; + +const UPDATE_COUNT_ID: &str = "update_source_count"; +const UPDATE_ROW_COUNT: &str = "num_updated_rows"; +const COPIED_ROW_COUNT: &str = "num_copied_rows"; + /// Updates records in the Delta Table. 
/// See this module's documentation for more information pub struct UpdateBuilder { @@ -163,6 +180,44 @@ impl UpdateBuilder { } } +#[derive(Clone)] +struct UpdateMetricExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for UpdateMetricExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DataFusionResult>> { + if let Some(metric_observer) = node.as_any().downcast_ref::() { + if metric_observer.id.eq(UPDATE_COUNT_ID) { + return Ok(Some(MetricObserverExec::try_new( + UPDATE_COUNT_ID.into(), + physical_inputs, + |batch, metrics| { + let array = batch.column_by_name(UPDATE_PREDICATE_COLNAME).unwrap(); + let copied_rows = array.null_count(); + let num_updated = array.len() - copied_rows; + + MetricBuilder::new(metrics) + .global_counter(UPDATE_ROW_COUNT) + .add(num_updated); + + MetricBuilder::new(metrics) + .global_counter(COPIED_ROW_COUNT) + .add(copied_rows); + }, + )?)); + } + } + Ok(None) + } +} + #[allow(clippy::too_many_arguments)] async fn execute( predicate: Option, @@ -172,7 +227,7 @@ async fn execute( state: SessionState, writer_properties: Option, mut commit_properties: CommitProperties, - safe_cast: bool, + _safe_cast: bool, ) -> DeltaResult<(DeltaTableState, UpdateMetrics)> { // Validate the predicate and update expressions. // @@ -183,6 +238,17 @@ async fn execute( // For files that were identified, scan for records that match the predicate, // perform update operations, and then commit add and remove actions to // the log. + if !&snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("UPDATE".into())); + } + + let update_planner = DeltaPlanner:: { + extension_planner: UpdateMetricExtensionPlanner {}, + }; + + let state = SessionStateBuilder::new_from_existing(state) + .with_query_planner(Arc::new(update_planner)) + .build(); let exec_start = Instant::now(); let mut metrics = UpdateMetrics::default(); @@ -199,15 +265,15 @@ async fn execute( None => None, }; - let updates: HashMap = updates + let updates = updates .into_iter() .map(|(key, expr)| match expr { - Expression::DataFusion(e) => Ok((key, e)), + Expression::DataFusion(e) => Ok((key.name, e)), Expression::String(s) => snapshot .parse_predicate_expression(s, &state) - .map(|e| (key, e)), + .map(|e| (key.name, e)), }) - .collect::, _>>()?; + .collect::, _>>()?; let current_metadata = snapshot.metadata(); let table_partition_cols = current_metadata.partition_columns.clone(); @@ -222,132 +288,63 @@ async fn execute( let predicate = predicate.unwrap_or(Expr::Literal(ScalarValue::Boolean(Some(true)))); - let execution_props = state.execution_props(); + let scan_config = DeltaScanConfigBuilder::default() + .with_file_column(false) + .with_schema(snapshot.input_schema()?) + .build(&snapshot)?; + // For each rewrite evaluate the predicate and then modify each expression // to either compute the new value or obtain the old one then write these batches - let scan = DeltaScanBuilder::new(&snapshot, log_store.clone(), &state) - .with_files(&candidates.candidates) - .build() - .await?; - let scan = Arc::new(scan); + let target_provider = Arc::new( + DeltaTableProvider::try_new(snapshot.clone(), log_store.clone(), scan_config.clone())? 
+ .with_files(candidates.candidates.clone()), + ); - // Create a projection for a new column with the predicate evaluated - let input_schema = snapshot.input_schema()?; + let target_provider = provider_as_source(target_provider); + let plan = LogicalPlanBuilder::scan("target", target_provider.clone(), None)?.build()?; - let mut fields = Vec::new(); - for field in input_schema.fields.iter() { - fields.push(field.to_owned()); - } - fields.push(Arc::new(Field::new( - "__delta_rs_update_predicate", - arrow_schema::DataType::Boolean, - true, - ))); - // Recreate the schemas with the new column included - let input_schema = Arc::new(ArrowSchema::new(fields)); - let input_dfschema: DFSchema = input_schema.as_ref().clone().try_into()?; - - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = scan.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } + let df = DataFrame::new(state.clone(), plan); // Take advantage of how null counts are tracked in arrow arrays use the // null count to track how many records do NOT statisfy the predicate. The // count is then exposed through the metrics through the `UpdateCountExec` // execution plan - let predicate_null = when(predicate.clone(), lit(true)).otherwise(lit(ScalarValue::Boolean(None)))?; - let predicate_expr = - create_physical_expr_fix(predicate_null, &input_dfschema, execution_props)?; - expressions.push((predicate_expr, "__delta_rs_update_predicate".to_string())); - - let projection_predicate: Arc = - Arc::new(ProjectionExec::try_new(expressions, scan)?); - - let count_plan = Arc::new(MetricObserverExec::new( - "update_count".into(), - projection_predicate.clone(), - |batch, metrics| { - let array = batch.column_by_name("__delta_rs_update_predicate").unwrap(); - let copied_rows = array.null_count(); - let num_updated = array.len() - copied_rows; - - MetricBuilder::new(metrics) - .global_counter("num_updated_rows") - .add(num_updated); - - MetricBuilder::new(metrics) - .global_counter("num_copied_rows") - .add(copied_rows); - }, - )); - // Perform another projection but instead calculate updated values based on - // the predicate value. 
If the predicate is true then evalute the user - // provided expression otherwise return the original column value - // - // For each update column a new column with a name of __delta_rs_ + `original name` is created - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = count_plan.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } + let df_with_update_col = df + .clone() + .with_column(UPDATE_PREDICATE_COLNAME, predicate_null)?; - // Maintain a map from the original column name to its temporary column index - let mut map = HashMap::::new(); - let mut control_columns = HashSet::::new(); - control_columns.insert("__delta_rs_update_predicate".to_owned()); - - for (column, expr) in updates { - let expr = case(col("__delta_rs_update_predicate")) - .when(lit(true), expr.to_owned()) - .otherwise(col(column.to_owned()))?; - let predicate_expr = create_physical_expr_fix(expr, &input_dfschema, execution_props)?; - map.insert(column.name.clone(), expressions.len()); - let c = "__delta_rs_".to_string() + &column.name; - expressions.push((predicate_expr, c.clone())); - control_columns.insert(c); - } + let plan_with_metrics = LogicalPlan::Extension(Extension { + node: Arc::new(MetricObserver { + id: UPDATE_COUNT_ID.into(), + input: df_with_update_col.into_unoptimized_plan(), + enable_pushdown: false, + }), + }); - let projection_update: Arc = - Arc::new(ProjectionExec::try_new(expressions, count_plan.clone())?); - - // Project again to remove __delta_rs columns and rename update columns to their original name - let mut expressions: Vec<(Arc, String)> = Vec::new(); - let scan_schema = projection_update.schema(); - for (i, field) in scan_schema.fields().into_iter().enumerate() { - if !control_columns.contains(field.name()) { - match map.get(field.name()) { - Some(value) => { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), *value)), - field.name().to_owned(), - )); - } - None => { - expressions.push(( - Arc::new(expressions::Column::new(field.name(), i)), - field.name().to_owned(), - )); - } - } - } - } + let df_with_predicate_and_metrics = DataFrame::new(state.clone(), plan_with_metrics); - let projection: Arc = Arc::new(ProjectionExec::try_new( - expressions, - projection_update.clone(), - )?); + let expressions: Vec = df_with_predicate_and_metrics + .schema() + .fields() + .into_iter() + .map(|field| { + let field_name = field.name(); + let expr = match updates.get(field_name) { + Some(expr) => case(col(UPDATE_PREDICATE_COLNAME)) + .when(lit(true), expr.to_owned()) + .otherwise(col(Column::from_name(field_name)))? 
+ .alias(field_name), + None => col(Column::from_name(field_name)), + }; + Ok(expr) + }) + .collect::>>()?; + let updated_df = df_with_predicate_and_metrics.select(expressions.clone())?; + let physical_plan = updated_df.clone().create_physical_plan().await?; let writer_stats_config = WriterStatsConfig::new( snapshot.table_config().num_indexed_cols(), snapshot @@ -356,32 +353,28 @@ async fn execute( .map(|v| v.iter().map(|v| v.to_string()).collect::>()), ); + let tracker = CDCTracker::new(df, updated_df.drop_columns(&[UPDATE_PREDICATE_COLNAME])?); + let add_actions = write_execution_plan( Some(&snapshot), state.clone(), - projection.clone(), + physical_plan.clone(), table_partition_cols.clone(), log_store.object_store().clone(), Some(snapshot.table_config().target_file_size() as usize), None, - writer_properties, - safe_cast, + writer_properties.clone(), + writer_stats_config.clone(), None, - writer_stats_config, ) .await?; - let count_metrics = count_plan.metrics().unwrap(); + let err = || DeltaTableError::Generic("Unable to locate expected metric node".into()); + let update_count = find_metric_node(UPDATE_COUNT_ID, &physical_plan).ok_or_else(err)?; + let update_count_metrics = update_count.metrics().unwrap(); - metrics.num_updated_rows = count_metrics - .sum_by_name("num_updated_rows") - .map(|m| m.as_usize()) - .unwrap_or(0); - - metrics.num_copied_rows = count_metrics - .sum_by_name("num_copied_rows") - .map(|m| m.as_usize()) - .unwrap_or(0); + metrics.num_updated_rows = get_metric(&update_count_metrics, UPDATE_ROW_COUNT); + metrics.num_copied_rows = get_metric(&update_count_metrics, COPIED_ROW_COUNT); let deletion_timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -422,6 +415,30 @@ async fn execute( serde_json::to_value(&metrics)?, ); + if let Ok(true) = should_write_cdc(&snapshot) { + match tracker.collect() { + Ok(df) => { + let cdc_actions = write_execution_plan_cdc( + Some(&snapshot), + state, + df.create_physical_plan().await?, + table_partition_cols, + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + writer_stats_config, + None, + ) + .await?; + actions.extend(cdc_actions); + } + Err(err) => { + error!("Failed to collect CDC batches: {err:#?}"); + } + }; + } + let commit = CommitBuilder::from(commit_properties) .with_actions(actions) .build(Some(&snapshot), log_store, operation) @@ -472,24 +489,25 @@ impl std::future::IntoFuture for UpdateBuilder { #[cfg(test)] mod tests { + use super::*; + use crate::kernel::DataType as DeltaDataType; - use crate::kernel::PrimitiveType; - use crate::kernel::StructField; - use crate::kernel::StructType; + use crate::kernel::{Action, PrimitiveType, Protocol, StructField, StructType}; + use crate::operations::load_cdf::*; use crate::operations::DeltaOps; use crate::writer::test_utils::datafusion::get_data; use crate::writer::test_utils::datafusion::write_batch; use crate::writer::test_utils::{ get_arrow_schema, get_delta_schema, get_record_batch, setup_table_with_configuration, }; - use crate::DeltaConfigKey; - use crate::DeltaTable; + use crate::{DeltaTable, TableProperty}; + use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::Schema as ArrowSchema; use arrow::datatypes::{Field, Schema}; use arrow::record_batch::RecordBatch; - use arrow_array::Int32Array; use arrow_schema::DataType; use datafusion::assert_batches_sorted_eq; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; use serde_json::json; use std::sync::Arc; @@ -499,7 
+517,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partitions.unwrap_or_default()) .await .unwrap(); @@ -531,7 +549,7 @@ mod tests { #[tokio::test] async fn test_update_when_delta_table_is_append_only() { - let table = setup_table_with_configuration(DeltaConfigKey::AppendOnly, Some("true")).await; + let table = setup_table_with_configuration(TableProperty::AppendOnly, Some("true")).await; let batch = get_record_batch(None, false); // Append let table = write_batch(table, batch).await; @@ -789,7 +807,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); let table = write_batch(table, batch).await; @@ -969,4 +987,234 @@ mod tests { .await; assert!(res.is_err()); } + + #[tokio::test] + async fn test_no_cdc_on_older_tables() { + let table = prepare_values_table().await; + assert_eq!(table.version(), 0); + assert_eq!(table.get_files_count(), 1); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("value", lit(12)) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + // NOTE: This currently doesn't really assert anything because cdc_files() is not reading + // actions correct + if let Some(state) = table.state.clone() { + let cdc_files = state.cdc_files(); + assert!(cdc_files.is_ok()); + if let Ok(cdc_files) = cdc_files { + let cdc_files: Vec<_> = cdc_files.collect(); + assert_eq!(cdc_files.len(), 0); + } + } else { + panic!("I shouldn't exist!"); + } + + // Too close for missiles, switching to guns. Just checking that the data wasn't actually + // written instead! + if let Ok(files) = crate::storage::utils::flatten_list_stream( + &table.object_store(), + Some(&object_store::path::Path::from("_change_data")), + ) + .await + { + assert_eq!( + 0, + files.len(), + "This test should not find any written CDC files! 
{files:#?}" + ); + } + } + + #[tokio::test] + async fn test_update_cdc_enabled() { + // Currently you cannot pass EnableChangeDataFeed through `with_configuration_property` + // so the only way to create a truly CDC enabled table is by shoving the Protocol + // directly into the actions list + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + arrow::datatypes::DataType::Int32, + true, + )])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]))], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("value", lit(12)) + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq! 
{[ + "+-------+------------------+-----------------+", + "| value | _change_type | _commit_version |", + "+-------+------------------+-----------------+", + "| 1 | insert | 1 |", + "| 2 | insert | 1 |", + "| 2 | update_preimage | 2 |", + "| 12 | update_postimage | 2 |", + "| 3 | insert | 1 |", + "+-------+------------------+-----------------+", + ], &batches } + } + + #[tokio::test] + async fn test_update_cdc_enabled_partitions() { + // Currently you cannot pass EnableChangeDataFeed through `with_configuration_property` + // so the only way to create a truly CDC enabled table is by shoving the Protocol + // directly into the actions list + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_column( + "year", + DeltaDataType::Primitive(PrimitiveType::String), + true, + None, + ) + .with_column( + "value", + DeltaDataType::Primitive(PrimitiveType::Integer), + true, + None, + ) + .with_partition_columns(vec!["year"]) + .with_actions(actions) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(Schema::new(vec![ + Field::new("year", DataType::Utf8, true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020"), + Some("2024"), + ])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + ], + ) + .unwrap(); + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let (table, _metrics) = DeltaOps(table) + .update() + .with_predicate(col("value").eq(lit(2))) + .with_update("year", "2024") + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let table = DeltaOps(table) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await + .expect("Failed to collect batches"); + + let _ = arrow::util::pretty::print_batches(&batches); + + // The batches will contain a current _commit_timestamp which shouldn't be checked + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(3)).collect(); + + assert_batches_sorted_eq!
{[ + "+-------+------------------+-----------------+------+", + "| value | _change_type | _commit_version | year |", + "+-------+------------------+-----------------+------+", + "| 1 | insert | 1 | 2020 |", + "| 2 | insert | 1 | 2020 |", + "| 2 | update_preimage | 2 | 2020 |", + "| 2 | update_postimage | 2 | 2024 |", + "| 3 | insert | 1 | 2024 |", + "+-------+------------------+-----------------+------+", + ], &batches } + } } diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index 0e4bd2b467..4452526258 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -240,8 +240,11 @@ impl std::future::IntoFuture for VacuumBuilder { fn into_future(self) -> Self::IntoFuture { let this = self; - Box::pin(async move { + if !&this.snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("VACUUM".into())); + } + let plan = this.create_vacuum_plan().await?; if this.dry_run { return Ok(( diff --git a/crates/core/src/operations/write.rs b/crates/core/src/operations/write.rs index f3b87d4f66..36dcec5b70 100644 --- a/crates/core/src/operations/write.rs +++ b/crates/core/src/operations/write.rs @@ -27,21 +27,29 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; use std::vec; use arrow_array::RecordBatch; use arrow_cast::can_cast_types; use arrow_schema::{ArrowError, DataType, Fields, SchemaRef as ArrowSchemaRef}; use datafusion::execution::context::{SessionContext, SessionState, TaskContext}; -use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::{memory::MemoryExec, ExecutionPlan}; use datafusion_common::DFSchema; -use datafusion_expr::Expr; +use datafusion_expr::{lit, Expr}; +use datafusion_physical_expr::expressions::{self}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::union::UnionExec; +use datafusion_physical_plan::{memory::MemoryExec, ExecutionPlan}; use futures::future::BoxFuture; use futures::StreamExt; +use object_store::prefix::PrefixStore; use parquet::file::properties::WriterProperties; +use serde::{Deserialize, Serialize}; +use tracing::log::*; +use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::writer::{DeltaWriter, WriterConfig}; @@ -49,13 +57,15 @@ use super::CreateBuilder; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::expr::parse_predicate_expression; use crate::delta_datafusion::{ - create_physical_expr_fix, find_files, register_store, DeltaScanBuilder, + find_files, register_store, DeltaScanBuilder, DeltaScanConfigBuilder, }; use crate::delta_datafusion::{DataFusionMixins, DeltaDataChecker}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Action, Add, Metadata, PartitionsExt, Remove, StructType}; +use crate::kernel::{ + Action, ActionType, Add, AddCDCFile, Metadata, PartitionsExt, Remove, StructType, +}; use crate::logstore::LogStoreRef; -use crate::operations::cast::{cast_record_batch, merge_schema}; +use crate::operations::cast::{cast_record_batch, merge_schema::merge_arrow_schema}; use crate::protocol::{DeltaOperation, SaveMode}; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; @@ -63,6 
+73,8 @@ use crate::table::Constraint as DeltaConstraint; use crate::writer::record_batch::divide_by_partition_values; use crate::DeltaTable; +use tokio::sync::mpsc::Sender; + #[derive(thiserror::Error, Debug)] enum WriteError { #[error("No data source supplied to write command.")] @@ -153,6 +165,21 @@ pub struct WriteBuilder { configuration: HashMap>, } +#[derive(Default, Debug, Serialize, Deserialize)] +/// Metrics for the Write Operation +pub struct WriteMetrics { + /// Number of files added + pub num_added_files: usize, + /// Number of files removed + pub num_removed_files: usize, + /// Number of partitions + pub num_partitions: usize, + /// Number of rows added + pub num_added_rows: usize, + /// Time taken to execute the entire operation + pub execution_time_ms: u64, +} + impl super::Operation<()> for WriteBuilder {} impl WriteBuilder { @@ -286,17 +313,20 @@ impl WriteBuilder { Some(snapshot) => { PROTOCOL.can_write_to(snapshot)?; - if let Some(plan) = &self.input { - let schema: StructType = (plan.schema()).try_into()?; - PROTOCOL.check_can_write_timestamp_ntz(snapshot, &schema)?; + let schema: StructType = if let Some(plan) = &self.input { + (plan.schema()).try_into()? } else if let Some(batches) = &self.batches { if batches.is_empty() { return Err(WriteError::MissingData.into()); } - let schema: StructType = (batches[0].schema()).try_into()?; + (batches[0].schema()).try_into()? + } else { + return Err(WriteError::MissingData.into()); + }; + + if self.schema_mode.is_none() { PROTOCOL.check_can_write_timestamp_ntz(snapshot, &schema)?; } - match self.mode { SaveMode::ErrorIfExists => { Err(WriteError::AlreadyExists(self.log_store.root_uri()).into()) @@ -317,7 +347,7 @@ impl WriteBuilder { }?; let mut builder = CreateBuilder::new() .with_log_store(self.log_store.clone()) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_configuration(self.configuration.clone()); if let Some(partition_columns) = self.partition_columns.as_ref() { builder = builder.with_partition_columns(partition_columns.clone()) @@ -367,18 +397,12 @@ async fn write_execution_plan_with_predicate( target_file_size: Option, write_batch_size: Option, writer_properties: Option, - safe_cast: bool, - schema_mode: Option, writer_stats_config: WriterStatsConfig, + sender: Option>, ) -> DeltaResult> { - let schema: ArrowSchemaRef = if schema_mode.is_some() { - plan.schema() - } else { - snapshot - .and_then(|s| s.input_schema().ok()) - .unwrap_or(plan.schema()) - }; - + // We always take the plan Schema since the data may contain Large/View arrow types, + // the schema and batches were prior constructed with this in mind. 
+ let schema: ArrowSchemaRef = plan.schema(); let checker = if let Some(snapshot) = snapshot { DeltaDataChecker::new(snapshot) } else { @@ -392,7 +416,6 @@ async fn write_execution_plan_with_predicate( } _ => checker, }; - // Write data to disk let mut tasks = vec![]; for i in 0..plan.properties().output_partitioning().partition_count() { @@ -410,26 +433,33 @@ async fn write_execution_plan_with_predicate( ); let mut writer = DeltaWriter::new(object_store.clone(), config); let checker_stream = checker.clone(); + let sender_stream = sender.clone(); let mut stream = inner_plan.execute(i, task_ctx)?; - let handle: tokio::task::JoinHandle>> = - tokio::task::spawn(async move { + + let handle: tokio::task::JoinHandle>> = tokio::task::spawn( + async move { + let sendable = sender_stream.clone(); while let Some(maybe_batch) = stream.next().await { let batch = maybe_batch?; + checker_stream.check_batch(&batch).await?; - let arr = super::cast::cast_record_batch( - &batch, - inner_schema.clone(), - safe_cast, - schema_mode == Some(SchemaMode::Merge), - )?; - writer.write(&arr).await?; + + if let Some(s) = sendable.as_ref() { + if let Err(e) = s.send(batch.clone()).await { + error!("Failed to send data to observer: {e:#?}"); + } + } else { + debug!("write_execution_plan_with_predicate did not send any batches, no sender."); + } + writer.write(&batch).await?; } let add_actions = writer.close().await; match add_actions { Ok(actions) => Ok(actions.into_iter().map(Action::Add).collect::>()), Err(err) => Err(err), } - }); + }, + ); tasks.push(handle); } @@ -447,6 +477,55 @@ async fn write_execution_plan_with_predicate( Ok(actions) } +#[allow(clippy::too_many_arguments)] +pub(crate) async fn write_execution_plan_cdc( + snapshot: Option<&DeltaTableState>, + state: SessionState, + plan: Arc, + partition_columns: Vec, + object_store: ObjectStoreRef, + target_file_size: Option, + write_batch_size: Option, + writer_properties: Option, + writer_stats_config: WriterStatsConfig, + sender: Option>, +) -> DeltaResult> { + let cdc_store = Arc::new(PrefixStore::new(object_store, "_change_data")); + + Ok(write_execution_plan( + snapshot, + state, + plan, + partition_columns, + cdc_store, + target_file_size, + write_batch_size, + writer_properties, + writer_stats_config, + sender, + ) + .await? 
+ .into_iter() + .map(|add| { + // Modify add actions into CDC actions + match add { + Action::Add(add) => { + Action::Cdc(AddCDCFile { + // This is a gnarly hack, but the action needs the nested path, not the + // path isnide the prefixed store + path: format!("_change_data/{}", add.path), + size: add.size, + partition_values: add.partition_values, + data_change: false, + tags: add.tags, + }) + } + _ => panic!("Expected Add action"), + } + }) + .collect::>()) +} + #[allow(clippy::too_many_arguments)] pub(crate) async fn write_execution_plan( snapshot: Option<&DeltaTableState>, @@ -457,9 +536,8 @@ pub(crate) async fn write_execution_plan( target_file_size: Option, write_batch_size: Option, writer_properties: Option, - safe_cast: bool, - schema_mode: Option, writer_stats_config: WriterStatsConfig, + sender: Option>, ) -> DeltaResult> { write_execution_plan_with_predicate( None, @@ -471,9 +549,8 @@ pub(crate) async fn write_execution_plan( target_file_size, write_batch_size, writer_properties, - safe_cast, - schema_mode, writer_stats_config, + sender, ) .await } @@ -488,44 +565,165 @@ async fn execute_non_empty_expr( rewrite: &[Add], writer_properties: Option, writer_stats_config: WriterStatsConfig, + partition_scan: bool, + insert_plan: Arc, ) -> DeltaResult> { // For each identified file perform a parquet scan + filter + limit (1) + count. // If returned count is not zero then append the file to be rewritten and removed from the log. Otherwise do nothing to the file. + let mut actions: Vec = Vec::new(); - let input_schema = snapshot.input_schema()?; - let input_dfschema: DFSchema = input_schema.clone().as_ref().clone().try_into()?; + // Take the insert plan schema since it might have been schema evolved, if its not + // it is simply the table schema + let df_schema = insert_plan.schema(); + let input_dfschema: DFSchema = df_schema.as_ref().clone().try_into()?; + + let scan_config = DeltaScanConfigBuilder::new() + .with_schema(snapshot.input_schema()?) 
+ .build(snapshot)?; let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), &state) .with_files(rewrite) + // Use input schema which doesn't wrap partition values, otherwise divide_by_partition_value won't work on UTF8 partitions + // Since it can't fetch a scalar from a dictionary type + .with_scan_config(scan_config) .build() .await?; let scan = Arc::new(scan); - // Apply the negation of the filter and rewrite files - let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + // We don't want to verify the predicate against existing data + if !partition_scan { + // Apply the negation of the filter and rewrite files + let negated_expression = Expr::Not(Box::new(Expr::IsTrue(Box::new(expression.clone())))); + + let predicate_expr = state.create_physical_expr(negated_expression, &input_dfschema)?; + let filter: Arc = + Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + + let add_actions: Vec = write_execution_plan( + Some(snapshot), + state.clone(), + filter, + partition_columns.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties.clone(), + writer_stats_config.clone(), + None, + ) + .await?; - let predicate_expr = - create_physical_expr_fix(negated_expression, &input_dfschema, state.execution_props())?; - let filter: Arc = - Arc::new(FilterExec::try_new(predicate_expr, scan.clone())?); + actions.extend(add_actions); + } - // We don't want to verify the predicate against existing data - let add_actions = write_execution_plan( - Some(snapshot), - state, - filter, - partition_columns, - log_store.object_store(), - Some(snapshot.table_config().target_file_size() as usize), - None, - writer_properties, - false, - None, - writer_stats_config, - ) - .await?; + // CDC logic: simply filters data with the predicate and adds the _change_type="delete" as a literal column + // Only write CDC actions when it was not a partition scan, load_cdf can deduce the deletes in that case + // based on the remove actions if a partition got deleted + if !partition_scan { + // We only write deletions when it was not a partition scan + if let Some(cdc_actions) = execute_non_empty_expr_cdc( + snapshot, + log_store, + state.clone(), + scan, + input_dfschema, + expression, + partition_columns, + writer_properties, + writer_stats_config, + insert_plan, + ) + .await?
+ { + actions.extend(cdc_actions) + } + } + Ok(actions) +} - Ok(add_actions) +/// If CDC is enabled it writes all the deletions based on predicate into _change_data directory +#[allow(clippy::too_many_arguments)] +pub(crate) async fn execute_non_empty_expr_cdc( + snapshot: &DeltaTableState, + log_store: LogStoreRef, + state: SessionState, + scan: Arc, + input_dfschema: DFSchema, + expression: &Expr, + table_partition_cols: Vec, + writer_properties: Option, + writer_stats_config: WriterStatsConfig, + insert_plan: Arc, +) -> DeltaResult>> { + match should_write_cdc(snapshot) { + // Create CDC scan + Ok(true) => { + let cdc_predicate_expr = + state.create_physical_expr(expression.clone(), &input_dfschema)?; + let cdc_scan: Arc = + Arc::new(FilterExec::try_new(cdc_predicate_expr, scan.clone())?); + + // Add literal column "_change_type" + let delete_change_type_expr = + state.create_physical_expr(lit("delete"), &input_dfschema)?; + + let insert_change_type_expr = + state.create_physical_expr(lit("insert"), &input_dfschema)?; + + // Project columns and lit + let mut delete_project_expressions: Vec<(Arc, String)> = scan + .schema() + .fields() + .into_iter() + .enumerate() + .map(|(idx, field)| -> (Arc, String) { + ( + Arc::new(expressions::Column::new(field.name(), idx)), + field.name().to_owned(), + ) + }) + .collect(); + + let mut insert_project_expressions = delete_project_expressions.clone(); + delete_project_expressions.insert( + delete_project_expressions.len(), + (delete_change_type_expr, "_change_type".to_owned()), + ); + insert_project_expressions.insert( + insert_project_expressions.len(), + (insert_change_type_expr, "_change_type".to_owned()), + ); + + let delete_plan: Arc = Arc::new(ProjectionExec::try_new( + delete_project_expressions, + cdc_scan.clone(), + )?); + + let insert_plan: Arc = Arc::new(ProjectionExec::try_new( + insert_project_expressions, + insert_plan.clone(), + )?); + + let cdc_plan: Arc = + Arc::new(UnionExec::new(vec![delete_plan, insert_plan])); + + let cdc_actions = write_execution_plan_cdc( + Some(snapshot), + state.clone(), + cdc_plan.clone(), + table_partition_cols.clone(), + log_store.object_store(), + Some(snapshot.table_config().target_file_size() as usize), + None, + writer_properties, + writer_stats_config, + None, + ) + .await?; + Ok(Some(cdc_actions)) + } + _ => Ok(None), + } } // This should only be called wth a valid predicate @@ -539,28 +737,26 @@ async fn prepare_predicate_actions( writer_properties: Option, deletion_timestamp: i64, writer_stats_config: WriterStatsConfig, + insert_plan: Arc, ) -> DeltaResult> { let candidates = find_files(snapshot, log_store.clone(), &state, Some(predicate.clone())).await?; - let add = if candidates.partition_scan { - Vec::new() - } else { - execute_non_empty_expr( - snapshot, - log_store, - state, - partition_columns, - &predicate, - &candidates.candidates, - writer_properties, - writer_stats_config, - ) - .await? 
- }; - let remove = candidates.candidates; + let mut actions = execute_non_empty_expr( + snapshot, + log_store, + state, + partition_columns, + &predicate, + &candidates.candidates, + writer_properties, + writer_stats_config, + candidates.partition_scan, + insert_plan, + ) + .await?; - let mut actions: Vec = add.into_iter().collect(); + let remove = candidates.candidates; for action in remove { actions.push(Action::Remove(Remove { @@ -587,9 +783,15 @@ impl std::future::IntoFuture for WriteBuilder { let this = self; Box::pin(async move { + let mut metrics = WriteMetrics::default(); + let exec_start = Instant::now(); + if this.mode == SaveMode::Overwrite { if let Some(snapshot) = &this.snapshot { PROTOCOL.check_append_only(&snapshot.snapshot)?; + if !snapshot.load_config().require_files { + return Err(DeltaTableError::NotInitializedWithFiles("WRITE".into())); + } } } if this.schema_mode == Some(SchemaMode::Overwrite) && this.mode != SaveMode::Overwrite { @@ -639,35 +841,52 @@ impl std::future::IntoFuture for WriteBuilder { let mut new_schema = None; if let Some(snapshot) = &this.snapshot { - let table_schema = snapshot - .physical_arrow_schema(this.log_store.object_store().clone()) - .await - .or_else(|_| snapshot.arrow_schema()) - .unwrap_or(schema.clone()); - + let table_schema = snapshot.input_schema()?; if let Err(schema_err) = try_cast_batch(schema.fields(), table_schema.fields()) { schema_drift = true; - if this.mode == SaveMode::Overwrite && this.schema_mode.is_some() { + if this.mode == SaveMode::Overwrite + && this.schema_mode == Some(SchemaMode::Overwrite) + { new_schema = None // we overwrite anyway, so no need to cast } else if this.schema_mode == Some(SchemaMode::Merge) { - new_schema = - Some(merge_schema(table_schema.clone(), schema.clone())?); + new_schema = Some(merge_arrow_schema( + table_schema.clone(), + schema.clone(), + schema_drift, + )?); } else { return Err(schema_err.into()); } + } else if this.mode == SaveMode::Overwrite + && this.schema_mode == Some(SchemaMode::Overwrite) + { + new_schema = None // we overwrite anyway, so no need to cast + } else { + // Schema needs to be merged so that utf8/binary/list types are preserved from the batch side if both table + // and batch contains such type. Other types are preserved from the table side. + // At this stage it will never introduce more fields since try_cast_batch passed correctly. + new_schema = Some(merge_arrow_schema( + table_schema.clone(), + schema.clone(), + schema_drift, + )?); } } - let data = if !partition_columns.is_empty() { // TODO partitioning should probably happen in its own plan ... let mut partitions: HashMap> = HashMap::new(); + let mut num_partitions = 0; + let mut num_added_rows = 0; for batch in batches { let real_batch = match new_schema.clone() { - Some(new_schema) => { - cast_record_batch(&batch, new_schema, false, true)? - } + Some(new_schema) => cast_record_batch( + &batch, + new_schema, + this.safe_cast, + schema_drift, // Schema drifted so we have to add the missing columns/structfields. 
+ )?, None => batch, }; @@ -676,7 +895,9 @@ impl std::future::IntoFuture for WriteBuilder { partition_columns.clone(), &real_batch, )?; + num_partitions += divided.len(); for part in divided { + num_added_rows += part.record_batch.num_rows(); let key = part.partition_values.hive_partition_path(); match partitions.get_mut(&key) { Some(part_batches) => { @@ -688,22 +909,30 @@ impl std::future::IntoFuture for WriteBuilder { } } } + metrics.num_partitions = num_partitions; + metrics.num_added_rows = num_added_rows; partitions.into_values().collect::>() } else { match new_schema { Some(ref new_schema) => { let mut new_batches = vec![]; + let mut num_added_rows = 0; for batch in batches { new_batches.push(cast_record_batch( &batch, new_schema.clone(), - false, - true, + this.safe_cast, + schema_drift, // Schema drifted so we have to add the missing columns/structfields. )?); + num_added_rows += batch.num_rows(); } + metrics.num_added_rows = num_added_rows; vec![new_batches] } - None => vec![batches], + None => { + metrics.num_added_rows = batches.iter().map(|b| b.num_rows()).sum(); + vec![batches] + } } }; @@ -720,12 +949,38 @@ impl std::future::IntoFuture for WriteBuilder { if this.schema_mode == Some(SchemaMode::Merge) && schema_drift { if let Some(snapshot) = &this.snapshot { let schema_struct: StructType = schema.clone().try_into()?; + let current_protocol = snapshot.protocol(); + let configuration = snapshot.metadata().configuration.clone(); + let maybe_new_protocol = if PROTOCOL + .contains_timestampntz(schema_struct.fields()) + && !current_protocol + .reader_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::ReaderFeatures::TimestampWithoutTimezone) + // We can check only reader features, as reader and writer timestampNtz + // should be always enabled together + { + let new_protocol = current_protocol.clone().enable_timestamp_ntz(); + if !(current_protocol.min_reader_version == 3 + && current_protocol.min_writer_version == 7) + { + Some(new_protocol.move_table_properties_into_features(&configuration)) + } else { + Some(new_protocol) + } + } else { + None + }; let schema_action = Action::Metadata(Metadata::try_new( schema_struct, partition_columns.clone(), - snapshot.metadata().configuration.clone(), + configuration, )?); actions.push(schema_action); + if let Some(new_protocol) = maybe_new_protocol { + actions.push(new_protocol.into()) + } } } let state = match this.state { @@ -757,6 +1012,9 @@ impl std::future::IntoFuture for WriteBuilder { .as_ref() .map(|snapshot| snapshot.table_config()); + let target_file_size = this.target_file_size.or_else(|| { + Some(super::get_target_file_size(&config, &this.configuration) as usize) + }); let (num_indexed_cols, stats_columns) = super::get_num_idx_cols_and_stats_columns(config, this.configuration); @@ -764,33 +1022,58 @@ impl std::future::IntoFuture for WriteBuilder { num_indexed_cols, stats_columns, }; + // Here we need to validate if the new data conforms to a predicate if one is provided let add_actions = write_execution_plan_with_predicate( predicate.clone(), this.snapshot.as_ref(), state.clone(), - plan, + plan.clone(), partition_columns.clone(), this.log_store.object_store().clone(), - this.target_file_size, + target_file_size, this.write_batch_size, this.writer_properties.clone(), - this.safe_cast, - this.schema_mode, writer_stats_config.clone(), + None, ) .await?; + metrics.num_added_files = add_actions.len(); actions.extend(add_actions); // Collect remove actions if we are overwriting the table if let Some(snapshot) = 
&this.snapshot { if matches!(this.mode, SaveMode::Overwrite) { // Update metadata with new schema - let table_schema = snapshot - .physical_arrow_schema(this.log_store.object_store().clone()) - .await - .or_else(|_| snapshot.arrow_schema()) - .unwrap_or(schema.clone()); + let table_schema = snapshot.input_schema()?; + + let configuration = snapshot.metadata().configuration.clone(); + let current_protocol = snapshot.protocol(); + let maybe_new_protocol = if PROTOCOL.contains_timestampntz( + TryInto::::try_into(schema.clone())?.fields(), + ) && !current_protocol + .reader_features + .clone() + .unwrap_or_default() + .contains(&crate::kernel::ReaderFeatures::TimestampWithoutTimezone) + // We can check only reader features, as reader and writer timestampNtz + // should be always enabled together + { + let new_protocol = current_protocol.clone().enable_timestamp_ntz(); + if !(current_protocol.min_reader_version == 3 + && current_protocol.min_writer_version == 7) + { + Some(new_protocol.move_table_properties_into_features(&configuration)) + } else { + Some(new_protocol) + } + } else { + None + }; + + if let Some(protocol) = maybe_new_protocol { + actions.push(protocol.into()) + } if schema != table_schema { let mut metadata = snapshot.metadata().clone(); @@ -815,6 +1098,7 @@ impl std::future::IntoFuture for WriteBuilder { this.writer_properties, deletion_timestamp, writer_stats_config, + plan, ) .await?; if !predicate_actions.is_empty() { @@ -830,8 +1114,15 @@ impl std::future::IntoFuture for WriteBuilder { } }; } + metrics.num_removed_files = actions + .iter() + .filter(|a| a.action_type() == ActionType::Remove) + .count(); } + metrics.execution_time_ms = + Instant::now().duration_since(exec_start).as_millis() as u64; + let operation = DeltaOperation::Write { mode: this.mode, partition_by: if !partition_columns.is_empty() { @@ -842,7 +1133,13 @@ impl std::future::IntoFuture for WriteBuilder { predicate: predicate_str, }; - let commit = CommitBuilder::from(this.commit_properties) + let mut commit_properties = this.commit_properties.clone(); + commit_properties.app_metadata.insert( + "operationMetrics".to_owned(), + serde_json::to_value(&metrics)?, + ); + + let commit = CommitBuilder::from(commit_properties) .with_actions(actions) .build( this.snapshot.as_ref().map(|f| f as &dyn TableReference), @@ -924,26 +1221,51 @@ fn try_cast_batch(from_fields: &Fields, to_fields: &Fields) -> Result<(), ArrowE #[cfg(test)] mod tests { use super::*; + use crate::logstore::get_actions; + use crate::operations::load_cdf::collect_batches; use crate::operations::{collect_sendable_stream, DeltaOps}; use crate::protocol::SaveMode; + use crate::test_utils::{TestResult, TestSchemas}; use crate::writer::test_utils::datafusion::{get_data, get_data_sorted, write_batch}; use crate::writer::test_utils::{ get_arrow_schema, get_delta_schema, get_delta_schema_with_nested_struct, get_record_batch, get_record_batch_with_nested_struct, setup_table_with_configuration, }; - use crate::DeltaConfigKey; + use crate::TableProperty; use arrow_array::{Int32Array, StringArray, TimestampMicrosecondArray}; use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit}; use datafusion::prelude::*; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; + use itertools::Itertools; use serde_json::{json, Value}; + async fn get_write_metrics(table: DeltaTable) -> WriteMetrics { + let mut commit_info = table.history(Some(1)).await.unwrap(); + let metrics = commit_info + .first_mut() + .unwrap() + .info + .remove("operationMetrics") 
+ .unwrap(); + serde_json::from_value(metrics).unwrap() + } + + fn assert_common_write_metrics(write_metrics: WriteMetrics) { + assert!(write_metrics.execution_time_ms > 0); + assert!(write_metrics.num_added_files > 0); + } + #[tokio::test] async fn test_write_when_delta_table_is_append_only() { - let table = setup_table_with_configuration(DeltaConfigKey::AppendOnly, Some("true")).await; + let table = setup_table_with_configuration(TableProperty::AppendOnly, Some("true")).await; let batch = get_record_batch(None, false); // Append let table = write_batch(table, batch.clone()).await; + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, batch.num_rows()); + assert_eq!(write_metrics.num_removed_files, 0); + assert_common_write_metrics(write_metrics); + // Overwrite let _err = DeltaOps(table) .write(vec![batch]) @@ -959,7 +1281,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -975,6 +1297,12 @@ mod tests { .unwrap(); assert_eq!(table.version(), 1); assert_eq!(table.get_files_count(), 1); + + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, batch.num_rows()); + assert_eq!(write_metrics.num_added_files, table.get_files_count()); + assert_common_write_metrics(write_metrics); + table.load().await.unwrap(); assert_eq!(table.history(None).await.unwrap().len(), 2); assert_eq!( @@ -982,7 +1310,7 @@ mod tests { .info .clone() .into_iter() - .filter(|(k, _)| k != "clientVersion") + .filter(|(k, _)| k == "k1") .collect::>(), metadata ); @@ -998,6 +1326,11 @@ mod tests { .unwrap(); assert_eq!(table.version(), 2); assert_eq!(table.get_files_count(), 2); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, batch.num_rows()); + assert_eq!(write_metrics.num_added_files, 1); + assert_common_write_metrics(write_metrics); + table.load().await.unwrap(); assert_eq!(table.history(None).await.unwrap().len(), 3); assert_eq!( @@ -1005,7 +1338,7 @@ mod tests { .info .clone() .into_iter() - .filter(|(k, _)| k != "clientVersion") + .filter(|(k, _)| k == "k1") .collect::>(), metadata ); @@ -1014,13 +1347,18 @@ mod tests { let metadata: HashMap = HashMap::from_iter(vec![("k2".to_string(), json!("v2.1"))]); let mut table = DeltaOps(table) - .write(vec![batch]) + .write(vec![batch.clone()]) .with_save_mode(SaveMode::Overwrite) .with_commit_properties(CommitProperties::default().with_metadata(metadata.clone())) .await .unwrap(); assert_eq!(table.version(), 3); assert_eq!(table.get_files_count(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, batch.num_rows()); + assert!(write_metrics.num_removed_files > 0); + assert_common_write_metrics(write_metrics); + table.load().await.unwrap(); assert_eq!(table.history(None).await.unwrap().len(), 4); assert_eq!( @@ -1028,7 +1366,7 @@ mod tests { .info .clone() .into_iter() - .filter(|(k, _)| k != "clientVersion") + .filter(|(k, _)| k == "k2") .collect::>(), metadata ); @@ -1051,6 +1389,9 @@ mod tests { ) .unwrap(); let table = DeltaOps::new_in_memory().write(vec![batch]).await.unwrap(); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 2); + assert_common_write_metrics(write_metrics); let 
schema = Arc::new(ArrowSchema::new(vec![Field::new( "value", @@ -1075,6 +1416,10 @@ mod tests { .await .unwrap(); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 3); + assert_common_write_metrics(write_metrics); + let expected = [ "+-------+", "| value |", @@ -1108,6 +1453,10 @@ mod tests { .unwrap(); let table = DeltaOps::new_in_memory().write(vec![batch]).await.unwrap(); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 1); + assert_common_write_metrics(write_metrics); + let schema = Arc::new(ArrowSchema::new(vec![Field::new( "value", DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string().into())), @@ -1143,7 +1492,9 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); - assert_eq!(table.get_files_count(), 1) + assert_eq!(table.get_files_count(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); } #[tokio::test] @@ -1157,6 +1508,10 @@ mod tests { .unwrap(); assert_eq!(table.version(), 0); assert_eq!(table.get_files_count(), 2); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert!(write_metrics.num_partitions > 0); + assert_eq!(write_metrics.num_added_files, 2); + assert_common_write_metrics(write_metrics); let table = DeltaOps::new_in_memory() .write(vec![batch]) @@ -1165,7 +1520,12 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); - assert_eq!(table.get_files_count(), 4) + assert_eq!(table.get_files_count(), 4); + + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert!(write_metrics.num_partitions > 0); + assert_eq!(write_metrics.num_added_files, 4); + assert_common_write_metrics(write_metrics); } #[tokio::test] @@ -1178,6 +1538,9 @@ mod tests { .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); for field in batch.schema().fields() { if field.name() != "modified" { @@ -1222,8 +1585,11 @@ mod tests { assert_eq!(table.version(), 1); let new_schema = table.metadata().unwrap().schema().unwrap(); let fields = new_schema.fields(); - let names = fields.iter().map(|f| f.name()).collect::>(); + let names = fields.map(|f| f.name()).collect::>(); assert_eq!(names, vec!["id", "value", "modified", "inserted_by"]); + + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); } #[tokio::test] @@ -1237,6 +1603,10 @@ mod tests { .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert!(write_metrics.num_partitions > 0); + assert_common_write_metrics(write_metrics); + let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); for field in batch.schema().fields() { if field.name() != "modified" { @@ -1270,7 +1640,6 @@ mod tests { ], ) .unwrap(); - println!("new_batch: {:?}", new_batch.schema()); let table = DeltaOps(table) .write(vec![new_batch]) .with_save_mode(SaveMode::Append) @@ -1281,11 +1650,15 @@ mod tests { assert_eq!(table.version(), 1); let new_schema = table.metadata().unwrap().schema().unwrap(); let fields = new_schema.fields(); - let mut names = fields.iter().map(|f| f.name()).collect::>(); + let mut names = fields.map(|f| f.name()).collect::>(); names.sort(); 
assert_eq!(names, vec!["id", "inserted_by", "modified", "value"]); let part_cols = table.metadata().unwrap().partition_columns.clone(); assert_eq!(part_cols, vec!["id", "value"]); // we want to preserve partitions + + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert!(write_metrics.num_partitions > 0); + assert_common_write_metrics(write_metrics); } #[tokio::test] @@ -1297,7 +1670,8 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); - + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); for field in batch.schema().fields() { if field.name() != "modified" { @@ -1350,6 +1724,8 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); let mut new_schema_builder = arrow_schema::SchemaBuilder::new(); @@ -1398,13 +1774,15 @@ mod tests { let table = DeltaOps::new_in_memory() .create() .with_save_mode(SaveMode::ErrorIfExists) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); let table = DeltaOps(table).write(vec![batch.clone()]).await.unwrap(); assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); let schema: StructType = serde_json::from_value(json!({ "type": "struct", @@ -1420,13 +1798,13 @@ mod tests { let table = DeltaOps::new_in_memory() .create() .with_save_mode(SaveMode::ErrorIfExists) - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); let table = DeltaOps(table).write(vec![batch.clone()]).await; - assert!(table.is_err()) + assert!(table.is_err()); } #[tokio::test] @@ -1436,7 +1814,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); assert_eq!(table.version(), 0); @@ -1447,6 +1825,8 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); let actual = get_data(&table).await; let expected = DataType::Struct(Fields::from(vec![Field::new( @@ -1485,6 +1865,8 @@ mod tests { .with_partition_columns(["string"]) .await .unwrap(); + let write_metrics: WriteMetrics = get_write_metrics(_table.clone()).await; + assert_common_write_metrics(write_metrics); let table = crate::open_table(tmp_path.as_os_str().to_str().unwrap()) .await @@ -1528,6 +1910,9 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 4); + assert_common_write_metrics(write_metrics); let batch_add = RecordBatch::try_new( Arc::clone(&schema), @@ -1546,6 +1931,9 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 1); + assert_common_write_metrics(write_metrics); let expected = [ "+----+-------+------------+", @@ -1584,6 +1972,8 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; 
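
The tests above keep reading the writer's metrics back out of the commit log: the write path serializes a `WriteMetrics` value under the `operationMetrics` key of the commit's `app_metadata`, and the `get_write_metrics` helper deserializes it from `table.history(...)`. A minimal standalone sketch of that round trip is below; the struct definition is an assumption (only the field names asserted in these tests are taken from the diff), not the crate's actual type.

```rust
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use serde_json::Value;

/// Assumed shape of the metrics attached to each write commit; field names are
/// taken from the test assertions above, everything else is a guess.
#[derive(Debug, Default, Serialize, Deserialize)]
struct WriteMetrics {
    num_added_files: usize,
    num_removed_files: usize,
    num_added_rows: usize,
    num_partitions: usize,
    execution_time_ms: u64,
}

fn main() -> Result<(), serde_json::Error> {
    let metrics = WriteMetrics {
        num_added_files: 1,
        num_added_rows: 3,
        execution_time_ms: 42,
        ..Default::default()
    };

    // The writer inserts the metrics into the commit's application metadata...
    let mut app_metadata: HashMap<String, Value> = HashMap::new();
    app_metadata.insert("operationMetrics".to_owned(), serde_json::to_value(&metrics)?);

    // ...and a reader (like the `get_write_metrics` test helper) pulls them back out.
    let round_tripped: WriteMetrics =
        serde_json::from_value(app_metadata.remove("operationMetrics").unwrap())?;
    assert_eq!(round_tripped.num_added_rows, 3);
    Ok(())
}
```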
+ assert_common_write_metrics(write_metrics); // Take clones of these before an operation resulting in error, otherwise it will // be impossible to refer to an in-memory table @@ -1626,6 +2016,8 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 0); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_common_write_metrics(write_metrics); let batch_add = RecordBatch::try_new( Arc::clone(&schema), @@ -1648,6 +2040,9 @@ mod tests { .await .unwrap(); assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 3); + assert_common_write_metrics(write_metrics); let expected = [ "+----+-------+------------+", @@ -1665,4 +2060,255 @@ mod tests { let actual = get_data_sorted(&table, "id,value,modified").await; assert_batches_sorted_eq!(&expected, &actual); } + + #[tokio::test] + async fn test_dont_write_cdc_with_overwrite() -> TestResult { + let delta_schema = TestSchemas::simple(); + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(delta_schema.fields().cloned()) + .with_partition_columns(["id"]) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(ArrowSchema::try_from(delta_schema)?); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("1"), Some("2"), Some("3")])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StringArray::from(vec![ + Some("yes"), + Some("yes"), + Some("no"), + ])), + ], + ) + .unwrap(); + + let second_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("3")])), + Arc::new(Int32Array::from(vec![Some(10)])), + Arc::new(StringArray::from(vec![Some("yes")])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 3); + assert_common_write_metrics(write_metrics); + + let table = DeltaOps(table) + .write([second_batch]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .await + .unwrap(); + assert_eq!(table.version(), 2); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 1); + assert!(write_metrics.num_removed_files > 0); + assert_common_write_metrics(write_metrics); + + let snapshot_bytes = table + .log_store + .read_commit_entry(2) + .await? 
+ .expect("failed to get snapshot bytes"); + let version_actions = get_actions(2, snapshot_bytes).await?; + + let cdc_actions = version_actions + .iter() + .filter(|action| matches!(action, &&Action::Cdc(_))) + .collect_vec(); + assert!(cdc_actions.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_dont_write_cdc_with_overwrite_predicate_partitioned() -> TestResult { + let delta_schema = TestSchemas::simple(); + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(delta_schema.fields().cloned()) + .with_partition_columns(["id"]) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(ArrowSchema::try_from(delta_schema)?); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("1"), Some("2"), Some("3")])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StringArray::from(vec![ + Some("yes"), + Some("yes"), + Some("no"), + ])), + ], + ) + .unwrap(); + + let second_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("3")])), + Arc::new(Int32Array::from(vec![Some(10)])), + Arc::new(StringArray::from(vec![Some("yes")])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 3); + assert!(write_metrics.num_partitions > 0); + assert_common_write_metrics(write_metrics); + + let table = DeltaOps(table) + .write([second_batch]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .with_replace_where("id='3'") + .await + .unwrap(); + assert_eq!(table.version(), 2); + let write_metrics: WriteMetrics = get_write_metrics(table.clone()).await; + assert_eq!(write_metrics.num_added_rows, 1); + assert!(write_metrics.num_partitions > 0); + assert!(write_metrics.num_removed_files > 0); + assert_common_write_metrics(write_metrics); + + let snapshot_bytes = table + .log_store + .read_commit_entry(2) + .await? 
+ .expect("failed to get snapshot bytes"); + let version_actions = get_actions(2, snapshot_bytes).await?; + + let cdc_actions = version_actions + .iter() + .filter(|action| matches!(action, &&Action::Cdc(_))) + .collect_vec(); + assert!(cdc_actions.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_dont_write_cdc_with_overwrite_predicate_unpartitioned() -> TestResult { + let delta_schema = TestSchemas::simple(); + let table: DeltaTable = DeltaOps::new_in_memory() + .create() + .with_columns(delta_schema.fields().cloned()) + .with_partition_columns(["id"]) + .with_configuration_property(TableProperty::EnableChangeDataFeed, Some("true")) + .await + .unwrap(); + assert_eq!(table.version(), 0); + + let schema = Arc::new(ArrowSchema::try_from(delta_schema)?); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("1"), Some("2"), Some("3")])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + Arc::new(StringArray::from(vec![ + Some("yes"), + Some("yes"), + Some("no"), + ])), + ], + ) + .unwrap(); + + let second_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(StringArray::from(vec![Some("3")])), + Arc::new(Int32Array::from(vec![Some(3)])), + Arc::new(StringArray::from(vec![Some("yes")])), + ], + ) + .unwrap(); + + let table = DeltaOps(table) + .write(vec![batch]) + .await + .expect("Failed to write first batch"); + assert_eq!(table.version(), 1); + + let table = DeltaOps(table) + .write([second_batch]) + .with_save_mode(crate::protocol::SaveMode::Overwrite) + .with_replace_where("value=3") + .await + .unwrap(); + assert_eq!(table.version(), 2); + + let ctx = SessionContext::new(); + let cdf_scan = DeltaOps(table.clone()) + .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_version(0) + .build() + .await + .expect("Failed to load CDF"); + + let mut batches = collect_batches( + cdf_scan + .properties() + .output_partitioning() + .partition_count(), + cdf_scan, + ctx, + ) + .await + .expect("Failed to collect batches"); + + // The batches will contain a current _commit_timestamp which shouldn't be check_append_only + let _: Vec<_> = batches.iter_mut().map(|b| b.remove_column(4)).collect(); + + assert_batches_sorted_eq! {[ + "+-------+----------+--------------+-----------------+----+", + "| value | modified | _change_type | _commit_version | id |", + "+-------+----------+--------------+-----------------+----+", + "| 1 | yes | insert | 1 | 1 |", + "| 2 | yes | insert | 1 | 2 |", + "| 3 | no | delete | 2 | 3 |", + "| 3 | no | insert | 1 | 3 |", + "| 3 | yes | insert | 2 | 3 |", + "+-------+----------+--------------+-----------------+----+", + ], &batches } + + let snapshot_bytes = table + .log_store + .read_commit_entry(2) + .await? 
+ .expect("failed to get snapshot bytes"); + let version_actions = get_actions(2, snapshot_bytes).await?; + + let cdc_actions = version_actions + .iter() + .filter(|action| matches!(action, &&Action::Cdc(_))) + .collect_vec(); + assert!(!cdc_actions.is_empty()); + Ok(()) + } } diff --git a/crates/core/src/operations/writer.rs b/crates/core/src/operations/writer.rs index f04d68e412..3c9d3bda97 100644 --- a/crates/core/src/operations/writer.rs +++ b/crates/core/src/operations/writer.rs @@ -2,10 +2,10 @@ use std::collections::HashMap; -use arrow::datatypes::SchemaRef as ArrowSchemaRef; -use arrow::error::ArrowError; -use arrow::record_batch::RecordBatch; +use arrow_array::RecordBatch; +use arrow_schema::{ArrowError, SchemaRef as ArrowSchemaRef}; use bytes::Bytes; +use delta_kernel::expressions::Scalar; use indexmap::IndexMap; use object_store::{path::Path, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -15,7 +15,7 @@ use tracing::debug; use crate::crate_version; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, PartitionsExt, Scalar}; +use crate::kernel::{Add, PartitionsExt}; use crate::storage::ObjectStoreRef; use crate::writer::record_batch::{divide_by_partition_values, PartitionResult}; use crate::writer::stats::create_add; @@ -368,7 +368,8 @@ impl PartitionWriter { let file_size = buffer.len() as i64; // write file to object store - self.object_store.put(&path, buffer).await?; + self.object_store.put(&path, buffer.into()).await?; + self.files_written.push( create_add( &self.config.partition_values, diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index 67994c5e49..fc2238d03b 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -8,6 +8,7 @@ use arrow_schema::ArrowError; use chrono::{Datelike, NaiveDate, NaiveDateTime, Utc}; use futures::{StreamExt, TryStreamExt}; +use itertools::Itertools; use lazy_static::lazy_static; use object_store::{Error, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -27,6 +28,7 @@ use crate::logstore::LogStore; use crate::table::state::DeltaTableState; use crate::table::{get_partition_col_data_types, CheckPoint, CheckPointBuilder}; use crate::{open_table_with_version, DeltaTable}; + type SchemaPath = Vec; /// Error returned when there is an error during creating a checkpoint. @@ -57,7 +59,7 @@ enum CheckpointError { source: ArrowError, }, - #[error("missing rewquired action type in snapshot: {0}")] + #[error("missing required action type in snapshot: {0}")] MissingActionType(String), } @@ -169,14 +171,16 @@ pub async fn create_checkpoint_for( let object_store = log_store.object_store(); debug!("Writing checkpoint to {:?}.", checkpoint_path); - object_store.put(&checkpoint_path, parquet_bytes).await?; + object_store + .put(&checkpoint_path, parquet_bytes.into()) + .await?; let last_checkpoint_content: Value = serde_json::to_value(checkpoint)?; let last_checkpoint_content = bytes::Bytes::from(serde_json::to_vec(&last_checkpoint_content)?); debug!("Writing _last_checkpoint to {:?}.", last_checkpoint_path); object_store - .put(&last_checkpoint_path, last_checkpoint_content) + .put(&last_checkpoint_path, last_checkpoint_content.into()) .await?; Ok(()) @@ -259,7 +263,8 @@ fn parquet_bytes_from_state( // Collect a map of paths that require special stats conversion. 
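
Several hunks above (in `writer.rs` and `checkpoints.rs`) change `ObjectStore::put` call sites from passing `Bytes` to passing a `PutPayload` via `buffer.into()`, following the newer `object_store` API. A small hedged sketch of that conversion against the in-memory store, independent of the crate's own wrappers:

```rust
use bytes::Bytes;
use object_store::{memory::InMemory, path::Path, ObjectStore};

#[tokio::main]
async fn main() -> object_store::Result<()> {
    let store = InMemory::new();
    let path = Path::from("_delta_log/00000000000000000000.checkpoint.parquet");

    // Older object_store versions accepted Bytes directly; newer ones take a
    // PutPayload, and `Bytes: Into<PutPayload>` makes a plain `.into()` sufficient.
    let buffer = Bytes::from_static(b"checkpoint bytes");
    store.put(&path, buffer.into()).await?;

    // Reading it back confirms the payload landed as expected.
    let fetched = store.get(&path).await?.bytes().await?;
    assert_eq!(fetched, Bytes::from_static(b"checkpoint bytes"));
    Ok(())
}
```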
let mut stats_conversions: Vec<(SchemaPath, DataType)> = Vec::new(); - collect_stats_conversions(&mut stats_conversions, schema.fields().as_slice()); + let fields = schema.fields().collect_vec(); + collect_stats_conversions(&mut stats_conversions, fields.as_slice()); // if any, tombstones do not include extended file metadata, we must omit the extended metadata fields from the remove schema // See https://github.com/delta-io/delta/blob/master/PROTOCOL.md#add-file-and-remove-file @@ -477,7 +482,7 @@ fn typed_partition_value_from_option_string( } } -fn collect_stats_conversions(paths: &mut Vec<(SchemaPath, DataType)>, fields: &[StructField]) { +fn collect_stats_conversions(paths: &mut Vec<(SchemaPath, DataType)>, fields: &[&StructField]) { let mut _path = SchemaPath::new(); fields .iter() @@ -498,9 +503,7 @@ fn collect_field_conversion( DataType::Struct(struct_field) => { let struct_fields = struct_field.fields(); current_path.push(field.name().to_owned()); - struct_fields - .iter() - .for_each(|f| collect_field_conversion(current_path, all_paths, f)); + struct_fields.for_each(|f| collect_field_conversion(current_path, all_paths, f)); current_path.pop(); } _ => { /* noop */ } @@ -560,7 +563,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(crate::protocol::SaveMode::Ignore) .await .unwrap(); @@ -592,7 +595,7 @@ mod tests { let mut table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(crate::protocol::SaveMode::Ignore) .await .unwrap(); @@ -668,7 +671,7 @@ mod tests { let table = DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_save_mode(crate::protocol::SaveMode::Ignore) .await .unwrap(); @@ -802,9 +805,8 @@ mod tests { #[test] fn collect_stats_conversions_test() { let delta_schema: StructType = serde_json::from_value(SCHEMA.clone()).unwrap(); - let fields = delta_schema.fields(); + let fields = delta_schema.fields().collect_vec(); let mut paths = Vec::new(); - collect_stats_conversions(&mut paths, fields.as_slice()); assert_eq!(2, paths.len()); diff --git a/crates/core/src/protocol/mod.rs b/crates/core/src/protocol/mod.rs index 9cfa429fde..f82f48411a 100644 --- a/crates/core/src/protocol/mod.rs +++ b/crates/core/src/protocol/mod.rs @@ -2,9 +2,11 @@ #![allow(non_camel_case_types)] -pub mod checkpoints; -mod parquet_read; -mod time_utils; +use std::borrow::Borrow; +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; +use std::mem::take; +use std::str::FromStr; use arrow_schema::ArrowError; use futures::StreamExt; @@ -13,18 +15,17 @@ use object_store::{path::Path, Error as ObjectStoreError, ObjectStore}; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::borrow::Borrow; -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; -use std::mem::take; -use std::str::FromStr; use tracing::{debug, error}; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove}; +use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove, StructField, TableFeatures}; use crate::logstore::LogStore; use crate::table::CheckPoint; +pub mod checkpoints; +mod parquet_read; +mod time_utils; + /// Error returned when an invalid Delta log action is encountered. 
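
A recurring change in these hunks is that the schema's `fields()` now yields an iterator of field references instead of a `Vec`, so call sites switch to `fields().cloned()` when owned fields are needed (e.g. `.with_columns(...)`) or materialize a `Vec<&StructField>` with itertools' `collect_vec` before passing a slice of references. A standalone sketch of the pattern; the `StructField` here is a toy stand-in, not the kernel type:

```rust
use itertools::Itertools;

// Toy stand-in for the kernel's StructField; only the shape matters here.
#[derive(Clone)]
struct StructField {
    name: String,
}

// Mirrors the new signature style: a slice of references rather than owned fields.
fn collect_names(fields: &[&StructField]) -> Vec<String> {
    fields.iter().map(|f| f.name.clone()).collect()
}

fn main() {
    let schema = vec![
        StructField { name: "id".to_owned() },
        StructField { name: "value".to_owned() },
    ];

    // An iterator of references, materialized once so it can be passed as a slice.
    let fields = schema.iter().collect_vec();
    assert_eq!(collect_names(fields.as_slice()), vec!["id", "value"]);

    // When owned fields are needed, clone the iterator's items instead.
    let owned: Vec<StructField> = schema.iter().cloned().collect();
    assert_eq!(owned.len(), 2);
}
```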
#[allow(missing_docs)] #[derive(thiserror::Error, Debug)] @@ -196,18 +197,9 @@ impl PartialStats { let null_count = take(&mut self.null_count); Stats { num_records: self.num_records, - min_values: match min_values { - Some(minv) => minv, - None => HashMap::default(), - }, - max_values: match max_values { - Some(maxv) => maxv, - None => HashMap::default(), - }, - null_count: match null_count { - Some(nc) => nc, - None => HashMap::default(), - }, + min_values: min_values.unwrap_or_default(), + max_values: max_values.unwrap_or_default(), + null_count: null_count.unwrap_or_default(), } } } @@ -267,17 +259,11 @@ impl Add { /// Returns the serde_json representation of stats contained in the action if present. /// Since stats are defined as optional in the protocol, this may be None. - fn get_json_stats(&self) -> Result, serde_json::error::Error> { - let ps: Result, serde_json::error::Error> = self - .stats + pub fn get_json_stats(&self) -> Result, serde_json::error::Error> { + self.stats .as_ref() - .map_or(Ok(None), |s| serde_json::from_str(s)); - - match ps { - Ok(Some(mut partial)) => Ok(Some(partial.as_stats())), - Ok(None) => Ok(None), - Err(e) => Err(e), - } + .map(|stats| serde_json::from_str(stats).map(|mut ps: PartialStats| ps.as_stats())) + .transpose() } } @@ -326,6 +312,13 @@ pub struct MergePredicate { #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub enum DeltaOperation { + /// Represents a Delta `Add Column` operation. + /// Used to add new columns or field in a struct + AddColumn { + /// Fields added to existing schema + fields: Vec, + }, + /// Represents a Delta `Create` operation. /// Would usually only create the table, if also data is written, /// a `Write` operations is more appropriate @@ -371,6 +364,12 @@ pub enum DeltaOperation { expr: String, }, + /// Add table features to a table + AddFeature { + /// Name of the feature + name: Vec, + }, + /// Drops constraints from a table DropConstraint { /// Constraints name @@ -458,6 +457,7 @@ impl DeltaOperation { pub fn name(&self) -> &str { // operation names taken from https://learn.microsoft.com/en-us/azure/databricks/delta/history#--operation-metrics-keys match &self { + DeltaOperation::AddColumn { .. } => "ADD COLUMN", DeltaOperation::Create { mode: SaveMode::Overwrite, .. @@ -476,6 +476,7 @@ impl DeltaOperation { DeltaOperation::VacuumEnd { .. } => "VACUUM END", DeltaOperation::AddConstraint { .. } => "ADD CONSTRAINT", DeltaOperation::DropConstraint { .. } => "DROP CONSTRAINT", + DeltaOperation::AddFeature { .. } => "ADD FEATURE", } } @@ -513,6 +514,8 @@ impl DeltaOperation { match self { Self::Optimize { .. } | Self::SetTableProperties { .. } + | Self::AddColumn { .. } + | Self::AddFeature { .. } | Self::VacuumStart { .. } | Self::VacuumEnd { .. } | Self::AddConstraint { .. 
} @@ -1082,6 +1085,7 @@ mod tests { } #[tokio::test] + #[ignore = "column mapping not yet supported."] async fn test_with_column_mapping() { // test table with column mapping and partitions let path = "../test/tests/data/table_with_column_mapping"; @@ -1225,6 +1229,15 @@ mod tests { assert_eq!(&expected_null_count, null_count_column); } + #[tokio::test] + async fn test_table_checkpoint_not_always_with_stats() { + let path = "../test/tests/data/delta-checkpoint-stats-optional"; + let mut table = crate::open_table(path).await.unwrap(); + table.load().await.unwrap(); + + assert_eq!(2, table.snapshot().unwrap().file_actions().unwrap().len()); + } + #[tokio::test] async fn test_only_struct_stats() { // test table with no json stats diff --git a/crates/core/src/schema/partitions.rs b/crates/core/src/schema/partitions.rs index c766c1d630..23abb3896e 100644 --- a/crates/core/src/schema/partitions.rs +++ b/crates/core/src/schema/partitions.rs @@ -1,12 +1,13 @@ //! Delta Table partition handling logic. -//! -use serde::{Serialize, Serializer}; use std::cmp::Ordering; use std::collections::HashMap; use std::convert::TryFrom; +use delta_kernel::expressions::Scalar; +use serde::{Serialize, Serializer}; + use crate::errors::DeltaTableError; -use crate::kernel::{DataType, PrimitiveType, Scalar}; +use crate::kernel::{scalars::ScalarExt, DataType, PrimitiveType}; /// A special value used in Hive to represent the null partition in partitioned tables pub const NULL_PARTITION_VALUE_DATA_PATH: &str = "__HIVE_DEFAULT_PARTITION__"; @@ -32,6 +33,42 @@ pub enum PartitionValue { NotIn(Vec), } +#[derive(Clone, Debug, PartialEq)] +struct ScalarHelper<'a>(&'a Scalar); + +impl PartialOrd for ScalarHelper<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + use Scalar::*; + match (self.0, other.0) { + (Null(_), Null(_)) => Some(Ordering::Equal), + (Integer(a), Integer(b)) => a.partial_cmp(b), + (Long(a), Long(b)) => a.partial_cmp(b), + (Short(a), Short(b)) => a.partial_cmp(b), + (Byte(a), Byte(b)) => a.partial_cmp(b), + (Float(a), Float(b)) => a.partial_cmp(b), + (Double(a), Double(b)) => a.partial_cmp(b), + (String(a), String(b)) => a.partial_cmp(b), + (Boolean(a), Boolean(b)) => a.partial_cmp(b), + (Timestamp(a), Timestamp(b)) => a.partial_cmp(b), + (TimestampNtz(a), TimestampNtz(b)) => a.partial_cmp(b), + (Date(a), Date(b)) => a.partial_cmp(b), + (Binary(a), Binary(b)) => a.partial_cmp(b), + (Decimal(a, p1, s1), Decimal(b, p2, s2)) => { + // TODO implement proper decimal comparison + if p1 != p2 || s1 != s2 { + return None; + }; + a.partial_cmp(b) + } + // TODO should we make an assumption about the ordering of nulls? + // rigth now this is only used for internal purposes. + (Null(_), _) => Some(Ordering::Less), + (_, Null(_)) => Some(Ordering::Greater), + _ => None, + } + } +} + /// A Struct used for filtering a DeltaTable partition by key and value. #[derive(Clone, Debug, PartialEq, Eq)] pub struct PartitionFilter { @@ -49,7 +86,7 @@ fn compare_typed_value( match data_type { DataType::Primitive(primitive_type) => { let other = primitive_type.parse_scalar(filter_value).ok()?; - partition_value.partial_cmp(&other) + ScalarHelper(partition_value).partial_cmp(&ScalarHelper(&other)) } // NOTE: complex types are not supported as partition columns _ => None, @@ -239,6 +276,37 @@ impl DeltaTablePartition { } } +/// +/// A HivePartition string is represented by a "key=value" format. 
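
The `ScalarHelper` newtype introduced above appears to exist because `Scalar` now comes from `delta_kernel`, so this crate can no longer implement `PartialOrd` on it directly (orphan rule); wrapping a reference in a local type sidesteps that. A minimal standalone sketch of the same pattern, using a toy enum rather than the kernel `Scalar`:

```rust
use std::cmp::Ordering;

// Toy stand-in for a foreign scalar type we cannot implement std traits for directly.
#[derive(PartialEq)]
enum Scalar {
    Integer(i32),
    String(String),
    Null,
}

// Local newtype over a reference: the orphan rule allows trait impls on this.
#[derive(PartialEq)]
struct ScalarHelper<'a>(&'a Scalar);

impl PartialOrd for ScalarHelper<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        use Scalar::*;
        match (self.0, other.0) {
            (Integer(a), Integer(b)) => a.partial_cmp(b),
            (String(a), String(b)) => a.partial_cmp(b),
            (Null, Null) => Some(Ordering::Equal),
            // Like the hunk above, nulls sort below everything else...
            (Null, _) => Some(Ordering::Less),
            (_, Null) => Some(Ordering::Greater),
            // ...and values of different non-null types are simply not ordered.
            _ => None,
        }
    }
}

fn main() {
    let a = Scalar::Integer(1);
    let b = Scalar::Integer(2);
    assert_eq!(
        ScalarHelper(&a).partial_cmp(&ScalarHelper(&b)),
        Some(Ordering::Less)
    );
    assert_eq!(
        ScalarHelper(&a).partial_cmp(&ScalarHelper(&Scalar::String("x".into()))),
        None
    );
}
```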
+/// +/// ```rust +/// # use delta_kernel::expressions::Scalar; +/// use deltalake_core::DeltaTablePartition; +/// +/// let hive_part = "ds=2023-01-01"; +/// let partition = DeltaTablePartition::try_from(hive_part).unwrap(); +/// assert_eq!("ds", partition.key); +/// assert_eq!(Scalar::String("2023-01-01".into()), partition.value); +/// ``` +impl TryFrom<&str> for DeltaTablePartition { + type Error = DeltaTableError; + + /// Try to create a DeltaTable partition from a HivePartition string. + /// Returns a DeltaTableError if the string is not in the form of a HivePartition. + fn try_from(partition: &str) -> Result { + let partition_splitted: Vec<&str> = partition.split('=').collect(); + match partition_splitted { + partition_splitted if partition_splitted.len() == 2 => Ok(DeltaTablePartition { + key: partition_splitted[0].to_owned(), + value: Scalar::String(partition_splitted[1].to_owned()), + }), + _ => Err(DeltaTableError::PartitionError { + partition: partition.to_string(), + }), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -289,4 +357,133 @@ mod tests { "date NOT IN ('2023-11-04', '2023-06-07')", ); } + + #[test] + fn tryfrom_invalid() { + let buf = "this-is-not-a-partition"; + let partition = DeltaTablePartition::try_from(buf); + assert!(partition.is_err()); + } + + #[test] + fn tryfrom_valid() { + let buf = "ds=2024-04-01"; + let partition = DeltaTablePartition::try_from(buf); + assert!(partition.is_ok()); + let partition = partition.unwrap(); + assert_eq!(partition.key, "ds"); + assert_eq!(partition.value, Scalar::String("2024-04-01".into())); + } + + #[test] + fn test_create_delta_table_partition() { + let year = "2021".to_string(); + let path = format!("year={year}"); + assert_eq!( + DeltaTablePartition::try_from(path.as_ref()).unwrap(), + DeltaTablePartition { + key: "year".into(), + value: Scalar::String(year.into()), + } + ); + + let _wrong_path = "year=2021/month="; + assert!(matches!( + DeltaTablePartition::try_from(_wrong_path).unwrap_err(), + DeltaTableError::PartitionError { + partition: _wrong_path + }, + )) + } + + #[test] + fn test_match_partition() { + let partition_2021 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2021".into()), + }; + let partition_2020 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2020".into()), + }; + let partition_2019 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2019".into()), + }; + + let partition_year_2020_filter = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2020".to_string()), + }; + let partition_month_12_filter = PartitionFilter { + key: "month".to_string(), + value: PartitionValue::Equal("12".to_string()), + }; + let string_type = DataType::Primitive(PrimitiveType::String); + + assert!(!partition_year_2020_filter.match_partition(&partition_2021, &string_type)); + assert!(partition_year_2020_filter.match_partition(&partition_2020, &string_type)); + assert!(!partition_year_2020_filter.match_partition(&partition_2019, &string_type)); + assert!(!partition_month_12_filter.match_partition(&partition_2019, &string_type)); + + /* TODO: To be re-enabled at a future date, needs some type futzing + let partition_2020_12_31_23_59_59 = DeltaTablePartition { + key: "time".into(), + value: PrimitiveType::TimestampNtz.parse_scalar("2020-12-31 23:59:59").expect("Failed to parse timestamp"), + }; + + let partition_time_2020_12_31_23_59_59_filter = PartitionFilter { + key: "time".to_string(), + value: PartitionValue::Equal("2020-12-31 
23:59:59.000000".into()), + }; + + assert!(partition_time_2020_12_31_23_59_59_filter.match_partition( + &partition_2020_12_31_23_59_59, + &DataType::Primitive(PrimitiveType::TimestampNtz) + )); + assert!(!partition_time_2020_12_31_23_59_59_filter + .match_partition(&partition_2020_12_31_23_59_59, &string_type)); + */ + } + + #[test] + fn test_match_filters() { + let partitions = vec![ + DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2021".into()), + }, + DeltaTablePartition { + key: "month".into(), + value: Scalar::String("12".into()), + }, + ]; + + let string_type = DataType::Primitive(PrimitiveType::String); + let partition_data_types: HashMap<&String, &DataType> = vec![ + (&partitions[0].key, &string_type), + (&partitions[1].key, &string_type), + ] + .into_iter() + .collect(); + + let valid_filters = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2021".to_string()), + }; + + let valid_filter_month = PartitionFilter { + key: "month".to_string(), + value: PartitionValue::Equal("12".to_string()), + }; + + let invalid_filter = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2020".to_string()), + }; + + assert!(valid_filters.match_partitions(&partitions, &partition_data_types),); + assert!(valid_filter_month.match_partitions(&partitions, &partition_data_types),); + assert!(!invalid_filter.match_partitions(&partitions, &partition_data_types),); + } } diff --git a/crates/core/src/storage/file.rs b/crates/core/src/storage/file.rs index c63a00dae6..100faafcc5 100644 --- a/crates/core/src/storage/file.rs +++ b/crates/core/src/storage/file.rs @@ -1,17 +1,17 @@ //! Local file storage backend. This backend read and write objects from local filesystem. //! //! The local file storage backend is multi-writer safe. +use std::ops::Range; +use std::sync::Arc; use bytes::Bytes; use futures::stream::BoxStream; use object_store::{ local::LocalFileSystem, path::Path as ObjectStorePath, Error as ObjectStoreError, GetOptions, - GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, PutOptions, PutResult, + GetResult, ListResult, ObjectMeta, ObjectStore, PutOptions, PutResult, Result as ObjectStoreResult, }; -use std::ops::Range; -use std::sync::Arc; -use tokio::io::AsyncWrite; +use object_store::{MultipartUpload, PutMultipartOpts, PutPayload}; use url::Url; const STORE_NAME: &str = "DeltaLocalObjectStore"; @@ -106,14 +106,14 @@ impl From for ObjectStoreError { /// Multi-writer support for different platforms: /// /// * Modern Linux kernels are well supported. However because Linux implementation leverages -/// `RENAME_NOREPLACE`, older versions of the kernel might not work depending on what filesystem is -/// being used: +/// `RENAME_NOREPLACE`, older versions of the kernel might not work depending on what filesystem is +/// being used: /// * ext4 requires >= Linux 3.15 /// * btrfs, shmem, and cif requires >= Linux 3.17 /// * xfs requires >= Linux 4.0 /// * ext2, minix, reiserfs, jfs, vfat, and bpf requires >= Linux 4.9 /// * Darwin is supported but not fully tested. -/// Patches welcome. +/// Patches welcome. /// * Support for other platforms are not implemented at the moment. 
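
The multi-writer guarantees described above hinge on a rename that refuses to overwrite an existing destination (`RENAME_NOREPLACE` on newer Linux kernels, with a generic fallback elsewhere). Below is a hedged sketch of the two-system-call fallback idea only, not the crate's exact implementation or error mapping:

```rust
use std::{fs, io, path::Path};

/// Emulate "rename, but fail if the destination already exists" without kernel
/// support: hard-link the source to the destination (which errors with
/// `AlreadyExists` if the destination is taken), then remove the source.
fn rename_noreplace(from: &Path, to: &Path) -> io::Result<()> {
    fs::hard_link(from, to)?;
    fs::remove_file(from)
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir();
    let from = dir.join("delta-rename-src.tmp");
    let to = dir.join("delta-rename-dst.tmp");

    fs::write(&from, b"commit")?;
    let _ = fs::remove_file(&to); // make sure the destination is free for the demo

    rename_noreplace(&from, &to)?;
    assert_eq!(fs::read(&to)?, b"commit".to_vec());
    fs::remove_file(&to)
}
```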
#[derive(Debug)] pub struct FileStorageBackend { @@ -166,14 +166,18 @@ impl std::fmt::Display for FileStorageBackend { #[async_trait::async_trait] impl ObjectStore for FileStorageBackend { - async fn put(&self, location: &ObjectStorePath, bytes: Bytes) -> ObjectStoreResult { + async fn put( + &self, + location: &ObjectStorePath, + bytes: PutPayload, + ) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &ObjectStorePath, - bytes: Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -254,16 +258,16 @@ impl ObjectStore for FileStorageBackend { async fn put_multipart( &self, location: &ObjectStorePath, - ) -> ObjectStoreResult<(MultipartId, Box)> { + ) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &ObjectStorePath, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } @@ -275,10 +279,7 @@ async fn rename_noreplace(from: &str, to: &str) -> Result<(), LocalFileSystemErr } // Generic implementation (Requires 2 system calls) -#[cfg(not(any( - all(target_os = "linux", target_env = "gnu", glibc_renameat2), - target_os = "macos" -)))] +#[cfg(not(any(all(target_os = "linux", target_env = "gnu"), target_os = "macos")))] mod imp { use super::*; @@ -319,10 +320,7 @@ mod imp { } // Optimized implementations (Only 1 system call) -#[cfg(any( - all(target_os = "linux", target_env = "gnu", glibc_renameat2), - target_os = "macos" -))] +#[cfg(any(all(target_os = "linux", target_env = "gnu"), target_os = "macos"))] mod imp { use super::*; use std::ffi::CString; diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 3c38a337af..0ad1435d1c 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -1,36 +1,336 @@ //! Object storage backend abstraction layer for Delta Table transaction logs and data - -use dashmap::DashMap; -use object_store::limit::LimitStore; use std::collections::HashMap; use std::sync::{Arc, OnceLock}; +use crate::{DeltaResult, DeltaTableError}; +use dashmap::DashMap; +use futures::future::BoxFuture; +use futures::FutureExt; +use futures::TryFutureExt; use lazy_static::lazy_static; +use object_store::limit::LimitStore; +use object_store::local::LocalFileSystem; +use object_store::memory::InMemory; +use object_store::prefix::PrefixStore; +use object_store::{GetOptions, PutOptions, PutPayload, PutResult}; use serde::{Deserialize, Serialize}; +use tokio::runtime::{Builder as RuntimeBuilder, Handle, Runtime}; use url::Url; -pub mod file; -pub mod retry_ext; -pub mod utils; - -use crate::{DeltaResult, DeltaTableError}; - +use bytes::Bytes; +use futures::stream::BoxStream; pub use object_store; -use object_store::local::LocalFileSystem; -use object_store::memory::InMemory; pub use object_store::path::{Path, DELIMITER}; -use object_store::prefix::PrefixStore; pub use object_store::{ DynObjectStore, Error as ObjectStoreError, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, Result as ObjectStoreResult, }; +use object_store::{MultipartUpload, PutMultipartOpts}; pub use retry_ext::ObjectStoreRetryExt; +use std::ops::Range; pub use utils::*; +pub mod file; +pub mod retry_ext; +pub mod utils; + lazy_static! 
{ static ref DELTA_LOG_PATH: Path = Path::from("_delta_log"); } +/// Creates static IO Runtime with optional configuration +fn io_rt(config: Option<&RuntimeConfig>) -> &Runtime { + static IO_RT: OnceLock = OnceLock::new(); + IO_RT.get_or_init(|| { + let rt = match config { + Some(config) => { + let mut builder = if config.multi_threaded { + RuntimeBuilder::new_multi_thread() + } else { + RuntimeBuilder::new_current_thread() + }; + let builder = builder.worker_threads(config.worker_threads); + let mut builder = if config.enable_io && config.enable_time { + builder.enable_all() + } else if !config.enable_io && config.enable_time { + builder.enable_time() + } else { + builder + }; + #[cfg(unix)] + { + if config.enable_io && !config.enable_time { + builder = builder.enable_io(); + } + } + builder + .thread_name( + config + .thread_name + .clone() + .unwrap_or("IO-runtime".to_string()), + ) + .build() + } + _ => Runtime::new(), + }; + rt.expect("Failed to create a tokio runtime for IO.") + }) +} + +/// Configuration for Tokio runtime +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RuntimeConfig { + multi_threaded: bool, + worker_threads: usize, + thread_name: Option, + enable_io: bool, + enable_time: bool, +} + +/// Provide custom Tokio RT or a runtime config +#[derive(Debug, Clone)] +pub enum IORuntime { + /// Tokio RT handle + RT(Handle), + /// Configuration for tokio runtime + Config(RuntimeConfig), +} + +impl Default for IORuntime { + fn default() -> Self { + IORuntime::RT(io_rt(None).handle().clone()) + } +} + +impl IORuntime { + /// Retrieves the Tokio runtime for IO bound operations + pub fn get_handle(&self) -> Handle { + match self { + IORuntime::RT(handle) => handle, + IORuntime::Config(config) => io_rt(Some(config)).handle(), + } + .clone() + } +} + +/// Wraps any object store and runs IO in it's own runtime [EXPERIMENTAL] +pub struct DeltaIOStorageBackend { + inner: ObjectStoreRef, + rt_handle: Handle, +} + +impl DeltaIOStorageBackend { + /// create wrapped object store which spawns tasks in own runtime + pub fn new(storage: ObjectStoreRef, rt_handle: Handle) -> Self { + Self { + inner: storage, + rt_handle, + } + } + + /// spawn taks on IO runtime + pub fn spawn_io_rt( + &self, + f: F, + store: &Arc, + path: Path, + ) -> BoxFuture<'_, ObjectStoreResult> + where + F: for<'a> FnOnce( + &'a Arc, + &'a Path, + ) -> BoxFuture<'a, ObjectStoreResult> + + Send + + 'static, + O: Send + 'static, + { + let store = Arc::clone(store); + let fut = self.rt_handle.spawn(async move { f(&store, &path).await }); + fut.unwrap_or_else(|e| match e.try_into_panic() { + Ok(p) => std::panic::resume_unwind(p), + Err(e) => Err(ObjectStoreError::JoinError { source: e }), + }) + .boxed() + } + + /// spawn taks on IO runtime + pub fn spawn_io_rt_from_to( + &self, + f: F, + store: &Arc, + from: Path, + to: Path, + ) -> BoxFuture<'_, ObjectStoreResult> + where + F: for<'a> FnOnce( + &'a Arc, + &'a Path, + &'a Path, + ) -> BoxFuture<'a, ObjectStoreResult> + + Send + + 'static, + O: Send + 'static, + { + let store = Arc::clone(store); + let fut = self + .rt_handle + .spawn(async move { f(&store, &from, &to).await }); + fut.unwrap_or_else(|e| match e.try_into_panic() { + Ok(p) => std::panic::resume_unwind(p), + Err(e) => Err(ObjectStoreError::JoinError { source: e }), + }) + .boxed() + } +} + +impl std::fmt::Debug for DeltaIOStorageBackend { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(fmt, "DeltaIOStorageBackend") + } +} + +impl std::fmt::Display for 
DeltaIOStorageBackend { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(fmt, "DeltaIOStorageBackend") + } +} + +#[async_trait::async_trait] +impl ObjectStore for DeltaIOStorageBackend { + async fn put(&self, location: &Path, bytes: PutPayload) -> ObjectStoreResult { + self.spawn_io_rt( + |store, path| store.put(path, bytes), + &self.inner, + location.clone(), + ) + .await + } + + async fn put_opts( + &self, + location: &Path, + bytes: PutPayload, + options: PutOptions, + ) -> ObjectStoreResult { + self.spawn_io_rt( + |store, path| store.put_opts(path, bytes, options), + &self.inner, + location.clone(), + ) + .await + } + + async fn get(&self, location: &Path) -> ObjectStoreResult { + self.spawn_io_rt(|store, path| store.get(path), &self.inner, location.clone()) + .await + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> ObjectStoreResult { + self.spawn_io_rt( + |store, path| store.get_opts(path, options), + &self.inner, + location.clone(), + ) + .await + } + + async fn get_range(&self, location: &Path, range: Range) -> ObjectStoreResult { + self.spawn_io_rt( + |store, path| store.get_range(path, range), + &self.inner, + location.clone(), + ) + .await + } + + async fn head(&self, location: &Path) -> ObjectStoreResult { + self.spawn_io_rt( + |store, path| store.head(path), + &self.inner, + location.clone(), + ) + .await + } + + async fn delete(&self, location: &Path) -> ObjectStoreResult<()> { + self.spawn_io_rt( + |store, path| store.delete(path), + &self.inner, + location.clone(), + ) + .await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> ObjectStoreResult { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.spawn_io_rt_from_to( + |store, from_path, to_path| store.copy(from_path, to_path), + &self.inner, + from.clone(), + to.clone(), + ) + .await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.spawn_io_rt_from_to( + |store, from_path, to_path| store.copy_if_not_exists(from_path, to_path), + &self.inner, + from.clone(), + to.clone(), + ) + .await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.spawn_io_rt_from_to( + |store, from_path, to_path| store.rename_if_not_exists(from_path, to_path), + &self.inner, + from.clone(), + to.clone(), + ) + .await + } + + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { + self.spawn_io_rt( + |store, path| store.put_multipart(path), + &self.inner, + location.clone(), + ) + .await + } + + async fn put_multipart_opts( + &self, + location: &Path, + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.spawn_io_rt( + |store, path| store.put_multipart_opts(path, options), + &self.inner, + location.clone(), + ) + .await + } +} + /// Sharable reference to [`ObjectStore`] pub type ObjectStoreRef = Arc; diff --git a/crates/core/src/storage/retry_ext.rs b/crates/core/src/storage/retry_ext.rs index 81a52f3ba3..b63c29a8ae 100644 --- a/crates/core/src/storage/retry_ext.rs +++ b/crates/core/src/storage/retry_ext.rs @@ -1,7 +1,6 @@ //! 
Retry extension for [`ObjectStore`] -use bytes::Bytes; -use object_store::{path::Path, Error, ObjectStore, PutResult, Result}; +use object_store::{path::Path, Error, ObjectStore, PutPayload, PutResult, Result}; use tracing::log::*; /// Retry extension for [`ObjectStore`] @@ -29,7 +28,7 @@ pub trait ObjectStoreRetryExt: ObjectStore { async fn put_with_retries( &self, location: &Path, - bytes: Bytes, + bytes: PutPayload, max_retries: usize, ) -> Result { let mut attempt_number = 1; diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index b421a6199b..5631079269 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -13,7 +13,7 @@ use url::Url; use super::DeltaTable; use crate::errors::{DeltaResult, DeltaTableError}; use crate::logstore::LogStoreRef; -use crate::storage::{factories, StorageOptions}; +use crate::storage::{factories, IORuntime, StorageOptions}; #[allow(dead_code)] #[derive(Debug, thiserror::Error)] @@ -51,7 +51,7 @@ pub enum DeltaVersion { } /// Configuration options for delta table -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct DeltaTableConfig { /// Indicates whether our use case requires tracking tombstones. @@ -79,6 +79,9 @@ pub struct DeltaTableConfig { /// Control the number of records to read / process from the commit / checkpoint files /// when processing record batches. pub log_batch_size: usize, + #[serde(skip_serializing, skip_deserializing)] + /// When a runtime handler is provided, all IO tasks are spawn in that handle + pub io_runtime: Option, } impl Default for DeltaTableConfig { @@ -88,68 +91,34 @@ impl Default for DeltaTableConfig { require_files: true, log_buffer_size: num_cpus::get() * 4, log_batch_size: 1024, + io_runtime: None, } } } -/// Load-time delta table configuration options -#[derive(Debug)] -pub struct DeltaTableLoadOptions { - /// table root uri - pub table_uri: String, - /// backend to access storage system - pub storage_backend: Option<(Arc, Url)>, - /// specify the version we are going to load: a time stamp, a version, or just the newest - /// available version - pub version: DeltaVersion, - /// Indicates whether our use case requires tracking tombstones. - /// This defaults to `true` - /// - /// Read-only applications never require tombstones. Tombstones - /// are only required when writing checkpoints, so even many writers - /// may want to skip them. - pub require_tombstones: bool, - /// Indicates whether DeltaTable should track files. - /// This defaults to `true` - /// - /// Some append-only applications might have no need of tracking any files. - /// Hence, DeltaTable will be loaded with significant memory reduction. - pub require_files: bool, - /// Controls how many files to buffer from the commit log when updating the table. - /// This defaults to 4 * number of cpus - /// - /// Setting a value greater than 1 results in concurrent calls to the storage api. - /// This can be helpful to decrease latency if there are many files in the log since the - /// last checkpoint, but will also increase memory usage. Possible rate limits of the storage backend should - /// also be considered for optimal performance. - pub log_buffer_size: usize, - /// Control the number of records to read / process from the commit / checkpoint files - /// when processing record batches. 
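
The new `io_runtime` field on `DeltaTableConfig`, together with the `IORuntime` enum and `DeltaIOStorageBackend` added in `storage/mod.rs` above, lets object-store IO be spawned on a dedicated Tokio runtime. A hedged sketch of how a caller might opt in through the builder; the import paths are assumptions based on where the diff places these types:

```rust
use deltalake_core::storage::IORuntime;
use deltalake_core::{DeltaResult, DeltaTableBuilder};

fn main() -> DeltaResult<()> {
    // A runtime dedicated to storage IO; only its handle is handed to the builder.
    let io = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(2)
        .enable_all()
        .thread_name("delta-io")
        .build()
        .expect("failed to build IO runtime");

    // `with_io_runtime` stores the handle in `DeltaTableConfig::io_runtime`, which
    // `build_storage` passes along so the log store can spawn its IO there.
    let table = DeltaTableBuilder::from_uri("memory:///io-runtime-example")
        .with_io_runtime(IORuntime::RT(io.handle().clone()))
        .build()?;

    println!("table configured at {}", table.table_uri());
    Ok(())
}
```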
- pub log_batch_size: usize, -} - -impl DeltaTableLoadOptions { - /// create default table load options for a table uri - pub fn new(table_uri: impl Into) -> Self { - Self { - table_uri: table_uri.into(), - storage_backend: None, - require_tombstones: true, - require_files: true, - log_buffer_size: num_cpus::get() * 4, - version: DeltaVersion::default(), - log_batch_size: 1024, - } +impl PartialEq for DeltaTableConfig { + fn eq(&self, other: &Self) -> bool { + self.require_tombstones == other.require_tombstones + && self.require_files == other.require_files + && self.log_buffer_size == other.log_buffer_size + && self.log_batch_size == other.log_batch_size } } /// builder for configuring a delta table load. #[derive(Debug)] pub struct DeltaTableBuilder { - options: DeltaTableLoadOptions, + /// table root uri + table_uri: String, + /// backend to access storage system + storage_backend: Option<(Arc, Url)>, + /// specify the version we are going to load: a time stamp, a version, or just the newest + /// available version + version: DeltaVersion, storage_options: Option>, #[allow(unused_variables)] allow_http: Option, + table_config: DeltaTableConfig, } impl DeltaTableBuilder { @@ -190,27 +159,30 @@ impl DeltaTableBuilder { debug!("creating table builder with {url}"); Ok(Self { - options: DeltaTableLoadOptions::new(url), + table_uri: url.into(), + storage_backend: None, + version: DeltaVersion::default(), storage_options: None, allow_http: None, + table_config: DeltaTableConfig::default(), }) } /// Sets `require_tombstones=false` to the builder pub fn without_tombstones(mut self) -> Self { - self.options.require_tombstones = false; + self.table_config.require_tombstones = false; self } /// Sets `require_files=false` to the builder pub fn without_files(mut self) -> Self { - self.options.require_files = false; + self.table_config.require_files = false; self } /// Sets `version` to the builder pub fn with_version(mut self, version: i64) -> Self { - self.options.version = DeltaVersion::Version(version); + self.version = DeltaVersion::Version(version); self } @@ -221,7 +193,7 @@ impl DeltaTableBuilder { "Log buffer size should be positive", ))); } - self.options.log_buffer_size = log_buffer_size; + self.table_config.log_buffer_size = log_buffer_size; Ok(self) } @@ -235,7 +207,7 @@ impl DeltaTableBuilder { /// specify a timestamp pub fn with_timestamp(mut self, timestamp: DateTime) -> Self { - self.options.version = DeltaVersion::Timestamp(timestamp); + self.version = DeltaVersion::Timestamp(timestamp); self } @@ -248,20 +220,39 @@ impl DeltaTableBuilder { /// * `storage` - A shared reference to an [`ObjectStore`](object_store::ObjectStore) with "/" pointing at delta table root (i.e. where `_delta_log` is located). /// * `location` - A url corresponding to the storagle location of `storage`. pub fn with_storage_backend(mut self, storage: Arc, location: Url) -> Self { - self.options.storage_backend = Some((storage, location)); + self.storage_backend = Some((storage, location)); self } /// Set options used to initialize storage backend /// /// Options may be passed in the HashMap or set as environment variables. See documentation of - /// underlying object store implementation for details. + /// underlying object store implementation for details. Trailing slash will be trimmed in + /// the option's value to avoid failures. Trimming will only be done if one or more of below + /// conditions are met: + /// - key ends with `_URL` (e.g., `ENDPOINT_URL`, `S3_URL`, `JDBC_URL`, etc.) 
+ /// - value starts with `http://`` or `https://` (e.g., `http://localhost:8000/`) /// /// - [Azure options](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants) /// - [S3 options](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants) /// - [Google options](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants) pub fn with_storage_options(mut self, storage_options: HashMap) -> Self { - self.storage_options = Some(storage_options); + self.storage_options = Some( + storage_options + .clone() + .into_iter() + .map(|(k, v)| { + let needs_trim = v.starts_with("http://") + || v.starts_with("https://") + || k.to_lowercase().ends_with("_url"); + if needs_trim { + (k.to_owned(), v.trim_end_matches('/').to_owned()) + } else { + (k, v) + } + }) + .collect(), + ); self } @@ -273,6 +264,12 @@ impl DeltaTableBuilder { self } + /// Provide a custom runtime handle or runtime config + pub fn with_io_runtime(mut self, io_runtime: IORuntime) -> Self { + self.table_config.io_runtime = Some(io_runtime); + self + } + /// Storage options for configuring backend object store pub fn storage_options(&self) -> StorageOptions { let mut storage_options = self.storage_options.clone().unwrap_or_default(); @@ -286,22 +283,28 @@ impl DeltaTableBuilder { } /// Build a delta storage backend for the given config - pub fn build_storage(self) -> DeltaResult { - debug!("build_storage() with {}", &self.options.table_uri); - let location = Url::parse(&self.options.table_uri).map_err(|_| { - DeltaTableError::NotATable(format!( - "Could not turn {} into a URL", - self.options.table_uri - )) + pub fn build_storage(&self) -> DeltaResult { + debug!("build_storage() with {}", self.table_uri); + let location = Url::parse(&self.table_uri).map_err(|_| { + DeltaTableError::NotATable(format!("Could not turn {} into a URL", self.table_uri)) })?; - if let Some((store, _url)) = self.options.storage_backend.as_ref() { + if let Some((store, _url)) = self.storage_backend.as_ref() { debug!("Loading a logstore with a custom store: {store:?}"); - crate::logstore::logstore_with(store.clone(), location, self.storage_options()) + crate::logstore::logstore_with( + store.clone(), + location, + self.storage_options(), + self.table_config.io_runtime.clone(), + ) } else { // If there has been no backend defined just default to the normal logstore look up debug!("Loading a logstore based off the location: {location:?}"); - crate::logstore::logstore_for(location, self.storage_options()) + crate::logstore::logstore_for( + location, + self.storage_options(), + self.table_config.io_runtime.clone(), + ) } } @@ -310,18 +313,12 @@ impl DeltaTableBuilder { /// This will not load the log, i.e. the table is not initialized. 
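The `with_storage_options` hunk above only trims a trailing slash when the key looks like a URL key or the value is an http(s) URL, so other values keep their trailing `/`. A standalone sketch of that rule, with a hypothetical helper name:

```rust
use std::collections::HashMap;

// Hypothetical helper mirroring the trimming rule above: drop a trailing '/'
// only for keys ending in `_URL` or values starting with http:// or https://.
fn trim_storage_options(opts: HashMap<String, String>) -> HashMap<String, String> {
    opts.into_iter()
        .map(|(k, v)| {
            let needs_trim = v.starts_with("http://")
                || v.starts_with("https://")
                || k.to_lowercase().ends_with("_url");
            if needs_trim {
                (k, v.trim_end_matches('/').to_owned())
            } else {
                (k, v)
            }
        })
        .collect()
}
```

Under this rule `AWS_ENDPOINT_URL=http://localhost:9000/` becomes `http://localhost:9000`, while values such as `s3a://bucket-name/` are left untouched, matching the test cases added later in this patch.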
To get an initialized /// table use the `load` function pub fn build(self) -> DeltaResult { - let config = DeltaTableConfig { - require_tombstones: self.options.require_tombstones, - require_files: self.options.require_files, - log_buffer_size: self.options.log_buffer_size, - log_batch_size: self.options.log_batch_size, - }; - Ok(DeltaTable::new(self.build_storage()?, config)) + Ok(DeltaTable::new(self.build_storage()?, self.table_config)) } /// Build the [`DeltaTable`] and load its state pub async fn load(self) -> DeltaResult { - let version = self.options.version; + let version = self.version; let mut table = self.build()?; match version { DeltaVersion::Newest => table.load().await?, @@ -561,4 +558,49 @@ mod tests { DeltaTableBuilder::from_valid_uri("this://is.nonsense") .expect_err("this should be an error"); } + + #[test] + fn test_writer_storage_opts_url_trim() { + let cases = [ + // Trim Case 1 - Key indicating a url + ("SOMETHING_URL", "something://else/", "something://else"), + // Trim Case 2 - Value https url ending with slash + ( + "SOMETHING", + "http://something:port/", + "http://something:port", + ), + // Trim Case 3 - Value https url ending with slash + ( + "SOMETHING", + "https://something:port/", + "https://something:port", + ), + // No Trim Case 4 - JDBC MySQL url with slash + ( + "SOME_JDBC_PREFIX", + "jdbc:mysql://mysql.db.server:3306/", + "jdbc:mysql://mysql.db.server:3306/", + ), + // No Trim Case 5 - S3A file system link + ("SOME_S3_LINK", "s3a://bucket-name/", "s3a://bucket-name/"), + // No Trim Case 6 - Not a url but ending with slash + ("SOME_RANDOM_STRING", "a1b2c3d4e5f#/", "a1b2c3d4e5f#/"), + // No Trim Case 7 - Some value not a url + ( + "SOME_VALUE", + "/ This is some value 123 /", + "/ This is some value 123 /", + ), + ]; + for (key, val, expected) in cases { + let table_uri = Url::parse("memory:///test/tests/data/delta-0.8.0").unwrap(); + let mut storage_opts = HashMap::::new(); + storage_opts.insert(key.to_owned(), val.to_owned()); + + let table = DeltaTableBuilder::from_uri(table_uri).with_storage_options(storage_opts); + let found_opts = table.storage_options(); + assert_eq!(expected, found_opts.0.get(key).unwrap()); + } + } } diff --git a/crates/core/src/table/config.rs b/crates/core/src/table/config.rs index 05fb0c53ca..bc04ec6e91 100644 --- a/crates/core/src/table/config.rs +++ b/crates/core/src/table/config.rs @@ -2,19 +2,19 @@ use std::time::Duration; use std::{collections::HashMap, str::FromStr}; +use delta_kernel::features::ColumnMappingMode; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; -use crate::errors::DeltaTableError; - use super::Constraint; +use crate::errors::DeltaTableError; /// Typed property keys that can be defined on a delta table /// /// #[derive(PartialEq, Eq, Hash)] #[non_exhaustive] -pub enum DeltaConfigKey { +pub enum TableProperty { /// true for this Delta table to be append-only. If append-only, /// existing records cannot be deleted, and existing values cannot be updated. 
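A hedged usage sketch of the refactored builder (the URI and options below are hypothetical): the setters now write straight into `DeltaTableConfig`, which `build`/`load` hand to the log store.

```rust
use std::collections::HashMap;

use deltalake_core::errors::DeltaResult;
use deltalake_core::{DeltaTable, DeltaTableBuilder};

// Sketch only: open a specific version of a table with explicit storage options.
// Assumes the matching object-store backend (e.g. the AWS handler) is registered.
async fn open_versioned_table(
    storage_options: HashMap<String, String>,
) -> DeltaResult<DeltaTable> {
    DeltaTableBuilder::from_uri("s3://my-bucket/my-table")
        .with_storage_options(storage_options)
        .with_version(3)
        .load()
        .await
}
```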
AppendOnly, @@ -116,7 +116,7 @@ pub enum DeltaConfigKey { CheckpointPolicy, } -impl AsRef for DeltaConfigKey { +impl AsRef for TableProperty { fn as_ref(&self) -> &str { match self { Self::AppendOnly => "delta.appendOnly", @@ -146,7 +146,7 @@ impl AsRef for DeltaConfigKey { } } -impl FromStr for DeltaConfigKey { +impl FromStr for TableProperty { type Err = DeltaTableError; fn from_str(s: &str) -> Result { @@ -210,33 +210,35 @@ pub struct TableConfig<'a>(pub(crate) &'a HashMap>); /// Default num index cols pub const DEFAULT_NUM_INDEX_COLS: i32 = 32; +/// Default target file size +pub const DEFAULT_TARGET_FILE_SIZE: i64 = 104857600; impl<'a> TableConfig<'a> { table_config!( ( "true for this Delta table to be append-only", - DeltaConfigKey::AppendOnly, + TableProperty::AppendOnly, append_only, bool, false ), ( "true for Delta Lake to write file statistics in checkpoints in JSON format for the stats column.", - DeltaConfigKey::CheckpointWriteStatsAsJson, + TableProperty::CheckpointWriteStatsAsJson, write_stats_as_json, bool, true ), ( "true for Delta Lake to write file statistics to checkpoints in struct format", - DeltaConfigKey::CheckpointWriteStatsAsStruct, + TableProperty::CheckpointWriteStatsAsStruct, write_stats_as_struct, bool, false ), ( "The target file size in bytes or higher units for file tuning", - DeltaConfigKey::TargetFileSize, + TableProperty::TargetFileSize, target_file_size, i64, // Databricks / spark defaults to 104857600 (bytes) or 100mb @@ -244,14 +246,14 @@ impl<'a> TableConfig<'a> { ), ( "true to enable change data feed.", - DeltaConfigKey::EnableChangeDataFeed, + TableProperty::EnableChangeDataFeed, enable_change_data_feed, bool, false ), ( "true to enable deletion vectors and predictive I/O for updates.", - DeltaConfigKey::EnableDeletionVectors, + TableProperty::EnableDeletionVectors, enable_deletion_vectors, bool, // in databricks the default is dependent on the workspace settings and runtime version @@ -260,21 +262,21 @@ impl<'a> TableConfig<'a> { ), ( "The number of columns for Delta Lake to collect statistics about for data skipping.", - DeltaConfigKey::DataSkippingNumIndexedCols, + TableProperty::DataSkippingNumIndexedCols, num_indexed_cols, i32, 32 ), ( "whether to cleanup expired logs", - DeltaConfigKey::EnableExpiredLogCleanup, + TableProperty::EnableExpiredLogCleanup, enable_expired_log_cleanup, bool, true ), ( "Interval (number of commits) after which a new checkpoint should be created", - DeltaConfigKey::CheckpointInterval, + TableProperty::CheckpointInterval, checkpoint_interval, i32, 100 @@ -295,7 +297,7 @@ impl<'a> TableConfig<'a> { static ref DEFAULT_DURATION: Duration = parse_interval("interval 1 weeks").unwrap(); } self.0 - .get(DeltaConfigKey::DeletedFileRetentionDuration.as_ref()) + .get(TableProperty::DeletedFileRetentionDuration.as_ref()) .and_then(|o| o.as_ref().and_then(|v| parse_interval(v).ok())) .unwrap_or_else(|| DEFAULT_DURATION.to_owned()) } @@ -311,7 +313,7 @@ impl<'a> TableConfig<'a> { static ref DEFAULT_DURATION: Duration = parse_interval("interval 30 days").unwrap(); } self.0 - .get(DeltaConfigKey::LogRetentionDuration.as_ref()) + .get(TableProperty::LogRetentionDuration.as_ref()) .and_then(|o| o.as_ref().and_then(|v| parse_interval(v).ok())) .unwrap_or_else(|| DEFAULT_DURATION.to_owned()) } @@ -321,7 +323,7 @@ impl<'a> TableConfig<'a> { /// Valid values are `Serializable` and `WriteSerializable`. 
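With the rename from `DeltaConfigKey` to `TableProperty`, typed property keys are still passed to the create builder as before. A sketch against an in-memory table (the schema argument and retention value are assumptions for illustration):

```rust
use deltalake_core::errors::DeltaResult;
use deltalake_core::kernel::StructType;
use deltalake_core::table::config::TableProperty;
use deltalake_core::{DeltaOps, DeltaTable};

// Sketch only: set a typed table property at create time via the renamed key enum.
async fn create_with_retention(schema: &StructType) -> DeltaResult<DeltaTable> {
    DeltaOps::new_in_memory()
        .create()
        .with_columns(schema.fields().cloned())
        .with_configuration_property(
            TableProperty::LogRetentionDuration,
            Some("interval 30 days".to_string()),
        )
        .await
}
```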
pub fn isolation_level(&self) -> IsolationLevel { self.0 - .get(DeltaConfigKey::IsolationLevel.as_ref()) + .get(TableProperty::IsolationLevel.as_ref()) .and_then(|o| o.as_ref().and_then(|v| v.parse().ok())) .unwrap_or_default() } @@ -329,7 +331,7 @@ impl<'a> TableConfig<'a> { /// Policy applied during chepoint creation pub fn checkpoint_policy(&self) -> CheckpointPolicy { self.0 - .get(DeltaConfigKey::CheckpointPolicy.as_ref()) + .get(TableProperty::CheckpointPolicy.as_ref()) .and_then(|o| o.as_ref().and_then(|v| v.parse().ok())) .unwrap_or_default() } @@ -337,7 +339,7 @@ impl<'a> TableConfig<'a> { /// Return the column mapping mode according to delta.columnMapping.mode pub fn column_mapping_mode(&self) -> ColumnMappingMode { self.0 - .get(DeltaConfigKey::ColumnMappingMode.as_ref()) + .get(TableProperty::ColumnMappingMode.as_ref()) .and_then(|o| o.as_ref().and_then(|v| v.parse().ok())) .unwrap_or_default() } @@ -360,7 +362,7 @@ impl<'a> TableConfig<'a> { /// This property takes precedence over [num_indexed_cols](Self::num_indexed_cols). pub fn stats_columns(&self) -> Option> { self.0 - .get(DeltaConfigKey::DataSkippingStatsColumns.as_ref()) + .get(TableProperty::DataSkippingStatsColumns.as_ref()) .and_then(|o| o.as_ref().map(|v| v.split(',').collect())) } } @@ -463,49 +465,6 @@ impl FromStr for CheckpointPolicy { } } -#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)] -/// The Column Mapping modes used for reading and writing data -#[serde(rename_all = "camelCase")] -pub enum ColumnMappingMode { - /// No column mapping is applied - None, - /// Columns are mapped by their field_id in parquet - Id, - /// Columns are mapped to a physical name - Name, -} - -impl Default for ColumnMappingMode { - fn default() -> Self { - Self::None - } -} - -impl AsRef for ColumnMappingMode { - fn as_ref(&self) -> &str { - match self { - Self::None => "none", - Self::Id => "id", - Self::Name => "name", - } - } -} - -impl FromStr for ColumnMappingMode { - type Err = DeltaTableError; - - fn from_str(s: &str) -> Result { - match s.to_ascii_lowercase().as_str() { - "none" => Ok(Self::None), - "id" => Ok(Self::Id), - "name" => Ok(Self::Name), - _ => Err(DeltaTableError::Generic( - "Invalid string for ColumnMappingMode".into(), - )), - } - } -} - const SECONDS_PER_MINUTE: u64 = 60; const SECONDS_PER_HOUR: u64 = 60 * SECONDS_PER_MINUTE; const SECONDS_PER_DAY: u64 = 24 * SECONDS_PER_HOUR; @@ -577,7 +536,7 @@ mod tests { // change to 2 day let mut md = dummy_metadata(); md.configuration.insert( - DeltaConfigKey::DeletedFileRetentionDuration + TableProperty::DeletedFileRetentionDuration .as_ref() .to_string(), Some("interval 2 day".to_string()), @@ -608,7 +567,7 @@ mod tests { // change to false let mut md = dummy_metadata(); md.configuration.insert( - DeltaConfigKey::EnableExpiredLogCleanup.as_ref().into(), + TableProperty::EnableExpiredLogCleanup.as_ref().into(), Some("false".to_string()), ); let config = TableConfig(&md.configuration); diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs index 4b818513b0..65d84985c7 100644 --- a/crates/core/src/table/mod.rs +++ b/crates/core/src/table/mod.rs @@ -30,6 +30,7 @@ pub mod state_arrow; /// Metadata for a checkpoint file #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] +#[serde(rename_all = "camelCase")] pub struct CheckPoint { /// Delta table version pub(crate) version: i64, // 20 digits decimals @@ -163,7 +164,6 @@ pub(crate) fn get_partition_col_data_types<'a>( // When loading `partitionValues_parsed` we have to convert the 
stringified partition values back to the correct data type. schema .fields() - .iter() .filter_map(|f| { if metadata .partition_columns @@ -240,9 +240,12 @@ impl<'de> Deserialize<'de> for DeltaTable { let storage_config: LogStoreConfig = seq .next_element()? .ok_or_else(|| A::Error::invalid_length(0, &self))?; - let log_store = - crate::logstore::logstore_for(storage_config.location, storage_config.options) - .map_err(|_| A::Error::custom("Failed deserializing LogStore"))?; + let log_store = crate::logstore::logstore_for( + storage_config.location, + storage_config.options, + None, + ) + .map_err(|_| A::Error::custom("Failed deserializing LogStore"))?; let table = DeltaTable { state, @@ -288,6 +291,11 @@ impl DeltaTable { self.log_store.object_store() } + /// Check if the [`DeltaTable`] exists + pub async fn verify_deltatable_existence(&self) -> DeltaResult { + self.log_store.is_delta_table_location().await + } + /// The URI of the underlying data pub fn table_uri(&self) -> String { self.log_store.root_uri() @@ -619,4 +627,21 @@ mod tests { .unwrap(); (dt, tmp_dir) } + + #[test] + fn checkpoint_should_serialize_in_camel_case() { + let checkpoint = CheckPoint { + version: 1, + size: 1, + parts: None, + size_in_bytes: Some(1), + num_of_add_files: Some(1), + }; + + let checkpoint_json_serialized = + serde_json::to_string(&checkpoint).expect("could not serialize to json"); + + assert!(checkpoint_json_serialized.contains("sizeInBytes")); + assert!(checkpoint_json_serialized.contains("numOfAddFiles")); + } } diff --git a/crates/core/src/table/state.rs b/crates/core/src/table/state.rs index 9544198581..0876dc9e79 100644 --- a/crates/core/src/table/state.rs +++ b/crates/core/src/table/state.rs @@ -181,6 +181,11 @@ impl DeltaTableState { self.snapshot.schema() } + /// Get the table config which is loaded with of the snapshot + pub fn load_config(&self) -> &DeltaTableConfig { + &self.snapshot.load_config() + } + /// Well known table configuration pub fn table_config(&self) -> TableConfig<'_> { self.snapshot.table_config() diff --git a/crates/core/src/table/state_arrow.rs b/crates/core/src/table/state_arrow.rs index fe35787cb4..e4a374b763 100644 --- a/crates/core/src/table/state_arrow.rs +++ b/crates/core/src/table/state_arrow.rs @@ -6,17 +6,17 @@ use std::borrow::Cow; use std::collections::{HashMap, HashSet, VecDeque}; use std::sync::Arc; -use arrow::compute::cast; -use arrow::compute::kernels::cast_utils::Parser; use arrow_array::types::{Date32Type, TimestampMicrosecondType}; use arrow_array::{ Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Float64Array, Int64Array, NullArray, StringArray, StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, }; +use arrow_cast::cast; +use arrow_cast::parse::Parser; use arrow_schema::{DataType, Field, Fields, TimeUnit}; +use delta_kernel::features::ColumnMappingMode; use itertools::Itertools; -use super::config::ColumnMappingMode; use super::state::DeltaTableState; use crate::errors::DeltaTableError; use crate::kernel::{Add, DataType as DeltaDataType, StructType}; @@ -149,7 +149,13 @@ impl DeltaTableState { .map( |name| -> Result { let schema = metadata.schema()?; - let field = schema.field_with_name(name)?; + let field = + schema + .field(name) + .ok_or(DeltaTableError::MetadataError(format!( + "Invalid partition column {0}", + name + )))?; Ok(field.data_type().try_into()?) 
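The new `verify_deltatable_existence` helper above delegates to `LogStore::is_delta_table_location`. A sketch of probing for a table without loading its log (the URI is hypothetical):

```rust
use deltalake_core::errors::DeltaResult;
use deltalake_core::DeltaTableBuilder;

// Sketch only: `build()` does not read the log, so the handle can be used to
// check for a `_delta_log` before attempting a full `load()`.
async fn table_exists(uri: &str) -> DeltaResult<bool> {
    let table = DeltaTableBuilder::from_uri(uri).build()?;
    table.verify_deltatable_existence().await
}
```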
}, ) @@ -173,12 +179,12 @@ impl DeltaTableState { .map(|name| -> Result<_, DeltaTableError> { let physical_name = self .schema() - .field_with_name(name) - .or(Err(DeltaTableError::MetadataError(format!( + .field(name) + .ok_or(DeltaTableError::MetadataError(format!( "Invalid partition column {0}", name - ))))? - .physical_name()? + )))? + .physical_name(column_mapping_mode)? .to_string(); Ok((physical_name, name.as_str())) }) @@ -674,7 +680,6 @@ impl<'a> SchemaLeafIterator<'a> { SchemaLeafIterator { fields_remaining: schema .fields() - .iter() .map(|field| (vec![field.name().as_ref()], field.data_type())) .collect(), } diff --git a/crates/core/src/test_utils/factories/actions.rs b/crates/core/src/test_utils/factories/actions.rs new file mode 100644 index 0000000000..1f1e13a793 --- /dev/null +++ b/crates/core/src/test_utils/factories/actions.rs @@ -0,0 +1,153 @@ +use std::collections::HashMap; + +use arrow_array::*; +use chrono::Utc; +use delta_kernel::schema::{DataType, PrimitiveType}; +use object_store::path::Path; +use object_store::ObjectMeta; + +use super::{get_parquet_bytes, DataFactory, FileStats}; +use crate::kernel::arrow::extract::{self as ex}; +use crate::kernel::partitions_schema; +use crate::kernel::{Add, Metadata, Protocol, ReaderFeatures, Remove, StructType, WriterFeatures}; +use crate::operations::transaction::PROTOCOL; + +pub struct ActionFactory; + +impl ActionFactory { + pub fn add_raw( + meta: ObjectMeta, + stats: FileStats, + partition_values: HashMap>, + data_change: bool, + ) -> Add { + Add { + path: meta.location.to_string(), + size: meta.size as i64, + partition_values, + data_change, + modification_time: meta.last_modified.timestamp_millis(), + stats: serde_json::to_string(&stats).ok(), + tags: Some(HashMap::new()), + default_row_commit_version: None, + deletion_vector: None, + base_row_id: None, + clustering_provider: None, + stats_parsed: None, + } + } + + pub fn add( + schema: &StructType, + bounds: HashMap<&str, (&str, &str)>, + partition_columns: Vec, + data_change: bool, + ) -> Add { + let partitions_schema = partitions_schema(&schema, &partition_columns).unwrap(); + let partition_values = if let Some(p_schema) = partitions_schema { + let batch = DataFactory::record_batch(&p_schema, 1, &bounds).unwrap(); + p_schema + .fields() + .map(|f| { + let value = match f.data_type() { + DataType::Primitive(PrimitiveType::String) => { + let arr = + ex::extract_and_cast::(&batch, f.name()).unwrap(); + Some(arr.value(0).to_string()) + } + DataType::Primitive(PrimitiveType::Integer) => { + let arr = ex::extract_and_cast::(&batch, f.name()).unwrap(); + Some(arr.value(0).to_string()) + } + DataType::Primitive(PrimitiveType::Long) => { + let arr = ex::extract_and_cast::(&batch, f.name()).unwrap(); + Some(arr.value(0).to_string()) + } + _ => unimplemented!(), + }; + (f.name().to_owned(), value) + }) + .collect() + } else { + HashMap::new() + }; + + let data_schema = StructType::new( + schema + .fields() + .filter(|f| !partition_columns.contains(f.name())) + .cloned() + .collect(), + ); + + let batch = DataFactory::record_batch(&data_schema, 10, &bounds).unwrap(); + let stats = DataFactory::file_stats(&batch).unwrap(); + let path = Path::from(generate_file_name()); + let data = get_parquet_bytes(&batch).unwrap(); + let meta = ObjectMeta { + location: path.clone(), + size: data.len(), + last_modified: Utc::now(), + e_tag: None, + version: None, + }; + ActionFactory::add_raw(meta, stats, partition_values, data_change) + } + + pub fn remove(add: &Add, data_change: bool) -> Remove 
{ + add_as_remove(add, data_change) + } + + pub fn protocol( + max_reader: Option, + max_writer: Option, + reader_features: Option>, + writer_features: Option>, + ) -> Protocol { + Protocol { + min_reader_version: max_reader.unwrap_or(PROTOCOL.default_reader_version()), + min_writer_version: max_writer.unwrap_or(PROTOCOL.default_writer_version()), + writer_features: writer_features.map(|i| i.into_iter().collect()), + reader_features: reader_features.map(|i| i.into_iter().collect()), + } + } + + pub fn metadata( + schema: &StructType, + partition_columns: Option>, + configuration: Option>>, + ) -> Metadata { + Metadata { + id: uuid::Uuid::new_v4().hyphenated().to_string(), + format: Default::default(), + schema_string: serde_json::to_string(schema).unwrap(), + partition_columns: partition_columns + .map(|i| i.into_iter().map(|c| c.to_string()).collect()) + .unwrap_or_default(), + configuration: configuration.unwrap_or_default(), + name: None, + description: None, + created_time: Some(Utc::now().timestamp_millis()), + } + } +} + +pub fn add_as_remove(add: &Add, data_change: bool) -> Remove { + Remove { + path: add.path.clone(), + data_change, + deletion_timestamp: Some(Utc::now().timestamp_millis()), + size: Some(add.size), + extended_file_metadata: Some(true), + partition_values: Some(add.partition_values.clone()), + tags: add.tags.clone(), + deletion_vector: add.deletion_vector.clone(), + base_row_id: add.base_row_id, + default_row_commit_version: add.default_row_commit_version, + } +} + +fn generate_file_name() -> String { + let file_name = uuid::Uuid::new_v4().hyphenated().to_string(); + format!("part-0001-{}.parquet", file_name) +} diff --git a/crates/core/src/test_utils/factories/data.rs b/crates/core/src/test_utils/factories/data.rs new file mode 100644 index 0000000000..d69869ae92 --- /dev/null +++ b/crates/core/src/test_utils/factories/data.rs @@ -0,0 +1,247 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use arrow_arith::aggregate::{max as arrow_max, max_string, min as arrow_min, min_string}; +use arrow_array::*; +use arrow_schema::DataType as ArrowDataType; +use bytes::Bytes; +use delta_kernel::expressions::Scalar; +use parquet::arrow::arrow_writer::ArrowWriter; +use parquet::file::properties::WriterProperties; +use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform}; + +use super::super::TestResult; +use super::FileStats; +use crate::kernel::scalars::ScalarExt; +use crate::kernel::{DataType, PrimitiveType, StructType}; + +pub struct DataFactory; + +impl DataFactory { + pub fn record_batch( + schema: &StructType, + length: usize, + bounds: &HashMap<&str, (&str, &str)>, + ) -> TestResult { + generate_random_batch(schema, length, bounds) + } + + pub fn file_stats(batch: &RecordBatch) -> TestResult { + get_stats(batch) + } + + pub fn array( + data_type: DataType, + length: usize, + min_val: Option, + max_val: Option, + ) -> TestResult { + generate_random_array(data_type, length, min_val, max_val) + } +} + +fn generate_random_batch( + schema: &StructType, + length: usize, + bounds: &HashMap<&str, (&str, &str)>, +) -> TestResult { + schema + .fields() + .map(|field| { + let (min_val, max_val) = + if let Some((min_val, max_val)) = bounds.get(field.name().as_str()) { + (*min_val, *max_val) + } else { + // NOTE providing illegal strings will resolve to default bounds, + // an empty string will resolve to null. 
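The data factory draws random column values with `rand`'s `Uniform` distribution. A standalone sketch of that sampling pattern under the rand 0.8 API (the bounds are arbitrary):

```rust
use rand::distributions::{Distribution, Uniform};

// Standalone sketch of the sampling used by the data factory: draw `length`
// i32 values uniformly from an inclusive range.
fn random_i32s(length: usize, min: i32, max: i32) -> Vec<i32> {
    let mut rng = rand::thread_rng();
    let between = Uniform::from(min..=max);
    (0..length).map(|_| between.sample(&mut rng)).collect()
}
```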
+ ("$%&", "$%&") + }; + generate_random_array( + field.data_type().clone(), + length, + Some(min_val.to_string()), + Some(max_val.to_string()), + ) + }) + .collect::>>() + .map(|columns| RecordBatch::try_new(Arc::new(schema.try_into().unwrap()), columns).unwrap()) +} + +pub fn generate_random_array( + data_type: DataType, + length: usize, + min_val: Option, + max_val: Option, +) -> TestResult { + use DataType::*; + use PrimitiveType::*; + let mut rng = rand::thread_rng(); + + match data_type { + Primitive(Integer) => { + let min_val = min_val + .and_then(|min| Integer.parse_scalar(&min).ok()) + .unwrap_or(Scalar::Integer(-10)); + let max_val = max_val + .and_then(|max| Integer.parse_scalar(&max).ok()) + .unwrap_or(Scalar::Integer(10)); + let between = match (min_val, max_val) { + (Scalar::Integer(min), Scalar::Integer(max)) => Uniform::from(min..=max), + _ => unreachable!(), + }; + let arr = Int32Array::from( + (0..length) + .map(|_| between.sample(&mut rng)) + .collect::>(), + ); + Ok(Arc::new(arr)) + } + Primitive(Long) => { + let min_val = min_val + .and_then(|min| Long.parse_scalar(&min).ok()) + .unwrap_or(Scalar::Long(-10)); + let max_val = max_val + .and_then(|max| Long.parse_scalar(&max).ok()) + .unwrap_or(Scalar::Long(10)); + let between = match (min_val, max_val) { + (Scalar::Long(min), Scalar::Long(max)) => Uniform::from(min..=max), + _ => unreachable!(), + }; + let arr = Int64Array::from( + (0..length) + .map(|_| between.sample(&mut rng)) + .collect::>(), + ); + Ok(Arc::new(arr)) + } + Primitive(Float) => { + let min_val = min_val + .and_then(|min| Float.parse_scalar(&min).ok()) + .unwrap_or(Scalar::Float(-10.1)); + let max_val = max_val + .and_then(|max| Float.parse_scalar(&max).ok()) + .unwrap_or(Scalar::Float(10.1)); + let between = match (min_val, max_val) { + (Scalar::Float(min), Scalar::Float(max)) => Uniform::from(min..=max), + _ => unreachable!(), + }; + let arr = Float32Array::from( + (0..length) + .map(|_| between.sample(&mut rng)) + .collect::>(), + ); + Ok(Arc::new(arr)) + } + Primitive(Double) => { + let min_val = min_val + .and_then(|min| Double.parse_scalar(&min).ok()) + .unwrap_or(Scalar::Double(-10.1)); + let max_val = max_val + .and_then(|max| Double.parse_scalar(&max).ok()) + .unwrap_or(Scalar::Double(10.1)); + let between = match (min_val, max_val) { + (Scalar::Double(min), Scalar::Double(max)) => Uniform::from(min..=max), + _ => unreachable!(), + }; + let arr = Float64Array::from( + (0..length) + .map(|_| between.sample(&mut rng)) + .collect::>(), + ); + Ok(Arc::new(arr)) + } + Primitive(String) => { + let arr = StringArray::from( + (0..length) + .map(|_| Alphanumeric.sample_string(&mut rng, 3)) + .collect::>(), + ); + Ok(Arc::new(arr)) + } + _ => todo!(), + } +} + +fn get_stats(batch: &RecordBatch) -> TestResult { + use ArrowDataType::*; + + let mut file_stats = FileStats::new(batch.num_rows() as i64); + for (i, field) in batch.schema().fields().iter().enumerate() { + let array = batch.column(i); + let stats = match array.data_type() { + Int8 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Byte(arrow_min(array).unwrap()); + let max = Scalar::Byte(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Int16 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Short(arrow_min(array).unwrap()); + let max = Scalar::Short(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, 
max)) + } + Int32 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Integer(arrow_min(array).unwrap()); + let max = Scalar::Integer(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Int64 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Long(arrow_min(array).unwrap()); + let max = Scalar::Long(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Float32 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Float(arrow_min(array).unwrap()); + let max = Scalar::Float(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Float64 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::Double(arrow_min(array).unwrap()); + let max = Scalar::Double(arrow_max(array).unwrap()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Utf8 => { + let array = array.as_any().downcast_ref::().unwrap(); + let min = Scalar::String(min_string(array).unwrap().into()); + let max = Scalar::String(max_string(array).unwrap().into()); + let null_count = Scalar::Long(array.null_count() as i64); + Some((null_count, min, max)) + } + Struct(_) => None, + _ => todo!(), + }; + if let Some((null_count, min, max)) = stats { + file_stats + .null_count + .insert(field.name().to_string(), null_count.to_json()); + file_stats + .min_values + .insert(field.name().to_string(), min.to_json()); + file_stats + .max_values + .insert(field.name().to_string(), max.to_json()); + } + } + Ok(file_stats) +} + +pub(crate) fn get_parquet_bytes(batch: &RecordBatch) -> TestResult { + let mut data: Vec = Vec::new(); + let props = WriterProperties::builder().build(); + let mut writer = ArrowWriter::try_new(&mut data, batch.schema(), Some(props))?; + writer.write(batch)?; + // writer must be closed to write footer + writer.close()?; + Ok(data.into()) +} diff --git a/crates/core/src/test_utils/factories/mod.rs b/crates/core/src/test_utils/factories/mod.rs new file mode 100644 index 0000000000..551749a89d --- /dev/null +++ b/crates/core/src/test_utils/factories/mod.rs @@ -0,0 +1,66 @@ +use std::collections::HashMap; + +use lazy_static::lazy_static; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::kernel::{DataType, PrimitiveType, StructField, StructType}; + +mod actions; +mod data; + +pub use actions::*; +pub use data::*; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct FileStats { + pub num_records: i64, + pub null_count: HashMap, + pub min_values: HashMap, + pub max_values: HashMap, +} + +impl FileStats { + pub fn new(num_records: i64) -> Self { + Self { + num_records, + null_count: HashMap::new(), + min_values: HashMap::new(), + max_values: HashMap::new(), + } + } +} + +pub struct TestSchemas; + +impl TestSchemas { + /// A simple flat schema with string and integer columns. + /// + /// ### Columns + /// - id: string + /// - value: integer + /// - modified: string + pub fn simple() -> &'static StructType { + lazy_static! 
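The `get_stats` helper above gathers per-column statistics with arrow's aggregate kernels. A standalone sketch of that pattern for a single `Int32` column:

```rust
use arrow_arith::aggregate::{max as arrow_max, min as arrow_min};
use arrow_array::{Array, Int32Array};

// Standalone sketch: the aggregate kernels return `None` for empty or
// all-null columns; `null_count` comes from the `Array` trait.
fn int32_stats(col: &Int32Array) -> (Option<i32>, Option<i32>, usize) {
    (arrow_min(col), arrow_max(col), col.null_count())
}
```

For example, `int32_stats(&Int32Array::from(vec![Some(3), None, Some(7)]))` yields `(Some(3), Some(7), 1)`.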
{ + static ref _simple: StructType = StructType::new(vec![ + StructField::new( + "id".to_string(), + DataType::Primitive(PrimitiveType::String), + true + ), + StructField::new( + "value".to_string(), + DataType::Primitive(PrimitiveType::Integer), + true + ), + StructField::new( + "modified".to_string(), + DataType::Primitive(PrimitiveType::String), + true + ), + ]); + } + &_simple + } +} diff --git a/crates/core/src/test_utils/mod.rs b/crates/core/src/test_utils/mod.rs new file mode 100644 index 0000000000..0d3ff9ed65 --- /dev/null +++ b/crates/core/src/test_utils/mod.rs @@ -0,0 +1,5 @@ +mod factories; + +pub use factories::*; + +pub type TestResult = Result>; diff --git a/crates/core/src/writer/json.rs b/crates/core/src/writer/json.rs index d97d3ef16c..2cf7f6a950 100644 --- a/crates/core/src/writer/json.rs +++ b/crates/core/src/writer/json.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use arrow::record_batch::*; use bytes::Bytes; +use delta_kernel::expressions::Scalar; use indexmap::IndexMap; use object_store::path::Path; use object_store::ObjectStore; @@ -24,7 +25,7 @@ use super::utils::{ }; use super::{DeltaWriter, DeltaWriterError, WriteMode}; use crate::errors::DeltaTableError; -use crate::kernel::{Add, PartitionsExt, Scalar, StructType}; +use crate::kernel::{scalars::ScalarExt, Add, PartitionsExt, StructType}; use crate::storage::ObjectStoreRetryExt; use crate::table::builder::DeltaTableBuilder; use crate::table::config::DEFAULT_NUM_INDEX_COLS; @@ -362,7 +363,9 @@ impl DeltaWriter> for JsonWriter { let path = next_data_path(&prefix, 0, &uuid, &writer.writer_properties); let obj_bytes = Bytes::from(writer.buffer.to_vec()); let file_size = obj_bytes.len() as i64; - self.storage.put_with_retries(&path, obj_bytes, 15).await?; + self.storage + .put_with_retries(&path, obj_bytes.into(), 15) + .await?; actions.push(create_add( &writer.partition_values, @@ -616,7 +619,7 @@ mod tests { .with_location(&path) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .await .unwrap(); table.load().await.expect("Failed to load table"); diff --git a/crates/core/src/writer/record_batch.rs b/crates/core/src/writer/record_batch.rs index c21435dd14..10ba52ae62 100644 --- a/crates/core/src/writer/record_batch.rs +++ b/crates/core/src/writer/record_batch.rs @@ -7,13 +7,13 @@ use std::{collections::HashMap, sync::Arc}; -use arrow::array::{new_null_array, Array, UInt32Array}; -use arrow::compute::{partition, take}; -use arrow::record_batch::RecordBatch; -use arrow_array::ArrayRef; +use arrow_array::{new_null_array, Array, ArrayRef, RecordBatch, UInt32Array}; +use arrow_ord::partition::partition; use arrow_row::{RowConverter, SortField}; use arrow_schema::{ArrowError, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; +use arrow_select::take::take; use bytes::Bytes; +use delta_kernel::expressions::Scalar; use indexmap::IndexMap; use object_store::{path::Path, ObjectStore}; use parquet::{arrow::ArrowWriter, errors::ParquetError}; @@ -28,8 +28,8 @@ use super::utils::{ }; use super::{DeltaWriter, DeltaWriterError, WriteMode}; use crate::errors::DeltaTableError; -use crate::kernel::{Action, Add, PartitionsExt, Scalar, StructType}; -use crate::operations::cast::merge_schema; +use crate::kernel::{scalars::ScalarExt, Action, Add, PartitionsExt, StructType}; +use crate::operations::cast::merge_schema::merge_arrow_schema; use 
crate::storage::ObjectStoreRetryExt; use crate::table::builder::DeltaTableBuilder; use crate::table::config::DEFAULT_NUM_INDEX_COLS; @@ -224,7 +224,9 @@ impl DeltaWriter for RecordBatchWriter { let path = next_data_path(&prefix, 0, &uuid, &writer.writer_properties); let obj_bytes = Bytes::from(writer.buffer.to_vec()); let file_size = obj_bytes.len() as i64; - self.storage.put_with_retries(&path, obj_bytes, 15).await?; + self.storage + .put_with_retries(&path, obj_bytes.into(), 15) + .await?; actions.push(create_add( &writer.partition_values, @@ -319,8 +321,11 @@ impl PartitionWriter { WriteMode::MergeSchema => { debug!("The writer and record batch schemas do not match, merging"); - let merged = - merge_schema(self.arrow_schema.clone(), record_batch.schema().clone())?; + let merged = merge_arrow_schema( + self.arrow_schema.clone(), + record_batch.schema().clone(), + true, + )?; self.arrow_schema = merged; let mut cols = vec![]; @@ -539,7 +544,7 @@ mod tests { let table = DeltaOps(table) .create() .with_partition_columns(partition_cols.to_vec()) - .with_columns(delta_schema.fields().clone()) + .with_columns(delta_schema.fields().cloned()) .await .unwrap(); @@ -659,7 +664,7 @@ mod tests { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partition_cols) .await .unwrap(); @@ -735,7 +740,7 @@ mod tests { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); table.load().await.expect("Failed to load table"); @@ -779,8 +784,7 @@ mod tests { let new_schema = table.metadata().unwrap().schema().unwrap(); let expected_columns = vec!["id", "value", "modified", "vid", "name"]; - let found_columns: Vec<&String> = - new_schema.fields().iter().map(|f| f.name()).collect(); + let found_columns: Vec<&String> = new_schema.fields().map(|f| f.name()).collect(); assert_eq!( expected_columns, found_columns, "The new table schema does not contain all evolved columns as expected" @@ -797,7 +801,7 @@ mod tests { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(["id"]) .await .unwrap(); @@ -928,7 +932,7 @@ mod tests { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .await .unwrap(); table.load().await.expect("Failed to load table"); diff --git a/crates/core/src/writer/stats.rs b/crates/core/src/writer/stats.rs index 0cea01ee6a..e4b93a54f5 100644 --- a/crates/core/src/writer/stats.rs +++ b/crates/core/src/writer/stats.rs @@ -3,7 +3,9 @@ use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{collections::HashMap, ops::AddAssign}; +use delta_kernel::expressions::Scalar; use indexmap::IndexMap; +use itertools::Itertools; use parquet::file::metadata::ParquetMetaData; use parquet::format::FileMetaData; use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor}; @@ -14,7 +16,7 @@ use parquet::{ }; use super::*; -use crate::kernel::{Add, Scalar}; +use crate::kernel::{scalars::ScalarExt, Add}; use 
crate::protocol::{ColumnValueStat, Stats}; /// Creates an [`Add`] log action struct. @@ -129,8 +131,29 @@ fn stats_from_metadata( let mut min_values: HashMap = HashMap::new(); let mut max_values: HashMap = HashMap::new(); let mut null_count: HashMap = HashMap::new(); + let dialect = sqlparser::dialect::GenericDialect {}; let idx_to_iterate = if let Some(stats_cols) = stats_columns { + let stats_cols = stats_cols + .into_iter() + .map(|v| { + match sqlparser::parser::Parser::new(&dialect) + .try_with_sql(v) + .map_err(|e| DeltaTableError::generic(e.to_string()))? + .parse_multipart_identifier() + { + Ok(parts) => Ok(parts.into_iter().map(|v| v.value).join(".")), + Err(e) => { + return Err(DeltaWriterError::DeltaTable( + DeltaTableError::GenericError { + source: Box::new(e), + }, + )) + } + } + }) + .collect::, DeltaWriterError>>()?; + schema_descriptor .columns() .iter() diff --git a/crates/core/src/writer/test_utils.rs b/crates/core/src/writer/test_utils.rs index 093ad7cbd0..be0dfebb66 100644 --- a/crates/core/src/writer/test_utils.rs +++ b/crates/core/src/writer/test_utils.rs @@ -3,14 +3,14 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow::compute::take; use arrow_array::{Int32Array, Int64Array, RecordBatch, StringArray, StructArray, UInt32Array}; use arrow_schema::{DataType, Field, Schema as ArrowSchema}; +use arrow_select::take::take; use crate::kernel::{DataType as DeltaDataType, Metadata, PrimitiveType, StructField, StructType}; use crate::operations::create::CreateBuilder; use crate::operations::DeltaOps; -use crate::{DeltaConfigKey, DeltaTable, DeltaTableBuilder}; +use crate::{DeltaTable, DeltaTableBuilder, TableProperty}; pub type TestResult = Result<(), Box>; @@ -270,13 +270,13 @@ pub fn get_delta_schema_with_nested_struct() -> StructType { } pub async fn setup_table_with_configuration( - key: DeltaConfigKey, + key: TableProperty, value: Option>, ) -> DeltaTable { let table_schema = get_delta_schema(); DeltaOps::new_in_memory() .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_configuration_property(key, value) .await .expect("Failed to create table") @@ -299,7 +299,7 @@ pub async fn create_initialized_table(partition_cols: &[String]) -> DeltaTable { .with_location(table_path.to_str().unwrap()) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partition_cols) .await .unwrap() diff --git a/crates/core/src/writer/utils.rs b/crates/core/src/writer/utils.rs index 3c95942993..864476684a 100644 --- a/crates/core/src/writer/utils.rs +++ b/crates/core/src/writer/utils.rs @@ -4,9 +4,9 @@ use std::io::Write; use std::sync::Arc; -use arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; -use arrow::json::ReaderBuilder; -use arrow::record_batch::*; +use arrow_array::RecordBatch; +use arrow_json::ReaderBuilder; +use arrow_schema::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use object_store::path::Path; use parking_lot::RwLock; use parquet::basic::Compression; diff --git a/crates/core/tests/checkpoint_writer.rs b/crates/core/tests/checkpoint_writer.rs index 696e379569..1be439f9e5 100644 --- a/crates/core/tests/checkpoint_writer.rs +++ b/crates/core/tests/checkpoint_writer.rs @@ -87,7 +87,7 @@ mod delete_expired_delta_log_in_checkpoint { use ::object_store::path::Path as ObjectStorePath; use chrono::Utc; - use 
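The stats hunk above normalizes `dataSkippingStatsColumns` entries by parsing them as multipart SQL identifiers, so quoted dotted names are handled consistently. A standalone sketch of that normalization (the helper name is hypothetical):

```rust
use itertools::Itertools;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

// Standalone sketch: split a possibly quoted, dotted identifier into parts
// and re-join them with '.' as the stats writer does above.
fn normalize_stats_column(col: &str) -> Result<String, ParserError> {
    let dialect = GenericDialect {};
    let parts = Parser::new(&dialect)
        .try_with_sql(col)?
        .parse_multipart_identifier()?;
    Ok(parts.into_iter().map(|part| part.value).join("."))
}
```

For instance, `normalize_stats_column(r#""some.struct".field"#)` resolves to `some.struct.field`.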
deltalake_core::table::config::DeltaConfigKey; + use deltalake_core::table::config::TableProperty; use deltalake_core::*; use maplit::hashmap; @@ -96,8 +96,8 @@ mod delete_expired_delta_log_in_checkpoint { let mut table = fs_common::create_table( "../test/tests/data/checkpoints_with_expired_logs/expired", Some(hashmap! { - DeltaConfigKey::LogRetentionDuration.as_ref().into() => Some("interval 10 minute".to_string()), - DeltaConfigKey::EnableExpiredLogCleanup.as_ref().into() => Some("true".to_string()) + TableProperty::LogRetentionDuration.as_ref().into() => Some("interval 10 minute".to_string()), + TableProperty::EnableExpiredLogCleanup.as_ref().into() => Some("true".to_string()) }), ) .await; @@ -160,8 +160,8 @@ mod delete_expired_delta_log_in_checkpoint { let mut table = fs_common::create_table( "../test/tests/data/checkpoints_with_expired_logs/not_delete_expired", Some(hashmap! { - DeltaConfigKey::LogRetentionDuration.as_ref().into() => Some("interval 1 second".to_string()), - DeltaConfigKey::EnableExpiredLogCleanup.as_ref().into() => Some("false".to_string()) + TableProperty::LogRetentionDuration.as_ref().into() => Some("interval 1 second".to_string()), + TableProperty::EnableExpiredLogCleanup.as_ref().into() => Some("false".to_string()) }), ) .await; @@ -208,7 +208,7 @@ mod checkpoints_with_tombstones { use ::object_store::path::Path as ObjectStorePath; use chrono::Utc; use deltalake_core::kernel::*; - use deltalake_core::table::config::DeltaConfigKey; + use deltalake_core::table::config::TableProperty; use deltalake_core::*; use maplit::hashmap; use parquet::file::reader::{FileReader, SerializedFileReader}; @@ -235,7 +235,7 @@ mod checkpoints_with_tombstones { #[ignore] async fn test_expired_tombstones() { let mut table = fs_common::create_table("../test/tests/data/checkpoints_tombstones/expired", Some(hashmap! 
{ - DeltaConfigKey::DeletedFileRetentionDuration.as_ref().into() => Some("interval 1 minute".to_string()) + TableProperty::DeletedFileRetentionDuration.as_ref().into() => Some("interval 1 minute".to_string()) })).await; let a1 = fs_common::add(3 * 60 * 1000); // 3 mins ago, diff --git a/crates/core/tests/command_merge.rs b/crates/core/tests/command_merge.rs index 59a941a24f..783c858750 100644 --- a/crates/core/tests/command_merge.rs +++ b/crates/core/tests/command_merge.rs @@ -19,7 +19,7 @@ async fn create_table(table_uri: &str, partition: Option>) -> DeltaTab let ops = DeltaOps::try_from_uri(table_uri).await.unwrap(); let table = ops .create() - .with_columns(table_schema.fields().clone()) + .with_columns(table_schema.fields().cloned()) .with_partition_columns(partition.unwrap_or_default()) .await .expect("Failed to create table"); @@ -138,17 +138,17 @@ async fn merge( #[tokio::test] async fn test_merge_concurrent_conflict() { - // No partition key or filter predicate -> Commit conflict + // Overlapping id ranges -> Commit conflict let tmp_dir = tempfile::tempdir().unwrap(); let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); let table_ref1 = create_table(table_uri, Some(vec!["event_date"])).await; let table_ref2 = open_table(table_uri).await.unwrap(); - let (df1, df2) = create_test_data(); + let (df1, _df2) = create_test_data(); let expr = col("target.id").eq(col("source.id")); - let (_table_ref1, _metrics) = merge(table_ref1, df1, expr.clone()).await.unwrap(); - let result = merge(table_ref2, df2, expr).await; + let (_table_ref1, _metrics) = merge(table_ref1, df1.clone(), expr.clone()).await.unwrap(); + let result = merge(table_ref2, df1, expr).await; assert!(matches!( result.as_ref().unwrap_err(), @@ -159,6 +159,23 @@ async fn test_merge_concurrent_conflict() { } } +#[tokio::test] +async fn test_merge_different_range() { + // No overlapping id ranges -> No conflict + let tmp_dir = tempfile::tempdir().unwrap(); + let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); + + let table_ref1 = create_table(table_uri, Some(vec!["event_date"])).await; + let table_ref2 = open_table(table_uri).await.unwrap(); + let (df1, df2) = create_test_data(); + + let expr = col("target.id").eq(col("source.id")); + let (_table_ref1, _metrics) = merge(table_ref1, df1, expr.clone()).await.unwrap(); + let result = merge(table_ref2, df2, expr).await; + + assert!(result.is_ok()); +} + #[tokio::test] async fn test_merge_concurrent_different_partition() { // partition key in predicate -> Successful merge @@ -175,9 +192,7 @@ async fn test_merge_concurrent_different_partition() { let (_table_ref1, _metrics) = merge(table_ref1, df1, expr.clone()).await.unwrap(); let result = merge(table_ref2, df2, expr).await; - // TODO: Currently it throws a Version mismatch error, but the merge commit was successfully - // This bug needs to be fixed, see pull request #2280 - assert!(result.as_ref().is_ok()); + assert!(result.is_ok()); } #[tokio::test] diff --git a/crates/core/tests/command_optimize.rs b/crates/core/tests/command_optimize.rs index 4f26c55fd4..13cbd168e4 100644 --- a/crates/core/tests/command_optimize.rs +++ b/crates/core/tests/command_optimize.rs @@ -249,7 +249,7 @@ async fn test_optimize_with_partitions() -> Result<(), Box> { let partition_values = partition_adds[0].partition_values()?; assert_eq!( partition_values.get("date"), - Some(&deltalake_core::kernel::Scalar::String( + Some(&delta_kernel::expressions::Scalar::String( "2022-05-22".to_string() )) ); diff --git 
a/crates/core/tests/command_restore.rs b/crates/core/tests/command_restore.rs index aa5b598347..5013556ab8 100644 --- a/crates/core/tests/command_restore.rs +++ b/crates/core/tests/command_restore.rs @@ -6,6 +6,7 @@ use deltalake_core::kernel::{DataType, PrimitiveType, StructField}; use deltalake_core::protocol::SaveMode; use deltalake_core::storage::commit_uri_from_version; use deltalake_core::{DeltaOps, DeltaTable}; +use itertools::Itertools; use rand::Rng; use std::error::Error; use std::fs; @@ -103,10 +104,9 @@ async fn test_restore_by_version() -> Result<(), Box> { let table_uri = context.tmp_dir.path().to_str().to_owned().unwrap(); let mut table = DeltaOps::try_from_uri(table_uri).await?; table.0.load_version(1).await?; - assert_eq!( - table.0.snapshot()?.file_actions()?, - result.0.snapshot()?.file_actions()? - ); + let curr_files = table.0.snapshot()?.file_paths_iter().collect_vec(); + let result_files = result.0.snapshot()?.file_paths_iter().collect_vec(); + assert_eq!(curr_files, result_files); let result = DeltaOps(result.0) .restore() diff --git a/crates/core/tests/fs_common/mod.rs b/crates/core/tests/fs_common/mod.rs index 3ef7c82edf..13683b408a 100644 --- a/crates/core/tests/fs_common/mod.rs +++ b/crates/core/tests/fs_common/mod.rs @@ -8,7 +8,9 @@ use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::{GetResult, ObjectStoreResult}; use deltalake_core::DeltaTable; use object_store::path::Path as StorePath; -use object_store::{ObjectStore, PutOptions, PutResult}; +use object_store::{ + MultipartUpload, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, +}; use serde_json::Value; use std::collections::HashMap; use std::fs; @@ -55,7 +57,7 @@ pub async fn create_test_table( .with_location(path) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(schema.fields().clone()) + .with_columns(schema.fields().cloned()) .with_partition_columns(partition_columns) .with_configuration(config) .await @@ -158,14 +160,14 @@ impl SlowStore { #[async_trait::async_trait] impl ObjectStore for SlowStore { /// Save the provided bytes to the specified location. 
- async fn put(&self, location: &StorePath, bytes: bytes::Bytes) -> ObjectStoreResult { + async fn put(&self, location: &StorePath, bytes: PutPayload) -> ObjectStoreResult { self.inner.put(location, bytes).await } async fn put_opts( &self, location: &StorePath, - bytes: bytes::Bytes, + bytes: PutPayload, options: PutOptions, ) -> ObjectStoreResult { self.inner.put_opts(location, bytes, options).await @@ -272,18 +274,15 @@ impl ObjectStore for SlowStore { async fn put_multipart( &self, location: &StorePath, - ) -> ObjectStoreResult<( - object_store::MultipartId, - Box, - )> { + ) -> ObjectStoreResult> { self.inner.put_multipart(location).await } - async fn abort_multipart( + async fn put_multipart_opts( &self, location: &StorePath, - multipart_id: &object_store::MultipartId, - ) -> ObjectStoreResult<()> { - self.inner.abort_multipart(location, multipart_id).await + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await } } diff --git a/crates/core/tests/integration_checkpoint.rs b/crates/core/tests/integration_checkpoint.rs index ce4525ba83..e90d4ec0cc 100644 --- a/crates/core/tests/integration_checkpoint.rs +++ b/crates/core/tests/integration_checkpoint.rs @@ -1,5 +1,3 @@ -#![cfg(feature = "integration_test")] - use chrono::Utc; use deltalake_core::checkpoints::{cleanup_expired_logs_for, create_checkpoint}; use deltalake_core::kernel::{DataType, PrimitiveType}; @@ -14,6 +12,8 @@ use tokio::time::sleep; #[tokio::test] #[serial] +// This test requires refactoring and a revisit +#[ignore] async fn cleanup_metadata_fs_test() -> TestResult { let storage = Box::new(LocalStorageIntegration::default()); let context = IntegrationContext::new(storage)?; @@ -34,19 +34,19 @@ async fn cleanup_metadata_test(context: &IntegrationContext) -> TestResult { // we don't need to actually populate files with content as cleanup works only with file's metadata object_store - .put(&log_path(0), bytes::Bytes::from("foo")) + .put(&log_path(0), bytes::Bytes::from("foo").into()) .await?; // since we cannot alter s3 object metadata, we mimic it with pauses // also we forced to use 2 seconds since Last-Modified is stored in seconds std::thread::sleep(Duration::from_secs(2)); object_store - .put(&log_path(1), bytes::Bytes::from("foo")) + .put(&log_path(1), bytes::Bytes::from("foo").into()) .await?; std::thread::sleep(Duration::from_secs(3)); object_store - .put(&log_path(2), bytes::Bytes::from("foo")) + .put(&log_path(2), bytes::Bytes::from("foo").into()) .await?; let v0time = object_store.head(&log_path(0)).await?.last_modified; diff --git a/crates/core/tests/integration_datafusion.rs b/crates/core/tests/integration_datafusion.rs index 64d80e3bce..3a55c63bb5 100644 --- a/crates/core/tests/integration_datafusion.rs +++ b/crates/core/tests/integration_datafusion.rs @@ -1,14 +1,10 @@ #![cfg(feature = "datafusion")] - -use arrow::array::Int64Array; -use deltalake_test::datafusion::*; -use deltalake_test::utils::*; -use serial_test::serial; - use std::collections::{HashMap, HashSet}; +use std::error::Error; use std::path::PathBuf; use std::sync::Arc; +use arrow::array::Int64Array; use arrow::array::*; use arrow::record_batch::RecordBatch; use arrow_schema::{ @@ -28,8 +24,6 @@ use datafusion_expr::Expr; use datafusion_proto::bytes::{ physical_plan_from_bytes_with_extension_codec, physical_plan_to_bytes_with_extension_codec, }; -use url::Url; - use deltalake_core::delta_datafusion::{DeltaPhysicalCodec, DeltaScan}; use deltalake_core::kernel::{DataType, MapType, 
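With the `object_store` upgrade above, multipart writes go through a `MultipartUpload` handle rather than an `AsyncWrite` plus a separate abort call. A hedged sketch of the new flow against an in-memory store (buffer contents and path are arbitrary):

```rust
use bytes::Bytes;
use object_store::{memory::InMemory, path::Path, MultipartUpload, ObjectStore};

// Sketch only: start a multipart upload, stream parts, then complete it.
async fn multipart_example() -> object_store::Result<()> {
    let store = InMemory::new();
    let mut upload = store.put_multipart(&Path::from("big/file.bin")).await?;
    upload.put_part(Bytes::from_static(b"part-1").into()).await?;
    upload.put_part(Bytes::from_static(b"part-2").into()).await?;
    upload.complete().await?;
    Ok(())
}
```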
PrimitiveType, StructField, StructType}; use deltalake_core::logstore::logstore_for; @@ -41,7 +35,10 @@ use deltalake_core::{ operations::{write::WriteBuilder, DeltaOps}, DeltaTable, DeltaTableError, }; -use std::error::Error; +use deltalake_test::datafusion::*; +use deltalake_test::utils::*; +use serial_test::serial; +use url::Url; mod local { use datafusion::common::stats::Precision; @@ -68,6 +65,8 @@ mod local { #[derive(Debug, Default)] pub struct ExecutionMetricsCollector { scanned_files: HashSet