diff --git a/.github/actions/setup-rust-runtime/action.yaml b/.github/actions/setup-rust-runtime/action.yaml index 90e09a957cd4..27cdf9b97419 100644 --- a/.github/actions/setup-rust-runtime/action.yaml +++ b/.github/actions/setup-rust-runtime/action.yaml @@ -37,5 +37,5 @@ runs: echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV echo "RUST_BACKTRACE=1" >> $GITHUB_ENV echo "RUST_MIN_STACK=3000000" >> $GITHUB_ENV - echo "RUST_FLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV + echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 375c9f2c2c5a..d384e4bc7ebf 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -488,7 +488,7 @@ jobs: # Verify MSRV for the crates which are directly used by other projects. msrv: - name: Verify MSRV + name: Verify MSRV (Min Supported Rust Version) runs-on: ubuntu-latest container: image: amd64/rust @@ -500,7 +500,13 @@ jobs: run: cargo install cargo-msrv - name: Check datafusion working-directory: datafusion/core - run: cargo msrv verify + run: | + # If you encounter an error with any of the commands below + # it means some crate in your dependency tree has a higher + # MSRV (Min Supported Rust Version) than the one specified + # in the `rust-version` key of `Cargo.toml`. Check your + # dependencies or update the version in `Cargo.toml` + cargo msrv verify - name: Check datafusion-substrait working-directory: datafusion/substrait run: cargo msrv verify diff --git a/Cargo.toml b/Cargo.toml index cc1861677476..3b1362d22426 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/arrow-datafusion" rust-version = "1.70" -version = "34.0.0" +version = "35.0.0" [workspace.dependencies] arrow = { version = "50.0.0", features = ["prettyprint"] } @@ -45,17 +45,17 @@ bytes = "1.4" chrono = { version = "0.4.31", default-features = false } ctor = "0.2.0" dashmap = "5.4.0" -datafusion = { path = "datafusion/core", version = "34.0.0" } -datafusion-common = { path = "datafusion/common", version = "34.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "34.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "34.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "34.0.0" } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "34.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "34.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "34.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "34.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "34.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "34.0.0" } +datafusion = { path = "datafusion/core", version = "35.0.0" } +datafusion-common = { path = "datafusion/common", version = "35.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "35.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "35.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "35.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "35.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "35.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "35.0.0" } 
+datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "35.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "35.0.0" } doc-comment = "0.3" env_logger = "0.10" futures = "0.3" @@ -70,7 +70,7 @@ parquet = { version = "50.0.0", default-features = false, features = ["arrow", " rand = "0.8" rstest = "0.18.0" serde_json = "1" -sqlparser = { version = "0.41.0", features = ["visitor"] } +sqlparser = { version = "0.43.0", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" url = "2.2" diff --git a/README.md b/README.md index 81ae30ab6897..cb89aff4aec7 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,25 @@ # DataFusion +[![Crates.io][crates-badge]][crates-url] +[![Apache licensed][license-badge]][license-url] +[![Build Status][actions-badge]][actions-url] +[![Discord chat][discord-badge]][discord-url] + +[crates-badge]: https://img.shields.io/crates/v/datafusion.svg +[crates-url]: https://crates.io/crates/datafusion +[license-badge]: https://img.shields.io/badge/license-Apache%20v2-blue.svg +[license-url]: https://github.com/apache/arrow-datafusion/blob/main/LICENSE.txt +[actions-badge]: https://github.com/apache/arrow-datafusion/actions/workflows/rust.yml/badge.svg +[actions-url]: https://github.com/apache/arrow-datafusion/actions?query=branch%3Amain +[discord-badge]: https://img.shields.io/discord/885562378132000778.svg?logo=discord&style=flat-square +[discord-url]: https://discord.com/invite/Qw5gKqHxUM + +[Website](https://github.com/apache/arrow-datafusion) | +[Guides](https://github.com/apache/arrow-datafusion/tree/main/docs) | +[API Docs](https://docs.rs/datafusion/latest/datafusion/) | +[Chat](https://discord.com/channels/885562378132000778/885562378132000781) + logo DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 94c1ebe7ee47..50b79b4b0661 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "34.0.0" +version = "35.0.0" edition = { workspace = true } authors = ["Apache Arrow "] homepage = "https://github.com/apache/arrow-datafusion" @@ -33,8 +33,8 @@ snmalloc = ["snmalloc-rs"] [dependencies] arrow = { workspace = true } -datafusion = { path = "../datafusion/core", version = "34.0.0" } -datafusion-common = { path = "../datafusion/common", version = "34.0.0" } +datafusion = { path = "../datafusion/core", version = "35.0.0" } +datafusion-common = { path = "../datafusion/common", version = "35.0.0" } env_logger = { workspace = true } futures = { workspace = true } log = { workspace = true } @@ -49,4 +49,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" } tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] } [dev-dependencies] -datafusion-proto = { path = "../datafusion/proto", version = "34.0.0" } +datafusion-proto = { path = "../datafusion/proto", version = "35.0.0" } diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md index d5105afd4832..e03b7d519d91 100644 --- a/benchmarks/queries/clickbench/README.md +++ b/benchmarks/queries/clickbench/README.md @@ -11,23 +11,180 @@ ClickBench is focused on aggregation and filtering performance (though it has no [ClickBench repository]: https://github.com/ClickHouse/ClickBench/blob/main/datafusion/queries.sql ## "Extended" Queries -The "extended" queries are not part of the 
official ClickBench benchmark. -Instead they are used to test other DataFusion features that are not -covered by the standard benchmark -Each description below is for the corresponding line in `extended.sql` (line 1 -is `Q0`, line 2 is `Q1`, etc.) +The "extended" queries are not part of the official ClickBench benchmark. +Instead they are used to test other DataFusion features that are not covered by +the standard benchmark. Each description below is for the corresponding line in +`extended.sql` (line 1 is `Q0`, line 2 is `Q1`, etc.) + +### Q0: Data Exploration + +**Question**: "How many distinct searches, mobile phones, and mobile phone models are there in the dataset?" + +**Important Query Properties**: multiple `COUNT DISTINCT`s, with low and high cardinality +distinct string columns. + +```sql +SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") +FROM hits; +``` + + +### Q1: Data Exploration + +**Question**: "How many distinct "hit color", "browser country" and "language" are there in the dataset?" + +**Important Query Properties**: multiple `COUNT DISTINCT`s. All three are small strings (length either 1 or 2). -### Q0 -Models initial Data exploration, to understand some statistics of data. -Import Query Properties: multiple `COUNT DISTINCT` on strings ```sql -SELECT - COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") +SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; ``` +### Q2: Top 10 analysis +**Question**: "Find the top 10 "browser country" by number of distinct "social network"s, +including the distinct counts of "hit color", "browser language", +and "social action"." +**Important Query Properties**: GROUP BY short string, multiple `COUNT DISTINCT`s. There are several small strings (length either 1 or 2). 
+```sql +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") +FROM hits +GROUP BY 1 +ORDER BY 2 DESC +LIMIT 10; +``` + + +## Data Notes + +Here are some interesting statistics about the data used in the queries +Max length of `"SearchPhrase"` is 1113 characters +```sql +❯ select min(length("SearchPhrase")) as "SearchPhrase_len_min", max(length("SearchPhrase")) "SearchPhrase_len_max" from 'hits.parquet' limit 10; ++----------------------+----------------------+ +| SearchPhrase_len_min | SearchPhrase_len_max | ++----------------------+----------------------+ +| 0 | 1113 | ++----------------------+----------------------+ +``` + + +Here is the schema of the data +```sql +❯ describe 'hits.parquet'; ++-----------------------+-----------+-------------+ +| column_name | data_type | is_nullable | ++-----------------------+-----------+-------------+ +| WatchID | Int64 | NO | +| JavaEnable | Int16 | NO | +| Title | Utf8 | NO | +| GoodEvent | Int16 | NO | +| EventTime | Int64 | NO | +| EventDate | UInt16 | NO | +| CounterID | Int32 | NO | +| ClientIP | Int32 | NO | +| RegionID | Int32 | NO | +| UserID | Int64 | NO | +| CounterClass | Int16 | NO | +| OS | Int16 | NO | +| UserAgent | Int16 | NO | +| URL | Utf8 | NO | +| Referer | Utf8 | NO | +| IsRefresh | Int16 | NO | +| RefererCategoryID | Int16 | NO | +| RefererRegionID | Int32 | NO | +| URLCategoryID | Int16 | NO | +| URLRegionID | Int32 | NO | +| ResolutionWidth | Int16 | NO | +| ResolutionHeight | Int16 | NO | +| ResolutionDepth | Int16 | NO | +| FlashMajor | Int16 | NO | +| FlashMinor | Int16 | NO | +| FlashMinor2 | Utf8 | NO | +| NetMajor | Int16 | NO | +| NetMinor | Int16 | NO | +| UserAgentMajor | Int16 | NO | +| UserAgentMinor | Utf8 | NO | +| CookieEnable | Int16 | NO | +| JavascriptEnable | Int16 | NO | +| IsMobile | Int16 | NO | +| MobilePhone | Int16 | NO | +| MobilePhoneModel | Utf8 | NO | +| Params | Utf8 | NO | +| IPNetworkID | Int32 | NO | +| TraficSourceID | Int16 | NO | +| SearchEngineID | Int16 | NO | +| SearchPhrase | Utf8 | NO | +| AdvEngineID | Int16 | NO | +| IsArtifical | Int16 | NO | +| WindowClientWidth | Int16 | NO | +| WindowClientHeight | Int16 | NO | +| ClientTimeZone | Int16 | NO | +| ClientEventTime | Int64 | NO | +| SilverlightVersion1 | Int16 | NO | +| SilverlightVersion2 | Int16 | NO | +| SilverlightVersion3 | Int32 | NO | +| SilverlightVersion4 | Int16 | NO | +| PageCharset | Utf8 | NO | +| CodeVersion | Int32 | NO | +| IsLink | Int16 | NO | +| IsDownload | Int16 | NO | +| IsNotBounce | Int16 | NO | +| FUniqID | Int64 | NO | +| OriginalURL | Utf8 | NO | +| HID | Int32 | NO | +| IsOldCounter | Int16 | NO | +| IsEvent | Int16 | NO | +| IsParameter | Int16 | NO | +| DontCountHits | Int16 | NO | +| WithHash | Int16 | NO | +| HitColor | Utf8 | NO | +| LocalEventTime | Int64 | NO | +| Age | Int16 | NO | +| Sex | Int16 | NO | +| Income | Int16 | NO | +| Interests | Int16 | NO | +| Robotness | Int16 | NO | +| RemoteIP | Int32 | NO | +| WindowName | Int32 | NO | +| OpenerName | Int32 | NO | +| HistoryLength | Int16 | NO | +| BrowserLanguage | Utf8 | NO | +| BrowserCountry | Utf8 | NO | +| SocialNetwork | Utf8 | NO | +| SocialAction | Utf8 | NO | +| HTTPError | Int16 | NO | +| SendTiming | Int32 | NO | +| DNSTiming | Int32 | NO | +| ConnectTiming | Int32 | NO | +| ResponseStartTiming | Int32 | NO | +| ResponseEndTiming | Int32 | NO | +| FetchTiming | Int32 | NO | +| SocialSourceNetworkID | Int16 | NO | +| SocialSourcePage | 
Utf8 | NO | +| ParamPrice | Int64 | NO | +| ParamOrderID | Utf8 | NO | +| ParamCurrency | Utf8 | NO | +| ParamCurrencyID | Int16 | NO | +| OpenstatServiceName | Utf8 | NO | +| OpenstatCampaignID | Utf8 | NO | +| OpenstatAdID | Utf8 | NO | +| OpenstatSourceID | Utf8 | NO | +| UTMSource | Utf8 | NO | +| UTMMedium | Utf8 | NO | +| UTMCampaign | Utf8 | NO | +| UTMContent | Utf8 | NO | +| UTMTerm | Utf8 | NO | +| FromTag | Utf8 | NO | +| HasGCLID | Int16 | NO | +| RefererHash | Int64 | NO | +| URLHash | Int64 | NO | +| CLID | Int32 | NO | ++-----------------------+-----------+-------------+ +105 rows in set. Query took 0.034 seconds. + +``` diff --git a/benchmarks/queries/clickbench/extended.sql b/benchmarks/queries/clickbench/extended.sql index 82c0266af61a..0a2999fceb49 100644 --- a/benchmarks/queries/clickbench/extended.sql +++ b/benchmarks/queries/clickbench/extended.sql @@ -1 +1,3 @@ -SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; \ No newline at end of file +SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; +SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; \ No newline at end of file diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 5663e736dbd8..a718f7591a45 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -360,9 +360,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" dependencies = [ "bzip2", "flate2", @@ -733,9 +733,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "blake2" @@ -867,15 +867,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] @@ -1098,7 +1098,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "34.0.0" +version = "35.0.0" dependencies = [ "ahash", "apache-avro", @@ -1125,7 +1125,7 @@ dependencies = [ "half", "hashbrown 0.14.3", "indexmap 2.1.0", - "itertools 0.12.0", + "itertools", "log", "num-traits", "num_cpus", @@ -1146,7 +1146,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "34.0.0" +version = "35.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1174,7 +1174,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = 
"34.0.0" +version = "35.0.0" dependencies = [ "ahash", "apache-avro", @@ -1193,7 +1193,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "34.0.0" +version = "35.0.0" dependencies = [ "arrow", "chrono", @@ -1212,7 +1212,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "34.0.0" +version = "35.0.0" dependencies = [ "ahash", "arrow", @@ -1226,7 +1226,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "34.0.0" +version = "35.0.0" dependencies = [ "arrow", "async-trait", @@ -1235,14 +1235,14 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.3", - "itertools 0.12.0", + "itertools", "log", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "34.0.0" +version = "35.0.0" dependencies = [ "ahash", "arrow", @@ -1260,7 +1260,7 @@ dependencies = [ "hashbrown 0.14.3", "hex", "indexmap 2.1.0", - "itertools 0.12.0", + "itertools", "log", "md-5", "paste", @@ -1274,7 +1274,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "34.0.0" +version = "35.0.0" dependencies = [ "ahash", "arrow", @@ -1291,7 +1291,7 @@ dependencies = [ "half", "hashbrown 0.14.3", "indexmap 2.1.0", - "itertools 0.12.0", + "itertools", "log", "once_cell", "parking_lot", @@ -1303,7 +1303,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "34.0.0" +version = "35.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1652,9 +1652,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.23" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b553656127a00601c8ae5590fcfdc118e4083a7924b6cf4ffc1ea4b99dc429d7" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -1722,9 +1722,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hex" @@ -1908,15 +1908,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.0" @@ -2072,16 +2063,16 @@ version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libc", "redox_syscall", ] [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -2279,7 +2270,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.3", + "hermit-abi 0.3.4", "libc", ] @@ 
-2305,7 +2296,7 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.12.0", + "itertools", "parking_lot", "percent-encoding", "quick-xml", @@ -2484,18 +2475,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", @@ -2516,9 +2507,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "powerfmt" @@ -2534,14 +2525,13 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "predicates" -version = "3.0.4" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0" +checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" dependencies = [ "anstyle", "difflib", "float-cmp", - "itertools 0.11.0", "normalize-line-endings", "predicates-core", "regex", @@ -2589,9 +2579,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -2683,9 +2673,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", @@ -2695,9 +2685,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -2836,11 +2826,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.29" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a1a81a2478639a14e68937903356dbac62cf52171148924f754bb8a8cd7a96c" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", @@ -3102,9 +3092,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snafu" @@ -3158,9 +3148,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.41.0" +version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +checksum = "a748c164141797ef0a712aaf16aa71df6f23e80ffea446daa2dd30e3325f89f3" dependencies = [ "log", "sqlparser_derive", @@ -3563,9 +3553,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -3631,9 +3621,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", "serde", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index d084938030b1..07ee65e3f6cd 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "34.0.0" +version = "35.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -34,7 +34,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "34.0.0", features = ["avro", "crypto_expressions", "encoding_expressions", "parquet", "regex_expressions", "unicode_expressions", "compression"] } +datafusion = { path = "../datafusion/core", version = "35.0.0", features = ["avro", "crypto_expressions", "encoding_expressions", "parquet", "regex_expressions", "unicode_expressions", "compression"] } datafusion-common = { path = "../datafusion/common" } dirs = "4.0.0" env_logger = "0.9" diff --git a/datafusion-cli/README.md b/datafusion-cli/README.md index 1d99cfbcb00a..0afcd489f725 100644 --- a/datafusion-cli/README.md +++ b/datafusion-cli/README.md @@ -25,4 +25,22 @@ The DataFusion CLI is a command line utility that runs SQL queries using the DataFusion engine. -See the [`datafusion-cli` documentation](https://arrow.apache.org/datafusion/user-guide/cli.html) for further information. +# Frequently Asked Questions + +## Where can I find more information? + +Answer: See the [`datafusion-cli` documentation](https://arrow.apache.org/datafusion/user-guide/cli.html) for further information. + +## How do I make my IDE work with `datafusion-cli`? 
+ +Answer: "open" the `datafusion/datafusion-cli` project as its own top level +project in my IDE (rather than opening `datafusion`) + +The reason `datafusion-cli` is not listed as part of the workspace in the main +[`datafusion Cargo.toml`] file is that `datafusion-cli` is a binary and has a +checked in `Cargo.lock` file to ensure reproducible builds. + +However, the `datafusion` and sub crates are intended for use as libraries and +thus do not have a `Cargo.lock` file checked in. + +[`datafusion cargo.toml`]: https://github.com/apache/arrow-datafusion/blob/main/Cargo.toml diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 69d412db5afa..0e146d575718 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -18,6 +18,8 @@ //! Helper that helps with interactive editing, including multi-line parsing and validation, //! and auto-completion for file name during creating external table. +use std::borrow::Cow; + use datafusion::common::sql_err; use datafusion::error::DataFusionError; use datafusion::sql::parser::{DFParser, Statement}; @@ -36,9 +38,12 @@ use rustyline::Context; use rustyline::Helper; use rustyline::Result; +use crate::highlighter::SyntaxHighlighter; + pub struct CliHelper { completer: FilenameCompleter, dialect: String, + highlighter: SyntaxHighlighter, } impl CliHelper { @@ -46,6 +51,7 @@ impl CliHelper { Self { completer: FilenameCompleter::new(), dialect: dialect.into(), + highlighter: SyntaxHighlighter::new(dialect), } } @@ -100,7 +106,15 @@ impl Default for CliHelper { } } -impl Highlighter for CliHelper {} +impl Highlighter for CliHelper { + fn highlight<'l>(&self, line: &'l str, pos: usize) -> Cow<'l, str> { + self.highlighter.highlight(line, pos) + } + + fn highlight_char(&self, line: &str, pos: usize) -> bool { + self.highlighter.highlight_char(line, pos) + } +} impl Hinter for CliHelper { type Hint = String; diff --git a/datafusion-cli/src/highlighter.rs b/datafusion-cli/src/highlighter.rs new file mode 100644 index 000000000000..28732d5b976f --- /dev/null +++ b/datafusion-cli/src/highlighter.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The syntax highlighter. + +use std::{ + borrow::Cow::{self, Borrowed}, + fmt::Display, +}; + +use datafusion::sql::sqlparser::{ + dialect::{dialect_from_str, Dialect, GenericDialect}, + keywords::Keyword, + tokenizer::{Token, Tokenizer}, +}; +use rustyline::highlight::Highlighter; + +/// The syntax highlighter. 
+pub struct SyntaxHighlighter { + dialect: Box, +} + +impl SyntaxHighlighter { + pub fn new(dialect: &str) -> Self { + let dialect = match dialect_from_str(dialect) { + Some(dialect) => dialect, + None => Box::new(GenericDialect {}), + }; + Self { dialect } + } +} + +impl Highlighter for SyntaxHighlighter { + fn highlight<'l>(&self, line: &'l str, _: usize) -> Cow<'l, str> { + let mut out_line = String::new(); + + // `with_unescape(false)` since we want to rebuild the original string. + let mut tokenizer = + Tokenizer::new(self.dialect.as_ref(), line).with_unescape(false); + let tokens = tokenizer.tokenize(); + match tokens { + Ok(tokens) => { + for token in tokens.iter() { + match token { + Token::Word(w) if w.keyword != Keyword::NoKeyword => { + out_line.push_str(&Color::red(token)); + } + Token::SingleQuotedString(_) => { + out_line.push_str(&Color::green(token)); + } + other => out_line.push_str(&format!("{other}")), + } + } + out_line.into() + } + Err(_) => Borrowed(line), + } + } + + fn highlight_char(&self, line: &str, _: usize) -> bool { + !line.is_empty() + } +} + +/// Convenient utility to return strings with [ANSI color](https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124). +struct Color {} + +impl Color { + fn green(s: impl Display) -> String { + format!("\x1b[92m{s}\x1b[0m") + } + + fn red(s: impl Display) -> String { + format!("\x1b[91m{s}\x1b[0m") + } +} + +#[cfg(test)] +mod tests { + use super::SyntaxHighlighter; + use rustyline::highlight::Highlighter; + + #[test] + fn highlighter_valid() { + let s = "SElect col_a from tab_1;"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!( + "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;", + out + ); + } + + #[test] + fn highlighter_valid_with_new_line() { + let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!( + "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;", + out + ); + } + + #[test] + fn highlighter_invalid() { + let s = "SElect col_a from tab_1 WHERE col_b = ';"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out); + } +} diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs index 7eb3cb51c1f8..61f9eae7dd53 100644 --- a/datafusion-cli/src/lib.rs +++ b/datafusion-cli/src/lib.rs @@ -26,3 +26,5 @@ pub mod helper; pub mod object_storage; pub mod print_format; pub mod print_options; + +mod highlighter; diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 0a8c7b4b3e3a..2de52be612bb 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -190,117 +190,212 @@ impl PrintFormat { #[cfg(test)] mod tests { - use std::io::{Cursor, Read, Write}; - use std::sync::Arc; - use super::*; + use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion::error::Result; - - fn run_test(batches: &[RecordBatch], test_fn: F) -> Result - where - F: Fn(&mut Cursor>, &[RecordBatch]) -> Result<()>, - { - let mut buffer = Cursor::new(Vec::new()); - test_fn(&mut buffer, batches)?; - buffer.set_position(0); - let mut contents = String::new(); - buffer.read_to_string(&mut contents)?; - Ok(contents) + + #[test] + fn 
print_empty() { + for format in [ + PrintFormat::Csv, + PrintFormat::Tsv, + PrintFormat::Table, + PrintFormat::Json, + PrintFormat::NdJson, + PrintFormat::Automatic, + ] { + // no output for empty batches, even with header set + PrintBatchesTest::new() + .with_format(format) + .with_batches(vec![]) + .with_expected(&[""]) + .run(); + } } #[test] - fn test_print_batches_with_sep() -> Result<()> { - let contents = run_test(&[], |buffer, batches| { - print_batches_with_sep(buffer, batches, b',', true) - })?; - assert_eq!(contents, ""); + fn print_csv_no_header() { + #[rustfmt::skip] + let expected = &[ + "1,4,7", + "2,5,8", + "3,6,9", + ]; - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - Field::new("c", DataType::Int32, false), - ])); - let batch = RecordBatch::try_new( - schema, - vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), - ], - )?; + PrintBatchesTest::new() + .with_format(PrintFormat::Csv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } - let contents = run_test(&[batch], |buffer, batches| { - print_batches_with_sep(buffer, batches, b',', true) - })?; - assert_eq!(contents, "a,b,c\n1,4,7\n2,5,8\n3,6,9\n"); + #[test] + fn print_csv_with_header() { + #[rustfmt::skip] + let expected = &[ + "a,b,c", + "1,4,7", + "2,5,8", + "3,6,9", + ]; - Ok(()) + PrintBatchesTest::new() + .with_format(PrintFormat::Csv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); } #[test] - fn test_print_batches_to_json_empty() -> Result<()> { - let contents = run_test(&[], |buffer, batches| { - batches_to_json!(ArrayWriter, buffer, batches) - })?; - assert_eq!(contents, ""); + fn print_tsv_no_header() { + #[rustfmt::skip] + let expected = &[ + "1\t4\t7", + "2\t5\t8", + "3\t6\t9", + ]; - let contents = run_test(&[], |buffer, batches| { - batches_to_json!(LineDelimitedWriter, buffer, batches) - })?; - assert_eq!(contents, ""); + PrintBatchesTest::new() + .with_format(PrintFormat::Tsv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - Field::new("c", DataType::Int32, false), - ])); - let batch = RecordBatch::try_new( - schema, - vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), - ], - )?; - let batches = vec![batch]; + #[test] + fn print_tsv_with_header() { + #[rustfmt::skip] + let expected = &[ + "a\tb\tc", + "1\t4\t7", + "2\t5\t8", + "3\t6\t9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Tsv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } - let contents = run_test(&batches, |buffer, batches| { - batches_to_json!(ArrayWriter, buffer, batches) - })?; - assert_eq!(contents, "[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]\n"); + #[test] + fn print_table() { + let expected = &[ + "+---+---+---+", + "| a | b | c |", + "+---+---+---+", + "| 1 | 4 | 7 |", + "| 2 | 5 | 8 |", + "| 3 | 6 | 9 |", + "+---+---+---+", + ]; - let contents = run_test(&batches, |buffer, batches| { - 
batches_to_json!(LineDelimitedWriter, buffer, batches) - })?; - assert_eq!(contents, "{\"a\":1,\"b\":4,\"c\":7}\n{\"a\":2,\"b\":5,\"c\":8}\n{\"a\":3,\"b\":6,\"c\":9}\n"); + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); + } + #[test] + fn print_json() { + let expected = + &[r#"[{"a":1,"b":4,"c":7},{"a":2,"b":5,"c":8},{"a":3,"b":6,"c":9}]"#]; - Ok(()) + PrintBatchesTest::new() + .with_format(PrintFormat::Json) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); } #[test] - fn test_format_batches_with_maxrows() -> Result<()> { - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); - let batch = RecordBatch::try_new( - schema, - vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], - )?; + fn print_ndjson() { + let expected = &[ + r#"{"a":1,"b":4,"c":7}"#, + r#"{"a":2,"b":5,"c":8}"#, + r#"{"a":3,"b":6,"c":9}"#, + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::NdJson) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); + } + #[test] + fn print_automatic_no_header() { #[rustfmt::skip] - let all_rows_expected = [ + let expected = &[ + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Automatic) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } + #[test] + fn print_automatic_with_header() { + #[rustfmt::skip] + let expected = &[ + "a,b,c", + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Automatic) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } + + #[test] + fn print_maxrows_unlimited() { + #[rustfmt::skip] + let expected = &[ "+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", - "+---+\n", - ].join("\n"); + "+---+", + ]; + + // should print out entire output with no truncation if unlimited or + // limit greater than number of batches or equal to the number of batches + for max_rows in [MaxRows::Unlimited, MaxRows::Limited(5), MaxRows::Limited(3)] { + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![one_column_batch()]) + .with_maxrows(max_rows) + .with_expected(expected) + .run(); + } + } + #[test] + fn print_maxrows_limited_one_batch() { #[rustfmt::skip] - let one_row_expected = [ + let expected = &[ "+---+", "| a |", "+---+", @@ -308,11 +403,21 @@ mod tests { "| . |", "| . |", "| . |", - "+---+\n", - ].join("\n"); + "+---+", + ]; + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![one_column_batch()]) + .with_maxrows(MaxRows::Limited(1)) + .with_expected(expected) + .run(); + } + + #[test] + fn print_maxrows_limited_multi_batched() { #[rustfmt::skip] - let multi_batches_expected = [ + let expected = &[ "+---+", "| a |", "+---+", @@ -324,42 +429,23 @@ mod tests { "| . |", "| . |", "| . 
|", - "+---+\n", - ].join("\n"); - - let no_limit = run_test(&[batch.clone()], |buffer, batches| { - format_batches_with_maxrows(buffer, batches, MaxRows::Unlimited) - })?; - assert_eq!(no_limit, all_rows_expected); - - let maxrows_less_than_actual = run_test(&[batch.clone()], |buffer, batches| { - format_batches_with_maxrows(buffer, batches, MaxRows::Limited(1)) - })?; - assert_eq!(maxrows_less_than_actual, one_row_expected); - - let maxrows_more_than_actual = run_test(&[batch.clone()], |buffer, batches| { - format_batches_with_maxrows(buffer, batches, MaxRows::Limited(5)) - })?; - assert_eq!(maxrows_more_than_actual, all_rows_expected); - - let maxrows_equals_actual = run_test(&[batch.clone()], |buffer, batches| { - format_batches_with_maxrows(buffer, batches, MaxRows::Limited(3)) - })?; - assert_eq!(maxrows_equals_actual, all_rows_expected); - - let multi_batches = run_test( - &[batch.clone(), batch.clone(), batch.clone()], - |buffer, batches| { - format_batches_with_maxrows(buffer, batches, MaxRows::Limited(5)) - }, - )?; - assert_eq!(multi_batches, multi_batches_expected); - - Ok(()) + "+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![ + one_column_batch(), + one_column_batch(), + one_column_batch(), + ]) + .with_maxrows(MaxRows::Limited(5)) + .with_expected(expected) + .run(); } #[test] - fn test_print_batches_empty_batches() -> Result<()> { + fn test_print_batches_empty_batches() { let batch = one_column_batch(); let empty_batch = RecordBatch::new_empty(batch.schema()); @@ -371,7 +457,7 @@ mod tests { "| 1 |", "| 2 |", "| 3 |", - "+---+\n", + "+---+", ]; PrintBatchesTest::new() @@ -379,11 +465,10 @@ mod tests { .with_batches(vec![empty_batch.clone(), batch, empty_batch]) .with_expected(expected) .run(); - Ok(()) } #[test] - fn test_print_batches_empty_batches_no_header() -> Result<()> { + fn test_print_batches_empty_batches_no_header() { let empty_batch = RecordBatch::new_empty(one_column_batch().schema()); // empty batches should not print a header @@ -392,27 +477,36 @@ mod tests { PrintBatchesTest::new() .with_format(PrintFormat::Table) .with_batches(vec![empty_batch]) - .with_header(true) + .with_header(WithHeader::Yes) .with_expected(expected) .run(); - Ok(()) } + #[derive(Debug)] struct PrintBatchesTest { format: PrintFormat, batches: Vec, maxrows: MaxRows, - with_header: bool, + with_header: WithHeader, expected: Vec<&'static str>, } + /// How to test with_header + #[derive(Debug, Clone)] + enum WithHeader { + Yes, + No, + /// output should be the same with or without header + Ignored, + } + impl PrintBatchesTest { fn new() -> Self { Self { format: PrintFormat::Table, batches: vec![], maxrows: MaxRows::Unlimited, - with_header: false, + with_header: WithHeader::Ignored, expected: vec![], } } @@ -429,8 +523,14 @@ mod tests { self } - /// set whether to include a header - fn with_header(mut self, with_header: bool) -> Self { + /// set maxrows + fn with_maxrows(mut self, maxrows: MaxRows) -> Self { + self.maxrows = maxrows; + self + } + + /// set with_header + fn with_header(mut self, with_header: WithHeader) -> Self { self.with_header = with_header; self } @@ -443,17 +543,58 @@ mod tests { /// run the test fn run(self) { - let mut buffer: Vec = vec![]; - self.format - .print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header) - .unwrap(); - let actual = String::from_utf8(buffer).unwrap(); - let expected = self.expected.join("\n"); + let actual = self.output(); + let actual: Vec<_> = 
actual.trim_end().split('\n').collect(); + let expected = self.expected; assert_eq!( actual, expected, - "actual:\n\n{actual}expected:\n\n{expected}" + "\n\nactual:\n{actual:#?}\n\nexpected:\n{expected:#?}" ); } + + /// formats batches using parameters and returns the resulting output + fn output(&self) -> String { + match self.with_header { + WithHeader::Yes => self.output_with_header(true), + WithHeader::No => self.output_with_header(false), + WithHeader::Ignored => { + let output = self.output_with_header(true); + // ensure the output is the same without header + let output_without_header = self.output_with_header(false); + assert_eq!( + output, output_without_header, + "Expected output to be the same with or without header" + ); + output + } + } + } + + fn output_with_header(&self, with_header: bool) -> String { + let mut buffer: Vec = vec![]; + self.format + .print_batches(&mut buffer, &self.batches, self.maxrows, with_header) + .unwrap(); + String::from_utf8(buffer).unwrap() + } + } + + /// Return a batch with three columns and three rows + fn three_column_batch() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])); + RecordBatch::try_new( + schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(Int32Array::from(vec![7, 8, 9])), + ], + ) + .unwrap() } /// return a batch with one column and three rows @@ -464,4 +605,14 @@ mod tests { )]) .unwrap() } + + /// Slice the record batch into 2 batches + fn split_batch(batch: RecordBatch) -> Vec { + assert!(batch.num_rows() > 1); + let split = batch.num_rows() / 2; + vec![ + batch.slice(0, split), + batch.slice(split, batch.num_rows() - split), + ] + } } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 59580bcb6a05..45c9709a342e 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -40,6 +40,7 @@ datafusion = { path = "../datafusion/core", features = ["avro"] } datafusion-common = { path = "../datafusion/common" } datafusion-expr = { path = "../datafusion/expr" } datafusion-optimizer = { path = "../datafusion/optimizer" } +datafusion-physical-expr = { workspace = true } datafusion-sql = { path = "../datafusion/sql" } env_logger = { workspace = true } futures = { workspace = true } diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index eecb63d3be65..298ee9364efe 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -64,6 +64,7 @@ cargo run --example csv_sql - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) - [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) - [`simple_udfw.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) +- [`to_timestamp.rs`](examples/to_timestamp.rs): Examples of using the to_timestamp functions - [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) ## Distributed diff --git a/datafusion-examples/examples/advanced_udaf.rs b/datafusion-examples/examples/advanced_udaf.rs index 8d5314bfbea5..10164a850bfb 100644 --- a/datafusion-examples/examples/advanced_udaf.rs +++ b/datafusion-examples/examples/advanced_udaf.rs @@ -16,16 +16,22 @@ // under the License. 
use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; +use datafusion_physical_expr::NullState; use std::{any::Any, sync::Arc}; use arrow::{ - array::{ArrayRef, Float32Array}, + array::{ + ArrayRef, AsArray, Float32Array, PrimitiveArray, PrimitiveBuilder, UInt32Array, + }, + datatypes::{ArrowNativeTypeOp, ArrowPrimitiveType, Float64Type, UInt32Type}, record_batch::RecordBatch, }; use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::{cast::as_float64_array, ScalarValue}; -use datafusion_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; +use datafusion_expr::{ + Accumulator, AggregateUDF, AggregateUDFImpl, GroupsAccumulator, Signature, +}; /// This example shows how to use the full AggregateUDFImpl API to implement a user /// defined aggregate function. As in the `simple_udaf.rs` example, this struct implements @@ -33,12 +39,12 @@ use datafusion_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; /// /// To do so, we must implement the `AggregateUDFImpl` trait. #[derive(Debug, Clone)] -struct GeoMeanUdf { +struct GeoMeanUdaf { signature: Signature, } -impl GeoMeanUdf { - /// Create a new instance of the GeoMeanUdf struct +impl GeoMeanUdaf { + /// Create a new instance of the GeoMeanUdaf struct fn new() -> Self { Self { signature: Signature::exact( @@ -52,7 +58,7 @@ impl GeoMeanUdf { } } -impl AggregateUDFImpl for GeoMeanUdf { +impl AggregateUDFImpl for GeoMeanUdaf { /// We implement as_any so that we can downcast the AggregateUDFImpl trait object fn as_any(&self) -> &dyn Any { self @@ -74,6 +80,11 @@ impl AggregateUDFImpl for GeoMeanUdf { } /// This is the accumulator factory; DataFusion uses it to create new accumulators. + /// + /// This is the accumulator factory for row wise accumulation; Even when `GroupsAccumulator` + /// is supported, DataFusion will use this row oriented + /// accumulator when the aggregate function is used as a window function + /// or when there are only aggregates (no GROUP BY columns) in the plan. fn accumulator(&self, _arg: &DataType) -> Result> { Ok(Box::new(GeometricMean::new())) } @@ -82,6 +93,16 @@ impl AggregateUDFImpl for GeoMeanUdf { fn state_type(&self, _return_type: &DataType) -> Result> { Ok(vec![DataType::Float64, DataType::UInt32]) } + + /// Tell DataFusion that this aggregate supports the more performant `GroupsAccumulator` + /// which is used for cases when there are grouping columns in the query + fn groups_accumulator_supported(&self) -> bool { + true + } + + fn create_groups_accumulator(&self) -> Result> { + Ok(Box::new(GeometricMeanGroupsAccumulator::new())) + } } /// A UDAF has state across multiple rows, and thus we require a `struct` with that state. @@ -104,7 +125,7 @@ impl Accumulator for GeometricMean { // This function serializes our state to `ScalarValue`, which DataFusion uses // to pass this state between execution stages. // Note that this can be arbitrary data. - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.prod), ScalarValue::from(self.n), @@ -113,7 +134,7 @@ impl Accumulator for GeometricMean { // DataFusion expects this function to return the final value of this aggregator. 
// in this case, this is the formula of the geometric mean - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let value = self.prod.powf(1.0 / self.n as f64); Ok(ScalarValue::from(value)) } @@ -173,16 +194,25 @@ fn create_context() -> Result { use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::datasource::MemTable; // define a schema. - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Float32, false), + Field::new("b", DataType::Float32, false), + ])); // define data in two partitions let batch1 = RecordBatch::try_new( schema.clone(), - vec![Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0]))], + vec![ + Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0])), + Arc::new(Float32Array::from(vec![2.0, 2.0, 2.0])), + ], )?; let batch2 = RecordBatch::try_new( schema.clone(), - vec![Arc::new(Float32Array::from(vec![64.0]))], + vec![ + Arc::new(Float32Array::from(vec![64.0])), + Arc::new(Float32Array::from(vec![2.0])), + ], )?; // declare a new context. In spark API, this corresponds to a new spark SQLsession @@ -194,15 +224,183 @@ fn create_context() -> Result { Ok(ctx) } +// Define a `GroupsAccumulator` for GeometricMean +/// which handles accumulator state for multiple groups at once. +/// This API is significantly more complicated than `Accumulator`, which manages +/// the state for a single group, but for queries with a large number of groups +/// can be significantly faster. See the `GroupsAccumulator` documentation for +/// more information. +struct GeometricMeanGroupsAccumulator { + /// The type of the internal sum + prod_data_type: DataType, + + /// The type of the returned sum + return_data_type: DataType, + + /// Count per group (use u32 to make UInt32Array) + counts: Vec, + + /// product per group, stored as the native type (not `ScalarValue`) + prods: Vec, + + /// Track nulls in the input / filters + null_state: NullState, +} + +impl GeometricMeanGroupsAccumulator { + fn new() -> Self { + Self { + prod_data_type: DataType::Float64, + return_data_type: DataType::Float64, + counts: vec![], + prods: vec![], + null_state: NullState::new(), + } + } +} + +impl GroupsAccumulator for GeometricMeanGroupsAccumulator { + /// Updates the accumulator state given input. DataFusion provides `group_indices`, + /// the groups that each row in `values` belongs to as well as an optional filter of which rows passed. 
+ fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "single argument to update_batch"); + let values = values[0].as_primitive::(); + + // increment counts, update sums + self.counts.resize(total_num_groups, 0); + self.prods.resize(total_num_groups, 1.0); + // Use the `NullState` structure to generate specialized code for null / non null input elements + self.null_state.accumulate( + group_indices, + values, + opt_filter, + total_num_groups, + |group_index, new_value| { + let prod = &mut self.prods[group_index]; + *prod = prod.mul_wrapping(new_value); + + self.counts[group_index] += 1; + }, + ); + + Ok(()) + } + + /// Merge the results from previous invocations of `evaluate` into this accumulator's state + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 2, "two arguments to merge_batch"); + // first batch is counts, second is partial sums + let partial_prods = values[0].as_primitive::(); + let partial_counts = values[1].as_primitive::(); + // update counts with partial counts + self.counts.resize(total_num_groups, 0); + self.null_state.accumulate( + group_indices, + partial_counts, + opt_filter, + total_num_groups, + |group_index, partial_count| { + self.counts[group_index] += partial_count; + }, + ); + + // update prods + self.prods.resize(total_num_groups, 1.0); + self.null_state.accumulate( + group_indices, + partial_prods, + opt_filter, + total_num_groups, + |group_index, new_value: ::Native| { + let prod = &mut self.prods[group_index]; + *prod = prod.mul_wrapping(new_value); + }, + ); + + Ok(()) + } + + /// Generate output, as specififed by `emit_to` and update the intermediate state + fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + let counts = emit_to.take_needed(&mut self.counts); + let prods = emit_to.take_needed(&mut self.prods); + let nulls = self.null_state.build(emit_to); + + assert_eq!(nulls.len(), prods.len()); + assert_eq!(counts.len(), prods.len()); + + // don't evaluate geometric mean with null inputs to avoid errors on null values + + let array: PrimitiveArray = if nulls.null_count() > 0 { + let mut builder = PrimitiveBuilder::::with_capacity(nulls.len()); + let iter = prods.into_iter().zip(counts).zip(nulls.iter()); + + for ((prod, count), is_valid) in iter { + if is_valid { + builder.append_value(prod.powf(1.0 / count as f64)) + } else { + builder.append_null(); + } + } + builder.finish() + } else { + let geo_mean: Vec<::Native> = prods + .into_iter() + .zip(counts) + .map(|(prod, count)| prod.powf(1.0 / count as f64)) + .collect::>(); + PrimitiveArray::new(geo_mean.into(), Some(nulls)) // no copy + .with_data_type(self.return_data_type.clone()) + }; + + Ok(Arc::new(array)) + } + + // return arrays for counts and prods + fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + let nulls = self.null_state.build(emit_to); + let nulls = Some(nulls); + + let counts = emit_to.take_needed(&mut self.counts); + let counts = UInt32Array::new(counts.into(), nulls.clone()); // zero copy + + let prods = emit_to.take_needed(&mut self.prods); + let prods = PrimitiveArray::::new(prods.into(), nulls) // zero copy + .with_data_type(self.prod_data_type.clone()); + + Ok(vec![ + Arc::new(prods) as ArrayRef, + Arc::new(counts) as ArrayRef, + ]) + } 
+ + fn size(&self) -> usize { + self.counts.capacity() * std::mem::size_of::() + + self.prods.capacity() * std::mem::size_of::() + } +} + #[tokio::main] async fn main() -> Result<()> { let ctx = create_context()?; // create the AggregateUDF - let geometric_mean = AggregateUDF::from(GeoMeanUdf::new()); + let geometric_mean = AggregateUDF::from(GeoMeanUdaf::new()); ctx.register_udaf(geometric_mean.clone()); - let sql_df = ctx.sql("SELECT geo_mean(a) FROM t").await?; + let sql_df = ctx.sql("SELECT geo_mean(a) FROM t group by b").await?; sql_df.show().await?; // get a DataFrame from the context diff --git a/datafusion-examples/examples/simple_udaf.rs b/datafusion-examples/examples/simple_udaf.rs index 2c797f221b2c..0996a67245a8 100644 --- a/datafusion-examples/examples/simple_udaf.rs +++ b/datafusion-examples/examples/simple_udaf.rs @@ -72,7 +72,7 @@ impl Accumulator for GeometricMean { // This function serializes our state to `ScalarValue`, which DataFusion uses // to pass this state between execution stages. // Note that this can be arbitrary data. - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.prod), ScalarValue::from(self.n), @@ -81,7 +81,7 @@ impl Accumulator for GeometricMean { // DataFusion expects this function to return the final value of this aggregator. // in this case, this is the formula of the geometric mean - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let value = self.prod.powf(1.0 / self.n as f64); Ok(ScalarValue::from(value)) } diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/simple_udf.rs index 39e1e13ce39a..dda6ba62e0af 100644 --- a/datafusion-examples/examples/simple_udf.rs +++ b/datafusion-examples/examples/simple_udf.rs @@ -24,9 +24,11 @@ use datafusion::{ logical_expr::Volatility, }; +use datafusion::error::Result; use datafusion::prelude::*; -use datafusion::{error::Result, physical_plan::functions::make_scalar_function}; use datafusion_common::cast::as_float64_array; +use datafusion_expr::ColumnarValue; +use datafusion_physical_expr::functions::columnar_values_to_array; use std::sync::Arc; /// create local execution context with an in-memory table: @@ -61,7 +63,7 @@ async fn main() -> Result<()> { let ctx = create_context()?; // First, declare the actual implementation of the calculation - let pow = |args: &[ArrayRef]| { + let pow = Arc::new(|args: &[ColumnarValue]| { // in DataFusion, all `args` and output are dynamically-typed arrays, which means that we need to: // 1. cast the values to the type we want // 2. perform the computation for every element in the array (using a loop or SIMD) and construct the result @@ -69,6 +71,8 @@ async fn main() -> Result<()> { // this is guaranteed by DataFusion based on the function's signature. assert_eq!(args.len(), 2); + let args = columnar_values_to_array(args)?; + // 1. cast both arguments to f64. These casts MUST be aligned with the signature or this function panics! let base = as_float64_array(&args[0]).expect("cast failed"); let exponent = as_float64_array(&args[1]).expect("cast failed"); @@ -92,11 +96,8 @@ async fn main() -> Result<()> { // `Ok` because no error occurred during the calculation (we should add one if exponent was [0, 1[ and the base < 0 because that panics!) // `Arc` because arrays are immutable, thread-safe, trait objects. - Ok(Arc::new(array) as ArrayRef) - }; - // the function above expects an `ArrayRef`, but DataFusion may pass a scalar to a UDF. 
- // thus, we use `make_scalar_function` to decorare the closure so that it can handle both Arrays and Scalar values. - let pow = make_scalar_function(pow); + Ok(ColumnarValue::from(Arc::new(array) as ArrayRef)) + }); // Next: // * give it a name so that it shows nicely when the plan is printed diff --git a/datafusion-examples/examples/to_timestamp.rs b/datafusion-examples/examples/to_timestamp.rs new file mode 100644 index 000000000000..a07dbaefb75b --- /dev/null +++ b/datafusion-examples/examples/to_timestamp.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion::arrow::array::StringArray; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::Result; +use datafusion::prelude::*; +use datafusion_common::assert_contains; + +/// This example demonstrates how to use the to_timestamp series +/// of functions in the DataFrame API as well as via sql. +#[tokio::main] +async fn main() -> Result<()> { + // define a schema. + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + ])); + + // define data. + let batch = RecordBatch::try_new( + schema, + vec![ + Arc::new(StringArray::from(vec![ + "2020-09-08T13:42:29Z", + "2020-09-08T13:42:29.190855-05:00", + "2020-08-09 12:13:29", + "2020-01-02", + ])), + Arc::new(StringArray::from(vec![ + "2020-09-08T13:42:29Z", + "2020-09-08T13:42:29.190855-05:00", + "08-09-2020 13/42/29", + "09-27-2020 13:42:29-05:30", + ])), + ], + )?; + + // declare a new context. In spark API, this corresponds to a new spark SQLsession + let ctx = SessionContext::new(); + + // declare a table in memory. In spark API, this corresponds to createDataFrame(...). 
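The new `to_timestamp` example that follows passes a list of chrono format strings and relies on DataFusion trying each format in turn until one parses. As a standalone, chrono-only sketch of that fallback behavior (the helper name is illustrative, not a DataFusion API):

```rust
// Standalone chrono sketch of "try each format in order": the first chrono
// format string that parses the input wins.
use chrono::NaiveDateTime;

fn parse_with_fallback(s: &str, formats: &[&str]) -> Option<NaiveDateTime> {
    formats
        .iter()
        .find_map(|fmt| NaiveDateTime::parse_from_str(s, fmt).ok())
}

fn main() {
    let formats = ["%Y-%m-%dT%H:%M:%S", "%d-%m-%Y %H/%M/%S"];
    // The first format fails for this input; the second succeeds.
    let parsed = parse_with_fallback("08-09-2020 13/42/29", &formats);
    assert!(parsed.is_some());
    println!("{:?}", parsed.unwrap());
}
```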
+ ctx.register_batch("t", batch)?; + let df = ctx.table("t").await?; + + // use to_timestamp function to convert col 'a' to timestamp type using the default parsing + let df = df.with_column("a", to_timestamp(vec![col("a")]))?; + // use to_timestamp_seconds function to convert col 'b' to timestamp(Seconds) type using a list + // of chrono formats (https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to try + let df = df.with_column( + "b", + to_timestamp_seconds(vec![ + col("b"), + lit("%+"), + lit("%d-%m-%Y %H/%M/%S"), + lit("%m-%d-%Y %H:%M:%S%#z"), + ]), + )?; + + let df = df.select_columns(&["a", "b"])?; + + // print the results + df.show().await?; + + // use sql to convert col 'a' to timestamp using the default parsing + let df = ctx.sql("select to_timestamp(a) from t").await?; + + // print the results + df.show().await?; + + // use sql to convert col 'b' to timestamp using a list of chrono formats to try + let df = ctx.sql("select to_timestamp(b, '%+', '%d-%m-%Y %H/%M/%S', '%m-%d-%Y %H:%M:%S%#z') from t").await?; + + // print the results + df.show().await?; + + // use sql to convert a static string to a timestamp using a list of chrono formats to try + // note that one of the formats is invalid ('%q') but since DataFusion will try all the + // formats until it encounters one that parses the timestamp expression successfully + // no error will be returned + let df = ctx.sql("select to_timestamp_micros('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z')").await?; + + // print the results + df.show().await?; + + // casting a string to TIMESTAMP will also work for RFC3339 timestamps + let df = ctx + .sql("select to_timestamp_millis(TIMESTAMP '2022-08-03T14:38:50Z')") + .await?; + + // print the results + df.show().await?; + + // unix timestamps (in seconds) are also supported + let df = ctx.sql("select to_timestamp(1926632005)").await?; + + // print the results + df.show().await?; + + // use sql to convert a static string to a timestamp using a non-matching chrono format to try + let result = ctx + .sql("select to_timestamp_nanos('01-14-2023 01/01/30', '%d-%m-%Y %H:%M:%S')") + .await? + .collect() + .await; + + let expected = "Error parsing timestamp from '01-14-2023 01/01/30' using format '%d-%m-%Y %H:%M:%S': input contains invalid characters"; + assert_contains!(result.unwrap_err().to_string(), expected); + + // note that using arrays for the chrono formats is not supported + let result = ctx + .sql("SELECT to_timestamp('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'))") + .await? + .collect() + .await; + + let expected = "to_timestamp function unsupported data type at index 1: List"; + assert_contains!(result.unwrap_err().to_string(), expected); + + Ok(()) +} diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index d64bbeda877d..ae9da0e865e9 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,7 @@ # Changelog +- [35.0.0](../dev/changelog/35.0.0.md) - [34.0.0](../dev/changelog/34.0.0.md) - [33.0.0](../dev/changelog/33.0.0.md) - [32.0.0](../dev/changelog/32.0.0.md) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index e00c17930850..0d773ddb2b4c 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -408,7 +408,7 @@ config_namespace! { /// parquet files by serializing them in parallel. 
Each column /// in each row group in each output file are serialized in parallel /// leveraging a maximum possible core count of n_files*n_row_groups*n_columns. - pub allow_single_file_parallelism: bool, default = false + pub allow_single_file_parallelism: bool, default = true /// By default parallel parquet writer is tuned for minimum /// memory usage in a streaming execution plan. You may see @@ -561,6 +561,10 @@ config_namespace! { /// will be collected into a single partition pub hash_join_single_partition_threshold: usize, default = 1024 * 1024 + /// The maximum estimated size in rows for one input side of a HashJoin + /// will be collected into a single partition + pub hash_join_single_partition_threshold_rows: usize, default = 1024 * 128 + /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. Valid values are /// between 0 (no selectivity) and 100 (all rows are selected). diff --git a/datafusion/common/src/file_options/arrow_writer.rs b/datafusion/common/src/file_options/arrow_writer.rs index a30e6d800e20..cb921535aba5 100644 --- a/datafusion/common/src/file_options/arrow_writer.rs +++ b/datafusion/common/src/file_options/arrow_writer.rs @@ -27,6 +27,18 @@ use super::StatementOptions; #[derive(Clone, Debug)] pub struct ArrowWriterOptions {} +impl ArrowWriterOptions { + pub fn new() -> Self { + Self {} + } +} + +impl Default for ArrowWriterOptions { + fn default() -> Self { + Self::new() + } +} + impl TryFrom<(&ConfigOptions, &StatementOptions)> for ArrowWriterOptions { type Error = DataFusionError; diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 8dcc00ca1c29..d5a1b3ee363b 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -27,8 +27,9 @@ use arrow::{downcast_dictionary_array, downcast_primitive_array}; use arrow_buffer::i256; use crate::cast::{ - as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, - as_primitive_array, as_string_array, as_struct_array, + as_boolean_array, as_fixed_size_list_array, as_generic_binary_array, + as_large_list_array, as_list_array, as_primitive_array, as_string_array, + as_struct_array, }; use crate::error::{DataFusionError, Result, _internal_err}; @@ -267,6 +268,38 @@ where Ok(()) } +fn hash_fixed_list_array( + array: &FixedSizeListArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], +) -> Result<()> { + let values = array.values().clone(); + let value_len = array.value_length(); + let offset_size = value_len as usize / array.len(); + let nulls = array.nulls(); + let mut values_hashes = vec![0u64; values.len()]; + create_hashes(&[values], random_state, &mut values_hashes)?; + if let Some(nulls) = nulls { + for i in 0..array.len() { + if nulls.is_valid(i) { + let hash = &mut hashes_buffer[i]; + for values_hash in &values_hashes[i * offset_size..(i + 1) * offset_size] + { + *hash = combine_hashes(*hash, *values_hash); + } + } + } + } else { + for i in 0..array.len() { + let hash = &mut hashes_buffer[i]; + for values_hash in &values_hashes[i * offset_size..(i + 1) * offset_size] { + *hash = combine_hashes(*hash, *values_hash); + } + } + } + Ok(()) +} + /// Test version of `create_hashes` that produces the same value for /// all hashes (to test collisions) /// @@ -366,6 +399,10 @@ pub fn create_hashes<'a>( let array = as_large_list_array(array)?; hash_list_array(array, random_state, hashes_buffer)?; } + DataType::FixedSizeList(_,_) => { + let array = 
as_fixed_size_list_array(array)?; + hash_fixed_list_array(array, random_state, hashes_buffer)?; + } _ => { // This is internal because we should have caught this before. return _internal_err!( @@ -546,6 +583,30 @@ mod tests { assert_eq!(hashes[2], hashes[3]); } + #[test] + // Tests actual values of hashes, which are different if forcing collisions + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_fixed_size_list_arrays() { + let data = vec![ + Some(vec![Some(0), Some(1), Some(2)]), + None, + Some(vec![Some(3), None, Some(5)]), + Some(vec![Some(3), None, Some(5)]), + None, + Some(vec![Some(0), Some(1), Some(2)]), + ]; + let list_array = + Arc::new(FixedSizeListArray::from_iter_primitive::( + data, 3, + )) as ArrayRef; + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; list_array.len()]; + create_hashes(&[list_array], &random_state, &mut hashes).unwrap(); + assert_eq!(hashes[0], hashes[5]); + assert_eq!(hashes[1], hashes[4]); + assert_eq!(hashes[2], hashes[3]); + } + #[test] // Tests actual values of hashes, which are different if forcing collisions #[cfg(not(feature = "force_hash_collisions"))] diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 3fe2ba99ab83..c614098713d6 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::error::{_internal_err, _plan_err}; +use crate::error::_plan_err; use crate::{DataFusionError, Result, ScalarValue}; use arrow_schema::DataType; use std::collections::HashMap; @@ -65,11 +65,7 @@ impl ParamValues { } } - pub fn get_placeholders_with_values( - &self, - id: &str, - data_type: Option<&DataType>, - ) -> Result { + pub fn get_placeholders_with_values(&self, id: &str) -> Result { match self { ParamValues::List(list) => { if id.is_empty() { @@ -90,14 +86,6 @@ impl ParamValues { "No value found for placeholder with id {id}" )) })?; - // check if the data type of the value matches the data type of the placeholder - if Some(&value.data_type()) != data_type { - return _internal_err!( - "Placeholder value type mismatch: expected {:?}, got {:?}", - data_type, - value.data_type() - ); - } Ok(value.clone()) } ParamValues::Map(map) => { @@ -109,14 +97,6 @@ impl ParamValues { "No value found for placeholder with name {id}" )) })?; - // check if the data type of the value matches the data type of the placeholder - if Some(&value.data_type()) != data_type { - return _internal_err!( - "Placeholder value type mismatch: expected {:?}, got {:?}", - data_type, - value.data_type() - ); - } Ok(value.clone()) } } diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 20d03c70960a..2f9e374bd7f4 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -34,8 +34,9 @@ use crate::cast::{ }; use crate::error::{DataFusionError, Result, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; -use crate::utils::{array_into_large_list_array, array_into_list_array}; - +use crate::utils::{ + array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array, +}; use arrow::compute::kernels::numeric::*; use arrow::util::display::{ArrayFormatter, FormatOptions}; use arrow::{ @@ -2223,9 +2224,11 @@ impl ScalarValue { let list_array = as_fixed_size_list_array(array)?; let nested_array = list_array.value(index); // Produces a single element `ListArray` with the 
value at `index`. - let arr = Arc::new(array_into_list_array(nested_array)); + let list_size = nested_array.len(); + let arr = + Arc::new(array_into_fixed_size_list_array(nested_array, list_size)); - ScalarValue::List(arr) + ScalarValue::FixedSizeList(arr) } DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?, DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?, @@ -2361,6 +2364,16 @@ impl ScalarValue { ScalarValue::try_from_array(&cast_arr, 0) } + /// Try to cast this value to a ScalarValue of type `data_type` + pub fn cast_to(&self, data_type: &DataType) -> Result { + let cast_options = CastOptions { + safe: false, + format_options: Default::default(), + }; + let cast_arr = cast_with_options(&self.to_array()?, data_type, &cast_options)?; + ScalarValue::try_from_array(&cast_arr, 0) + } + fn eq_array_decimal( array: &ArrayRef, index: usize, @@ -2971,6 +2984,19 @@ impl TryFrom<&DataType> for ScalarValue { .to_owned() .into(), ), + // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`. + DataType::FixedSizeList(field, _) => ScalarValue::FixedSizeList( + new_null_array( + &DataType::FixedSizeList( + Arc::new(Field::new("item", field.data_type().clone(), true)), + 1, + ), + 1, + ) + .as_fixed_size_list() + .to_owned() + .into(), + ), DataType::Struct(fields) => ScalarValue::Struct(None, fields.clone()), DataType::Null => ScalarValue::Null, _ => { diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index 0a61fce15482..d21bd464f850 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -25,7 +25,9 @@ use arrow::compute; use arrow::compute::{partition, SortColumn, SortOptions}; use arrow::datatypes::{Field, SchemaRef, UInt32Type}; use arrow::record_batch::RecordBatch; -use arrow_array::{Array, LargeListArray, ListArray, RecordBatchOptions}; +use arrow_array::{ + Array, FixedSizeListArray, LargeListArray, ListArray, RecordBatchOptions, +}; use arrow_schema::DataType; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; @@ -368,6 +370,19 @@ pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { ) } +pub fn array_into_fixed_size_list_array( + arr: ArrayRef, + list_size: usize, +) -> FixedSizeListArray { + let list_size = list_size as i32; + FixedSizeListArray::new( + Arc::new(Field::new("item", arr.data_type().to_owned(), true)), + list_size, + arr, + None, + ) +} + /// Wrap arrays into a single element `ListArray`. 
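The `ScalarValue::cast_to` helper added above routes a scalar through the arrow cast kernel and back into a `ScalarValue`. A small usage sketch, assuming the method lands with the signature shown in the hunk:

```rust
use arrow_schema::DataType;
use datafusion_common::{Result, ScalarValue};

fn main() -> Result<()> {
    // Cast an Int32 scalar to Int64 via the arrow cast kernel.
    let v = ScalarValue::Int32(Some(42));
    let casted = v.cast_to(&DataType::Int64)?;
    assert_eq!(casted, ScalarValue::Int64(Some(42)));
    Ok(())
}
```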
/// /// Example: diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index f5496d4c4700..69b18a326951 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -62,11 +62,11 @@ bytes = { workspace = true } bzip2 = { version = "0.4.3", optional = true } chrono = { workspace = true } dashmap = { workspace = true } -datafusion-common = { path = "../common", version = "34.0.0", features = ["object_store"], default-features = false } +datafusion-common = { path = "../common", version = "35.0.0", features = ["object_store"], default-features = false } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } -datafusion-optimizer = { path = "../optimizer", version = "34.0.0", default-features = false } -datafusion-physical-expr = { path = "../physical-expr", version = "34.0.0", default-features = false } +datafusion-optimizer = { path = "../optimizer", version = "35.0.0", default-features = false } +datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index ce27d57da00d..da7e1f5e2193 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -29,7 +29,10 @@ use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result}; use std::any::Any; use std::sync::Arc; -/// Represent a list of named catalogs +/// Represent a list of named [`CatalogProvider`]s. +/// +/// Please see the documentation on `CatalogProvider` for details of +/// implementing a custom catalog. pub trait CatalogList: Sync + Send { /// Returns the catalog list as [`Any`] /// so that it can be downcast to a specific implementation. @@ -94,6 +97,88 @@ impl CatalogList for MemoryCatalogList { } /// Represents a catalog, comprising a number of named schemas. +/// +/// # Catalog Overview +/// +/// To plan and execute queries, DataFusion needs a "Catalog" that provides +/// metadata such as which schemas and tables exist, their columns and data +/// types, and how to access the data. +/// +/// The Catalog API consists: +/// * [`CatalogList`]: a collection of `CatalogProvider`s +/// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems) +/// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) +/// * [`TableProvider]`: individual tables +/// +/// # Implementing Catalogs +/// +/// To implement a catalog, you implement at least one of the [`CatalogList`], +/// [`CatalogProvider`] and [`SchemaProvider`] traits and register them +/// appropriately the [`SessionContext`]. +/// +/// [`SessionContext`]: crate::execution::context::SessionContext +/// +/// DataFusion comes with a simple in-memory catalog implementation, +/// [`MemoryCatalogProvider`], that is used by default and has no persistence. +/// DataFusion does not include more complex Catalog implementations because +/// catalog management is a key design choice for most data systems, and thus +/// it is unlikely that any general-purpose catalog implementation will work +/// well across many use cases. +/// +/// # Implementing "Remote" catalogs +/// +/// Sometimes catalog information is stored remotely and requires a network call +/// to retrieve. 
For example, the [Delta Lake] table format stores table +/// metadata in files on S3 that must be first downloaded to discover what +/// schemas and tables exist. +/// +/// [Delta Lake]: https://delta.io/ +/// +/// The [`CatalogProvider`] can support this use case, but it takes some care. +/// The planning APIs in DataFusion are not `async` and thus network IO can not +/// be performed "lazily" / "on demand" during query planning. The rationale for +/// this design is that using remote procedure calls for all catalog accesses +/// required for query planning would likely result in multiple network calls +/// per plan, resulting in very poor planning performance. +/// +/// To implement [`CatalogProvider`] and [`SchemaProvider`] for remote catalogs, +/// you need to provide an in memory snapshot of the required metadata. Most +/// systems typically either already have this information cached locally or can +/// batch access to the remote catalog to retrieve multiple schemas and tables +/// in a single network call. +/// +/// Note that [`SchemaProvider::table`] is an `async` function in order to +/// simplify implementing simple [`SchemaProvider`]s. For many table formats it +/// is easy to list all available tables but there is additional non trivial +/// access required to read table details (e.g. statistics). +/// +/// The pattern that DataFusion itself uses to plan SQL queries is to walk over +/// the query to [find all schema / table references in an `async` function], +/// performing required remote catalog in parallel, and then plans the query +/// using that snapshot. +/// +/// [find all schema / table references in an `async` function]: crate::execution::context::SessionState::resolve_table_references +/// +/// # Example Catalog Implementations +/// +/// Here are some examples of how to implement custom catalogs: +/// +/// * [`datafusion-cli`]: [`DynamicFileCatalogProvider`] catalog provider +/// that treats files and directories on a filesystem as tables. +/// +/// * The [`catalog.rs`]: a simple directory based catalog. +/// +/// * [delta-rs]: [`UnityCatalogProvider`] implementation that can +/// read from Delta Lake tables +/// +/// [`datafusion-cli`]: https://arrow.apache.org/datafusion/user-guide/cli.html +/// [`DynamicFileCatalogProvider`]: https://github.com/apache/arrow-datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 +/// [`catalog.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/external_dependency/catalog.rs +/// [delta-rs]: https://github.com/delta-io/delta-rs +/// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 +/// +/// [`TableProvider]: crate::datasource::TableProvider + pub trait CatalogProvider: Sync + Send { /// Returns the catalog provider as [`Any`] /// so that it can be downcast to a specific implementation. diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/core/src/catalog/schema.rs index 1bb2df914ab2..2cebad717249 100644 --- a/datafusion/core/src/catalog/schema.rs +++ b/datafusion/core/src/catalog/schema.rs @@ -28,20 +28,28 @@ use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; /// Represents a schema, comprising a number of named tables. +/// +/// Please see [`CatalogProvider`] for details of implementing a custom catalog. 
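To make the catalog hierarchy described above concrete, here is a rough sketch of registering a custom catalog with a `SessionContext`, assuming the in-memory `MemoryCatalogProvider` / `MemorySchemaProvider` implementations and a `MemTable` as the `TableProvider`; a remote catalog would build the same structure from a cached metadata snapshot instead of in-memory objects.

```rust
use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::catalog::schema::{MemorySchemaProvider, SchemaProvider};
use datafusion::catalog::{CatalogProvider, MemoryCatalogProvider};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // A tiny in-memory table to hang off the schema provider.
    let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    let table = Arc::new(MemTable::try_new(schema, vec![vec![batch]])?);

    // Build the hierarchy bottom-up: TableProvider -> SchemaProvider -> CatalogProvider.
    let schema_provider = Arc::new(MemorySchemaProvider::new());
    let _ = schema_provider.register_table("numbers".to_string(), table)?;

    let catalog = Arc::new(MemoryCatalogProvider::new());
    let _ = catalog.register_schema("my_schema", schema_provider)?;
    let _ = ctx.register_catalog("my_catalog", catalog);

    // Tables are then addressable by their fully qualified name.
    ctx.sql("SELECT x FROM my_catalog.my_schema.numbers")
        .await?
        .show()
        .await?;
    Ok(())
}
```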
+/// +/// [`CatalogProvider`]: super::CatalogProvider #[async_trait] pub trait SchemaProvider: Sync + Send { - /// Returns the schema provider as [`Any`](std::any::Any) - /// so that it can be downcast to a specific implementation. + /// Returns this `SchemaProvider` as [`Any`] so that it can be downcast to a + /// specific implementation. fn as_any(&self) -> &dyn Any; /// Retrieves the list of available table names in this schema. fn table_names(&self) -> Vec; - /// Retrieves a specific table from the schema by name, provided it exists. + /// Retrieves a specific table from the schema by name, if it exists, + /// otherwise returns `None`. async fn table(&self, name: &str) -> Option>; - /// If supported by the implementation, adds a new table to this schema. - /// If a table of the same name existed before, it returns "Table already exists" error. + /// If supported by the implementation, adds a new table named `name` to + /// this schema. + /// + /// If a table of the same name was already registered, returns "Table + /// already exists" error. #[allow(unused_variables)] fn register_table( &self, @@ -51,16 +59,16 @@ pub trait SchemaProvider: Sync + Send { exec_err!("schema provider does not support registering tables") } - /// If supported by the implementation, removes an existing table from this schema and returns it. - /// If no table of that name exists, returns Ok(None). + /// If supported by the implementation, removes the `name` table from this + /// schema and returns the previously registered [`TableProvider`], if any. + /// + /// If no `name` table exists, returns Ok(None). #[allow(unused_variables)] fn deregister_table(&self, name: &str) -> Result>> { exec_err!("schema provider does not support deregistering tables") } - /// If supported by the implementation, checks the table exist in the schema provider or not. - /// If no matched table in the schema provider, return false. - /// Otherwise, return true. + /// Returns true if table exist in the schema provider, false otherwise. fn table_exist(&self, name: &str) -> bool; } diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs new file mode 100644 index 000000000000..71075839b9a0 --- /dev/null +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
CteWorkTable implementation used for recursive queries + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::SchemaRef; +use async_trait::async_trait; +use datafusion_physical_plan::work_table::WorkTableExec; + +use crate::{ + error::Result, + logical_expr::{Expr, LogicalPlan, TableProviderFilterPushDown}, + physical_plan::ExecutionPlan, +}; + +use crate::datasource::{TableProvider, TableType}; +use crate::execution::context::SessionState; + +/// The temporary working table where the previous iteration of a recursive query is stored +/// Naming is based on PostgreSQL's implementation. +/// See here for more details: www.postgresql.org/docs/11/queries-with.html#id-1.5.6.12.5.4 +pub struct CteWorkTable { + /// The name of the CTE work table + // WIP, see https://github.com/apache/arrow-datafusion/issues/462 + #[allow(dead_code)] + name: String, + /// This schema must be shared across both the static and recursive terms of a recursive query + table_schema: SchemaRef, +} + +impl CteWorkTable { + /// construct a new CteWorkTable with the given name and schema + /// This schema must match the schema of the recursive term of the query + /// Since the scan method will contain an physical plan that assumes this schema + pub fn new(name: &str, table_schema: SchemaRef) -> Self { + Self { + name: name.to_owned(), + table_schema, + } + } +} + +#[async_trait] +impl TableProvider for CteWorkTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn get_logical_plan(&self) -> Option<&LogicalPlan> { + None + } + + fn schema(&self) -> SchemaRef { + self.table_schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Temporary + } + + async fn scan( + &self, + _state: &SessionState, + _projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + // TODO: pushdown filters and limits + Ok(Arc::new(WorkTableExec::new( + self.name.clone(), + self.table_schema.clone(), + ))) + } + + fn supports_filter_pushdown( + &self, + _filter: &Expr, + ) -> Result { + // TODO: should we support filter pushdown? + Ok(TableProviderFilterPushDown::Unsupported) + } +} diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 9729bfa163af..fdf6277a5ed2 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -885,16 +885,17 @@ async fn send_arrays_to_col_writers( rb: &RecordBatch, schema: Arc, ) -> Result<()> { - for (tx, array, field) in col_array_channels - .iter() - .zip(rb.columns()) - .zip(schema.fields()) - .map(|((a, b), c)| (a, b, c)) - { + // Each leaf column has its own channel, increment next_channel for each leaf column sent. + let mut next_channel = 0; + for (array, field) in rb.columns().iter().zip(schema.fields()) { for c in compute_leaves(field, array)? { - tx.send(c).await.map_err(|_| { - DataFusionError::Internal("Unable to send array to writer!".into()) - })?; + col_array_channels[next_channel] + .send(c) + .await + .map_err(|_| { + DataFusionError::Internal("Unable to send array to writer!".into()) + })?; + next_channel += 1; } } @@ -902,7 +903,7 @@ async fn send_arrays_to_col_writers( } /// Spawns a tokio task which joins the parallel column writer tasks, -/// and finalizes the row group. 
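The channel-indexing change above matters because a single Arrow field can expand into several parquet leaf columns, so advancing one channel per field would misalign the column writers. A small illustration of the leaf count, assuming `arrow_to_parquet_schema` remains the parquet crate's conversion entry point:

```rust
use arrow_schema::{DataType, Field, Fields, Schema};
use parquet::arrow::arrow_to_parquet_schema;
use parquet::errors::Result;

fn main() -> Result<()> {
    let schema = Schema::new(vec![
        Field::new(
            "s",
            DataType::Struct(Fields::from(vec![
                Field::new("id", DataType::Int64, false),
                Field::new("name", DataType::Utf8, false),
            ])),
            false,
        ),
        Field::new("flag", DataType::Boolean, false),
    ]);

    // Two Arrow fields, but three parquet leaf columns: s.id, s.name, flag.
    let descriptor = arrow_to_parquet_schema(&schema)?;
    assert_eq!(schema.fields().len(), 2);
    assert_eq!(descriptor.num_columns(), 3);
    Ok(())
}
```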
+/// and finalizes the row group fn spawn_rg_join_and_finalize_task( column_writer_handles: Vec>>, rg_rows: usize, diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 2e516cc36a01..8f20da183a93 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -20,6 +20,7 @@ //! [`ListingTable`]: crate::datasource::listing::ListingTable pub mod avro_to_arrow; +pub mod cte_worktable; pub mod default_table_source; pub mod empty; pub mod file_format; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index c2689cfb10a6..7215cdd60716 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -549,8 +549,13 @@ impl FileOpener for ParquetOpener { // with that range can be skipped as well if enable_page_index && !row_groups.is_empty() { if let Some(p) = page_pruning_predicate { - let pruned = - p.prune(&row_groups, file_metadata.as_ref(), &file_metrics)?; + let pruned = p.prune( + &file_schema, + builder.parquet_schema(), + &row_groups, + file_metadata.as_ref(), + &file_metrics, + )?; if let Some(row_selection) = pruned { builder = builder.with_row_selection(row_selection); } @@ -782,7 +787,8 @@ mod tests { array::{Int64Array, Int8Array, StringArray}, datatypes::{DataType, Field, SchemaBuilder}, }; - use arrow_array::Date64Array; + use arrow_array::{Date64Array, StructArray}; + use arrow_schema::Fields; use chrono::{TimeZone, Utc}; use datafusion_common::{assert_contains, ToDFSchema}; use datafusion_common::{FileType, GetExt, ScalarValue}; @@ -793,6 +799,7 @@ mod tests { use object_store::local::LocalFileSystem; use object_store::path::Path; use object_store::ObjectMeta; + use parquet::arrow::ArrowWriter; use std::fs::{self, File}; use std::io::Write; use tempfile::TempDir; @@ -1765,12 +1772,14 @@ mod tests { // assert the batches and some metrics #[rustfmt::skip] - let expected = ["+-----+", + let expected = [ + "+-----+", "| int |", "+-----+", "| 4 |", "| 5 |", - "+-----+"]; + "+-----+" + ]; assert_batches_sorted_eq!(expected, &rt.batches.unwrap()); assert_eq!(get_value(&metrics, "page_index_rows_filtered"), 4); assert!( @@ -2136,4 +2145,65 @@ mod tests { let execution_props = ExecutionProps::new(); create_physical_expr(expr, &df_schema, &execution_props).unwrap() } + + #[tokio::test] + async fn test_struct_filter_parquet() -> Result<()> { + let tmp_dir = TempDir::new()?; + let path = tmp_dir.path().to_str().unwrap().to_string() + "/test.parquet"; + write_file(&path); + let ctx = SessionContext::new(); + let opt = ListingOptions::new(Arc::new(ParquetFormat::default())); + ctx.register_listing_table("base_table", path, opt, None, None) + .await + .unwrap(); + let sql = "select * from base_table where name='test02'"; + let batch = ctx.sql(sql).await.unwrap().collect().await.unwrap(); + assert_eq!(batch.len(), 1); + let expected = [ + "+---------------------+----+--------+", + "| struct | id | name |", + "+---------------------+----+--------+", + "| {id: 4, name: aaa2} | 2 | test02 |", + "+---------------------+----+--------+", + ]; + crate::assert_batches_eq!(expected, &batch); + Ok(()) + } + + fn write_file(file: &String) { + let struct_fields = Fields::from(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ]); + let schema = Schema::new(vec![ + Field::new("struct", DataType::Struct(struct_fields.clone()), 
false), + Field::new("id", DataType::Int64, true), + Field::new("name", DataType::Utf8, false), + ]); + let id_array = Int64Array::from(vec![Some(1), Some(2)]); + let columns = vec![ + Arc::new(Int64Array::from(vec![3, 4])) as _, + Arc::new(StringArray::from(vec!["aaa1", "aaa2"])) as _, + ]; + let struct_array = StructArray::new(struct_fields, columns, None); + + let name_array = StringArray::from(vec![Some("test01"), Some("test02")]); + let schema = Arc::new(schema); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(struct_array), + Arc::new(id_array), + Arc::new(name_array), + ], + ) + .unwrap(); + let file = File::create(file).unwrap(); + let w_opt = WriterProperties::builder().build(); + let mut writer = ArrowWriter::try_new(file, schema, Some(w_opt)).unwrap(); + writer.write(&batch).unwrap(); + writer.flush().unwrap(); + writer.close().unwrap(); + } } diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index a0637f379610..f0a8e6608990 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -23,11 +23,12 @@ use arrow::array::{ }; use arrow::datatypes::DataType; use arrow::{array::ArrayRef, datatypes::SchemaRef, error::ArrowError}; +use arrow_schema::Schema; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; use log::{debug, trace}; -use parquet::schema::types::ColumnDescriptor; +use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor}; use parquet::{ arrow::arrow_reader::{RowSelection, RowSelector}, errors::ParquetError, @@ -41,7 +42,9 @@ use std::collections::HashSet; use std::sync::Arc; use crate::datasource::physical_plan::parquet::parquet_to_arrow_decimal_type; -use crate::datasource::physical_plan::parquet::statistics::from_bytes_to_i128; +use crate::datasource::physical_plan::parquet::statistics::{ + from_bytes_to_i128, parquet_column, +}; use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use super::metrics::ParquetFileMetrics; @@ -128,6 +131,8 @@ impl PagePruningPredicate { /// Returns a [`RowSelection`] for the given file pub fn prune( &self, + arrow_schema: &Schema, + parquet_schema: &SchemaDescriptor, row_groups: &[usize], file_metadata: &ParquetMetaData, file_metrics: &ParquetFileMetrics, @@ -163,9 +168,8 @@ impl PagePruningPredicate { let mut row_selections = Vec::with_capacity(page_index_predicates.len()); for predicate in page_index_predicates { - // find column index by looking in the row group metadata. - let col_idx = find_column_index(predicate, &groups[0]); - + // find column index in the parquet schema + let col_idx = find_column_index(predicate, arrow_schema, parquet_schema); let mut selectors = Vec::with_capacity(row_groups.len()); for r in row_groups.iter() { let row_group_metadata = &groups[*r]; @@ -231,7 +235,7 @@ impl PagePruningPredicate { } } -/// Returns the column index in the row group metadata for the single +/// Returns the column index in the row parquet schema for the single /// column of a single column pruning predicate. 
/// /// For example, give the predicate `y > 5` @@ -246,12 +250,12 @@ impl PagePruningPredicate { /// Panics: /// /// If the predicate contains more than one column reference (assumes -/// that `extract_page_index_push_down_predicates` only return +/// that `extract_page_index_push_down_predicates` only returns /// predicate with one col) -/// fn find_column_index( predicate: &PruningPredicate, - row_group_metadata: &RowGroupMetaData, + arrow_schema: &Schema, + parquet_schema: &SchemaDescriptor, ) -> Option { let mut found_required_column: Option<&Column> = None; @@ -269,25 +273,12 @@ fn find_column_index( } } - let column = if let Some(found_required_column) = found_required_column.as_ref() { - found_required_column - } else { + let Some(column) = found_required_column.as_ref() else { trace!("No column references in pruning predicate"); return None; }; - let col_idx = row_group_metadata - .columns() - .iter() - .enumerate() - .find(|(_idx, c)| c.column_descr().name() == column.name()) - .map(|(idx, _c)| idx); - - if col_idx.is_none() { - trace!("Can not find column {} in row group meta", column.name()); - } - - col_idx + parquet_column(parquet_schema, arrow_schema, column.name()).map(|x| x.0) } /// Intersects the [`RowSelector`]s diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs index c519d41aad01..fa9523a76380 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs @@ -19,6 +19,8 @@ use arrow::{array::ArrayRef, datatypes::Schema}; use arrow_array::BooleanArray; use arrow_schema::FieldRef; use datafusion_common::{Column, ScalarValue}; +use parquet::basic::Type; +use parquet::data_type::Decimal; use parquet::file::metadata::ColumnChunkMetaData; use parquet::schema::types::SchemaDescriptor; use parquet::{ @@ -143,7 +145,10 @@ pub(crate) async fn prune_row_groups_by_bloom_filters< continue; } }; - column_sbbf.insert(column_name.to_string(), bf); + let physical_type = + builder.parquet_schema().column(column_idx).physical_type(); + + column_sbbf.insert(column_name.to_string(), (bf, physical_type)); } let stats = BloomFilterStatistics { column_sbbf }; @@ -169,8 +174,8 @@ pub(crate) async fn prune_row_groups_by_bloom_filters< /// Implements `PruningStatistics` for Parquet Split Block Bloom Filters (SBBF) struct BloomFilterStatistics { - /// Maps column name to the parquet bloom filter - column_sbbf: HashMap, + /// Maps column name to the parquet bloom filter and parquet physical type + column_sbbf: HashMap, } impl PruningStatistics for BloomFilterStatistics { @@ -200,7 +205,7 @@ impl PruningStatistics for BloomFilterStatistics { column: &Column, values: &HashSet, ) -> Option { - let sbbf = self.column_sbbf.get(column.name.as_str())?; + let (sbbf, parquet_type) = self.column_sbbf.get(column.name.as_str())?; // Bloom filters are probabilistic data structures that can return false // positives (i.e. 
it might return true even if the value is not @@ -209,16 +214,63 @@ impl PruningStatistics for BloomFilterStatistics { let known_not_present = values .iter() - .map(|value| match value { - ScalarValue::Utf8(Some(v)) => sbbf.check(&v.as_str()), - ScalarValue::Boolean(Some(v)) => sbbf.check(v), - ScalarValue::Float64(Some(v)) => sbbf.check(v), - ScalarValue::Float32(Some(v)) => sbbf.check(v), - ScalarValue::Int64(Some(v)) => sbbf.check(v), - ScalarValue::Int32(Some(v)) => sbbf.check(v), - ScalarValue::Int16(Some(v)) => sbbf.check(v), - ScalarValue::Int8(Some(v)) => sbbf.check(v), - _ => true, + .map(|value| { + match value { + ScalarValue::Utf8(Some(v)) => sbbf.check(&v.as_str()), + ScalarValue::Boolean(Some(v)) => sbbf.check(v), + ScalarValue::Float64(Some(v)) => sbbf.check(v), + ScalarValue::Float32(Some(v)) => sbbf.check(v), + ScalarValue::Int64(Some(v)) => sbbf.check(v), + ScalarValue::Int32(Some(v)) => sbbf.check(v), + ScalarValue::Int16(Some(v)) => sbbf.check(v), + ScalarValue::Int8(Some(v)) => sbbf.check(v), + ScalarValue::Decimal128(Some(v), p, s) => match parquet_type { + Type::INT32 => { + //https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/Encodings.md?plain=1#L35-L42 + // All physical type are little-endian + if *p > 9 { + //DECIMAL can be used to annotate the following types: + // + // int32: for 1 <= precision <= 9 + // int64: for 1 <= precision <= 18 + return true; + } + let b = (*v as i32).to_le_bytes(); + // Use Decimal constructor after https://github.com/apache/arrow-rs/issues/5325 + let decimal = Decimal::Int32 { + value: b, + precision: *p as i32, + scale: *s as i32, + }; + sbbf.check(&decimal) + } + Type::INT64 => { + if *p > 18 { + return true; + } + let b = (*v as i64).to_le_bytes(); + let decimal = Decimal::Int64 { + value: b, + precision: *p as i32, + scale: *s as i32, + }; + sbbf.check(&decimal) + } + Type::FIXED_LEN_BYTE_ARRAY => { + // keep with from_bytes_to_i128 + let b = v.to_be_bytes().to_vec(); + // Use Decimal constructor after https://github.com/apache/arrow-rs/issues/5325 + let decimal = Decimal::Bytes { + value: b.into(), + precision: *p as i32, + scale: *s as i32, + }; + sbbf.check(&decimal) + } + _ => true, + }, + _ => true, + } }) // The row group doesn't contain any of the values if // all the checks are false diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 695e139517cf..73896f8eb7c1 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -194,11 +194,11 @@ pub(crate) fn get_col_stats( ) -> Vec { (0..schema.fields().len()) .map(|i| { - let max_value = match &max_values[i] { + let max_value = match max_values.get_mut(i).unwrap() { Some(max_value) => max_value.evaluate().ok(), None => None, }; - let min_value = match &min_values[i] { + let min_value = match min_values.get_mut(i).unwrap() { Some(min_value) => min_value.evaluate().ok(), None => None, }; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 1e378541b624..9b623d7a51ec 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -26,6 +26,7 @@ mod parquet; use crate::{ catalog::{CatalogList, MemoryCatalogList}, datasource::{ + cte_worktable::CteWorkTable, function::{TableFunction, TableFunctionImpl}, listing::{ListingOptions, ListingTable}, provider::TableProviderFactory, @@ -1899,6 +1900,18 @@ impl<'a> ContextProvider for 
SessionContextProvider<'a> { Ok(provider_as_source(provider)) } + /// Create a new CTE work table for a recursive CTE logical plan + /// This table will be used in conjunction with a Worktable physical plan + /// to read and write each iteration of a recursive CTE + fn create_cte_work_table( + &self, + name: &str, + schema: SchemaRef, + ) -> Result> { + let table = Arc::new(CteWorkTable::new(name, schema)); + Ok(provider_as_source(table)) + } + fn get_function_meta(&self, name: &str) -> Option> { self.state.scalar_functions().get(name).cloned() } diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 8fc724a22443..365f359f495d 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -342,16 +342,20 @@ //! //! [`ExecutionPlan`]s process data using the [Apache Arrow] memory //! format, making heavy use of functions from the [arrow] -//! crate. Calling [`execute`] produces 1 or more partitions of data, -//! consisting an operator that implements -//! [`SendableRecordBatchStream`]. -//! -//! Values are represented with [`ColumnarValue`], which are either +//! crate. Values are represented with [`ColumnarValue`], which are either //! [`ScalarValue`] (single constant values) or [`ArrayRef`] (Arrow //! Arrays). //! -//! Balanced parallelism is achieved using [`RepartitionExec`], which -//! implements a [Volcano style] "Exchange". +//! Calling [`execute`] produces 1 or more partitions of data, +//! as a [`SendableRecordBatchStream`], which implements a pull based execution +//! API. Calling `.next().await` will incrementally compute and return the next +//! [`RecordBatch`]. Balanced parallelism is achieved using [Volcano style] +//! "Exchange" operations implemented by [`RepartitionExec`]. +//! +//! While some recent research such as [Morsel-Driven Parallelism] describes challenges +//! with the pull style Volcano execution model on NUMA architectures, in practice DataFusion achieves +//! similar scalability as systems that use morsel driven approach such as DuckDB. +//! See the [DataFusion paper submitted to SIGMOD] for more details. //! //! [`execute`]: physical_plan::ExecutionPlan::execute //! [`SendableRecordBatchStream`]: crate::physical_plan::SendableRecordBatchStream @@ -364,8 +368,26 @@ //! //! [`RepartitionExec`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html //! [Volcano style]: https://w6113.github.io/files/papers/volcanoparallelism-89.pdf +//! [Morsel-Driven Parallelism]: https://db.in.tum.de/~leis/papers/morsels.pdf +//! [DataFusion paper submitted SIGMOD]: https://github.com/apache/arrow-datafusion/files/13874720/DataFusion_Query_Engine___SIGMOD_2024.pdf //! [implementors of `ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#implementors //! +//! ## Thread Scheduling +//! +//! DataFusion incrementally computes output from a [`SendableRecordBatchStream`] +//! with `target_partitions` threads. Parallelism is implementing using multiple +//! [Tokio] [`task`]s, which are executed by threads managed by a tokio Runtime. +//! While tokio is most commonly used +//! for asynchronous network I/O, its combination of an efficient, work-stealing +//! scheduler, first class compiler support for automatic continuation generation, +//! and exceptional performance makes it a compelling choice for CPU intensive +//! applications as well. This is explained in more detail in [Using Rustlang’s Async Tokio +//! Runtime for CPU-Bound Tasks]. +//! +//! 
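As a sketch of the pull-based execution model described in the docs above: plan a query, then pull `RecordBatch`es incrementally with `.next().await` rather than collecting everything at once (the table name and query below are illustrative):

```rust
use datafusion::error::Result;
use datafusion::physical_plan::execute_stream;
use datafusion::prelude::*;
use futures::StreamExt;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t AS VALUES (1), (2), (3)").await?;

    // Plan the query, then pull batches one at a time instead of collecting.
    let plan = ctx
        .sql("SELECT column1 FROM t")
        .await?
        .create_physical_plan()
        .await?;
    let mut stream = execute_stream(plan, ctx.task_ctx())?;
    while let Some(batch) = stream.next().await {
        let batch = batch?; // each item is a Result<RecordBatch>
        println!("got {} rows", batch.num_rows());
    }
    Ok(())
}
```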
[Tokio]: https://tokio.rs +//! [`task`]: tokio::task +//! [Using Rustlang’s Async Tokio Runtime for CPU-Bound Tasks]: https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/ +//! //! ## State Management and Configuration //! //! [`ConfigOptions`] contain options to control DataFusion's @@ -393,10 +415,12 @@ //! //! The amount of memory and temporary local disk space used by //! DataFusion when running a plan can be controlled using the -//! [`MemoryPool`] and [`DiskManager`]. +//! [`MemoryPool`] and [`DiskManager`]. Other runtime options can be +//! found on [`RuntimeEnv`]. //! //! [`DiskManager`]: crate::execution::DiskManager //! [`MemoryPool`]: crate::execution::memory_pool::MemoryPool +//! [`RuntimeEnv`]: crate::execution::runtime_env::RuntimeEnv //! [`ObjectStoreRegistry`]: crate::datasource::object_store::ObjectStoreRegistry //! //! ## Crate Organization diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 86a8cdb7b3d4..4fe11c14a758 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -197,20 +197,46 @@ fn take_optimizable_min( agg_expr: &dyn AggregateExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { - let col_stats = &stats.column_statistics; - if let Some(casted_expr) = agg_expr.as_any().downcast_ref::() { - if casted_expr.expressions().len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::() - { - if let Precision::Exact(val) = &col_stats[col_expr.index()].min_value { - if !val.is_null() { - return Some((val.clone(), casted_expr.name().to_string())); + if let Precision::Exact(num_rows) = &stats.num_rows { + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::() + { + if let Ok(min_data_type) = + ScalarValue::try_from(casted_expr.field().unwrap().data_type()) + { + return Some((min_data_type, casted_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::() + { + if casted_expr.expressions().len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = casted_expr.expressions()[0] + .as_any() + .downcast_ref::() + { + if let Precision::Exact(val) = + &col_stats[col_expr.index()].min_value + { + if !val.is_null() { + return Some(( + val.clone(), + casted_expr.name().to_string(), + )); + } + } + } } } } + _ => {} } } None @@ -221,20 +247,46 @@ fn take_optimizable_max( agg_expr: &dyn AggregateExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { - let col_stats = &stats.column_statistics; - if let Some(casted_expr) = agg_expr.as_any().downcast_ref::() { - if casted_expr.expressions().len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::() - { - if let Precision::Exact(val) = &col_stats[col_expr.index()].max_value { - if !val.is_null() { - return Some((val.clone(), casted_expr.name().to_string())); + if let Precision::Exact(num_rows) = &stats.num_rows { + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::() + { + if let Ok(max_data_type) = + 
ScalarValue::try_from(casted_expr.field().unwrap().data_type()) + { + return Some((max_data_type, casted_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::() + { + if casted_expr.expressions().len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = casted_expr.expressions()[0] + .as_any() + .downcast_ref::() + { + if let Precision::Exact(val) = + &col_stats[col_expr.index()].max_value + { + if !val.is_null() { + return Some(( + val.clone(), + casted_expr.name().to_string(), + )); + } + } + } } } } + _ => {} } } None diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index ba66dca55b35..f9b9fdf85cfa 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -87,9 +87,10 @@ fn should_swap_join_order( } } -fn supports_collect_by_size( +fn supports_collect_by_thresholds( plan: &dyn ExecutionPlan, - collection_size_threshold: usize, + threshold_byte_size: usize, + threshold_num_rows: usize, ) -> bool { // Currently we do not trust the 0 value from stats, due to stats collection might have bug // TODO check the logic in datasource::get_statistics_with_limit() @@ -97,10 +98,10 @@ fn supports_collect_by_size( return false; }; - if let Some(size) = stats.total_byte_size.get_value() { - *size != 0 && *size < collection_size_threshold - } else if let Some(row_count) = stats.num_rows.get_value() { - *row_count != 0 && *row_count < collection_size_threshold + if let Some(byte_size) = stats.total_byte_size.get_value() { + *byte_size != 0 && *byte_size < threshold_byte_size + } else if let Some(num_rows) = stats.num_rows.get_value() { + *num_rows != 0 && *num_rows < threshold_num_rows } else { false } @@ -251,9 +252,14 @@ impl PhysicalOptimizerRule for JoinSelection { // - We will also swap left and right sides for cross joins so that the left // side is the small side. let config = &config.optimizer; - let collect_left_threshold = config.hash_join_single_partition_threshold; + let collect_threshold_byte_size = config.hash_join_single_partition_threshold; + let collect_threshold_num_rows = config.hash_join_single_partition_threshold_rows; state.plan.transform_up(&|plan| { - statistical_join_selection_subrule(plan, collect_left_threshold) + statistical_join_selection_subrule( + plan, + collect_threshold_byte_size, + collect_threshold_num_rows, + ) }) } @@ -270,8 +276,8 @@ impl PhysicalOptimizerRule for JoinSelection { /// /// This function will first consider the given join type and check whether the /// `CollectLeft` mode is applicable. Otherwise, it will try to swap the join sides. -/// When the `collect_threshold` is provided, this function will also check left -/// and right sizes. +/// When the `ignore_threshold` is false, this function will also check left +/// and right sizes in bytes or rows. /// /// For [`JoinType::Full`], it can not use `CollectLeft` mode and will return `None`. /// For [`JoinType::Left`] and [`JoinType::LeftAnti`], it can not run `CollectLeft` @@ -279,7 +285,9 @@ impl PhysicalOptimizerRule for JoinSelection { /// and [`JoinType::RightAnti`], respectively. 
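The threshold logic introduced in the join-selection changes below can be summarized as: a join side may be collected into a single partition if either its estimated byte size or its estimated row count is known, non-zero, and below the corresponding threshold. A standalone sketch of that check, simplified to `Option` inputs rather than `Precision` statistics:

```rust
// Mirrors supports_collect_by_thresholds: prefer the byte-size estimate,
// fall back to the row-count estimate, and distrust unknown or zero values.
fn can_collect(
    byte_size: Option<usize>,
    num_rows: Option<usize>,
    threshold_byte_size: usize,
    threshold_num_rows: usize,
) -> bool {
    if let Some(bytes) = byte_size {
        bytes != 0 && bytes < threshold_byte_size
    } else if let Some(rows) = num_rows {
        rows != 0 && rows < threshold_num_rows
    } else {
        false
    }
}

fn main() {
    // Defaults mirrored from the config: 1 MiB of bytes or 128 * 1024 rows.
    let (bytes_threshold, rows_threshold) = (1024 * 1024, 128 * 1024);
    assert!(can_collect(Some(8192), None, bytes_threshold, rows_threshold));
    assert!(!can_collect(
        Some(2 * 1024 * 1024),
        None,
        bytes_threshold,
        rows_threshold
    ));
    // Unknown statistics never qualify for single-partition collection.
    assert!(!can_collect(None, None, bytes_threshold, rows_threshold));
}
```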
fn try_collect_left( hash_join: &HashJoinExec, - collect_threshold: Option, + ignore_threshold: bool, + threshold_byte_size: usize, + threshold_num_rows: usize, ) -> Result>> { let left = hash_join.left(); let right = hash_join.right(); @@ -291,9 +299,14 @@ fn try_collect_left( | JoinType::LeftSemi | JoinType::Right | JoinType::RightSemi - | JoinType::RightAnti => collect_threshold.map_or(true, |threshold| { - supports_collect_by_size(&**left, threshold) - }), + | JoinType::RightAnti => { + ignore_threshold + || supports_collect_by_thresholds( + &**left, + threshold_byte_size, + threshold_num_rows, + ) + } }; let right_can_collect = match join_type { JoinType::Right | JoinType::Full | JoinType::RightAnti => false, @@ -301,9 +314,14 @@ fn try_collect_left( | JoinType::RightSemi | JoinType::Left | JoinType::LeftSemi - | JoinType::LeftAnti => collect_threshold.map_or(true, |threshold| { - supports_collect_by_size(&**right, threshold) - }), + | JoinType::LeftAnti => { + ignore_threshold + || supports_collect_by_thresholds( + &**right, + threshold_byte_size, + threshold_num_rows, + ) + } }; match (left_can_collect, right_can_collect) { (true, true) => { @@ -366,52 +384,56 @@ fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result, - collect_left_threshold: usize, + collect_threshold_byte_size: usize, + collect_threshold_num_rows: usize, ) -> Result>> { - let transformed = if let Some(hash_join) = - plan.as_any().downcast_ref::() - { - match hash_join.partition_mode() { - PartitionMode::Auto => { - try_collect_left(hash_join, Some(collect_left_threshold))?.map_or_else( - || partitioned_hash_join(hash_join).map(Some), - |v| Ok(Some(v)), + let transformed = + if let Some(hash_join) = plan.as_any().downcast_ref::() { + match hash_join.partition_mode() { + PartitionMode::Auto => try_collect_left( + hash_join, + false, + collect_threshold_byte_size, + collect_threshold_num_rows, )? - } - PartitionMode::CollectLeft => try_collect_left(hash_join, None)? .map_or_else( || partitioned_hash_join(hash_join).map(Some), |v| Ok(Some(v)), )?, - PartitionMode::Partitioned => { - let left = hash_join.left(); - let right = hash_join.right(); - if should_swap_join_order(&**left, &**right)? - && supports_swap(*hash_join.join_type()) - { - swap_hash_join(hash_join, PartitionMode::Partitioned).map(Some)? - } else { - None + PartitionMode::CollectLeft => try_collect_left(hash_join, true, 0, 0)? + .map_or_else( + || partitioned_hash_join(hash_join).map(Some), + |v| Ok(Some(v)), + )?, + PartitionMode::Partitioned => { + let left = hash_join.left(); + let right = hash_join.right(); + if should_swap_join_order(&**left, &**right)? + && supports_swap(*hash_join.join_type()) + { + swap_hash_join(hash_join, PartitionMode::Partitioned).map(Some)? + } else { + None + } } } - } - } else if let Some(cross_join) = plan.as_any().downcast_ref::() { - let left = cross_join.left(); - let right = cross_join.right(); - if should_swap_join_order(&**left, &**right)? { - let new_join = CrossJoinExec::new(Arc::clone(right), Arc::clone(left)); - // TODO avoid adding ProjectionExec again and again, only adding Final Projection - let proj: Arc = Arc::new(ProjectionExec::try_new( - swap_reverting_projection(&left.schema(), &right.schema()), - Arc::new(new_join), - )?); - Some(proj) + } else if let Some(cross_join) = plan.as_any().downcast_ref::() { + let left = cross_join.left(); + let right = cross_join.right(); + if should_swap_join_order(&**left, &**right)? 
{ + let new_join = CrossJoinExec::new(Arc::clone(right), Arc::clone(left)); + // TODO avoid adding ProjectionExec again and again, only adding Final Projection + let proj: Arc = Arc::new(ProjectionExec::try_new( + swap_reverting_projection(&left.schema(), &right.schema()), + Arc::new(new_join), + )?); + Some(proj) + } else { + None + } } else { None - } - } else { - None - }; + }; Ok(if let Some(transformed) = transformed { Transformed::Yes(transformed) @@ -682,22 +704,62 @@ mod tests_statistical { use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalExpr; + /// Return statistcs for empty table + fn empty_statistics() -> Statistics { + Statistics { + num_rows: Precision::Absent, + total_byte_size: Precision::Absent, + column_statistics: vec![ColumnStatistics::new_unknown()], + } + } + + /// Get table thresholds: (num_rows, byte_size) + fn get_thresholds() -> (usize, usize) { + let optimizer_options = ConfigOptions::new().optimizer; + ( + optimizer_options.hash_join_single_partition_threshold_rows, + optimizer_options.hash_join_single_partition_threshold, + ) + } + + /// Return statistcs for small table + fn small_statistics() -> Statistics { + let (threshold_num_rows, threshold_byte_size) = get_thresholds(); + Statistics { + num_rows: Precision::Inexact(threshold_num_rows / 128), + total_byte_size: Precision::Inexact(threshold_byte_size / 128), + column_statistics: vec![ColumnStatistics::new_unknown()], + } + } + + /// Return statistcs for big table + fn big_statistics() -> Statistics { + let (threshold_num_rows, threshold_byte_size) = get_thresholds(); + Statistics { + num_rows: Precision::Inexact(threshold_num_rows * 2), + total_byte_size: Precision::Inexact(threshold_byte_size * 2), + column_statistics: vec![ColumnStatistics::new_unknown()], + } + } + + /// Return statistcs for big table + fn bigger_statistics() -> Statistics { + let (threshold_num_rows, threshold_byte_size) = get_thresholds(); + Statistics { + num_rows: Precision::Inexact(threshold_num_rows * 4), + total_byte_size: Precision::Inexact(threshold_byte_size * 4), + column_statistics: vec![ColumnStatistics::new_unknown()], + } + } + fn create_big_and_small() -> (Arc, Arc) { let big = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(10), - total_byte_size: Precision::Inexact(100000), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + big_statistics(), Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), )); let small = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(100000), - total_byte_size: Precision::Inexact(10), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + small_statistics(), Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), )); (big, small) @@ -821,11 +883,11 @@ mod tests_statistical { assert_eq!( swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(10) + Precision::Inexact(8192) ); assert_eq!( swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(100000) + Precision::Inexact(2097152) ); } @@ -872,11 +934,11 @@ mod tests_statistical { assert_eq!( swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(100000) + Precision::Inexact(2097152) ); assert_eq!( swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(10) + Precision::Inexact(8192) ); } @@ -917,11 +979,11 @@ mod tests_statistical { assert_eq!( swapped_join.left().statistics().unwrap().total_byte_size, - 
Precision::Inexact(10) + Precision::Inexact(8192) ); assert_eq!( swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(100000) + Precision::Inexact(2097152) ); assert_eq!(original_schema, swapped_join.schema()); @@ -1032,11 +1094,11 @@ mod tests_statistical { assert_eq!( swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(10) + Precision::Inexact(8192) ); assert_eq!( swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(100000) + Precision::Inexact(2097152) ); } @@ -1078,29 +1140,17 @@ mod tests_statistical { #[tokio::test] async fn test_join_selection_collect_left() { let big = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(10000000), - total_byte_size: Precision::Inexact(10000000), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + big_statistics(), Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), )); let small = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(10), - total_byte_size: Precision::Inexact(10), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + small_statistics(), Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), )); let empty = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Absent, - total_byte_size: Precision::Absent, - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + empty_statistics(), Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), )); @@ -1121,7 +1171,7 @@ mod tests_statistical { Column::new_with_schema("small_col", &small.schema()).unwrap(), )]; check_join_partition_mode( - big, + big.clone(), small.clone(), join_on, true, @@ -1145,8 +1195,8 @@ mod tests_statistical { Column::new_with_schema("small_col", &small.schema()).unwrap(), )]; check_join_partition_mode( - empty, - small, + empty.clone(), + small.clone(), join_on, true, PartitionMode::CollectLeft, @@ -1155,52 +1205,40 @@ mod tests_statistical { #[tokio::test] async fn test_join_selection_partitioned() { - let big1 = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(10000000), - total_byte_size: Precision::Inexact(10000000), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, - Schema::new(vec![Field::new("big_col1", DataType::Int32, false)]), + let bigger = Arc::new(StatisticsExec::new( + bigger_statistics(), + Schema::new(vec![Field::new("bigger_col", DataType::Int32, false)]), )); - let big2 = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(20000000), - total_byte_size: Precision::Inexact(20000000), - column_statistics: vec![ColumnStatistics::new_unknown()], - }, - Schema::new(vec![Field::new("big_col2", DataType::Int32, false)]), + let big = Arc::new(StatisticsExec::new( + big_statistics(), + Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), )); let empty = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Absent, - total_byte_size: Precision::Absent, - column_statistics: vec![ColumnStatistics::new_unknown()], - }, + empty_statistics(), Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), )); let join_on = vec![( - Column::new_with_schema("big_col1", &big1.schema()).unwrap(), - Column::new_with_schema("big_col2", &big2.schema()).unwrap(), + Column::new_with_schema("big_col", &big.schema()).unwrap(), + Column::new_with_schema("bigger_col", &bigger.schema()).unwrap(), )]; check_join_partition_mode( - big1.clone(), - big2.clone(), + 
big.clone(), + bigger.clone(), join_on, false, PartitionMode::Partitioned, ); let join_on = vec![( - Column::new_with_schema("big_col2", &big2.schema()).unwrap(), - Column::new_with_schema("big_col1", &big1.schema()).unwrap(), + Column::new_with_schema("bigger_col", &bigger.schema()).unwrap(), + Column::new_with_schema("big_col", &big.schema()).unwrap(), )]; check_join_partition_mode( - big2, - big1.clone(), + bigger.clone(), + big.clone(), join_on, true, PartitionMode::Partitioned, @@ -1208,27 +1246,21 @@ mod tests_statistical { let join_on = vec![( Column::new_with_schema("empty_col", &empty.schema()).unwrap(), - Column::new_with_schema("big_col1", &big1.schema()).unwrap(), + Column::new_with_schema("big_col", &big.schema()).unwrap(), )]; check_join_partition_mode( empty.clone(), - big1.clone(), + big.clone(), join_on, false, PartitionMode::Partitioned, ); let join_on = vec![( - Column::new_with_schema("big_col1", &big1.schema()).unwrap(), + Column::new_with_schema("big_col", &big.schema()).unwrap(), Column::new_with_schema("empty_col", &empty.schema()).unwrap(), )]; - check_join_partition_mode( - big1, - empty, - join_on, - false, - PartitionMode::Partitioned, - ); + check_join_partition_mode(big, empty, join_on, false, PartitionMode::Partitioned); } fn check_join_partition_mode( diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 34d1af85565a..2d20c487e473 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -163,8 +163,12 @@ fn try_swapping_with_csv( // This process can be moved into CsvExec, but it would be an overlap of their responsibility. all_alias_free_columns(projection.expr()).then(|| { let mut file_scan = csv.base_config().clone(); - let new_projections = - new_projections_for_columns(projection, &file_scan.projection); + let new_projections = new_projections_for_columns( + projection, + &file_scan + .projection + .unwrap_or((0..csv.schema().fields().len()).collect()), + ); file_scan.projection = Some(new_projections); Arc::new(CsvExec::new( @@ -188,8 +192,11 @@ fn try_swapping_with_memory( // This process can be moved into MemoryExec, but it would be an overlap of their responsibility. 
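The CSV, memory, and streaming-table call sites in this hunk all follow the same pattern: when the scan carries no explicit projection, it is treated as the identity projection over every field of its schema, and the column indices selected by the ProjectionExec are then remapped through it. Below is a minimal sketch of that remapping; `remap_indices` is a hypothetical free-standing helper written for illustration, not the actual `new_projections_for_columns` signature shown further down in this hunk.

// Sketch only: `remap_indices` is a hypothetical helper illustrating the
// identity-projection fallback; it is not part of DataFusion's API.
fn remap_indices(
    selected: &[usize],                // column indices chosen by the ProjectionExec
    scan_projection: Option<&[usize]>, // projection already applied by the scan, if any
    field_count: usize,                // number of fields in the scan's full schema
) -> Vec<usize> {
    let identity: Vec<usize> = (0..field_count).collect();
    let scan_projection = scan_projection.unwrap_or(identity.as_slice());
    selected.iter().map(|i| scan_projection[*i]).collect()
}

fn main() {
    // A scan reading file columns [2, 5, 7] followed by a projection selecting its
    // outputs [1, 0] collapses to a scan of file columns [5, 2].
    assert_eq!(remap_indices(&[1, 0], Some(&[2, 5, 7]), 8), vec![5, 2]);
    // With no scan projection, the indices pass through unchanged.
    assert_eq!(remap_indices(&[3, 1], None, 4), vec![3, 1]);
}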
all_alias_free_columns(projection.expr()) .then(|| { - let new_projections = - new_projections_for_columns(projection, memory.projection()); + let all_projections = (0..memory.schema().fields().len()).collect(); + let new_projections = new_projections_for_columns( + projection, + memory.projection().as_ref().unwrap_or(&all_projections), + ); MemoryExec::try_new( memory.partitions(), @@ -216,8 +223,11 @@ fn try_swapping_with_streaming_table( .projection() .as_ref() .map(|i| i.as_ref().to_vec()); - let new_projections = - new_projections_for_columns(projection, &streaming_table_projections); + let new_projections = new_projections_for_columns( + projection, + &streaming_table_projections + .unwrap_or((0..streaming_table.schema().fields().len()).collect()), + ); let mut lex_orderings = vec![]; for lex_ordering in streaming_table.projected_output_ordering().into_iter() { @@ -238,7 +248,7 @@ fn try_swapping_with_streaming_table( StreamingTableExec::try_new( streaming_table.partition_schema().clone(), streaming_table.partitions().clone(), - Some(&new_projections), + Some(new_projections.as_ref()), lex_orderings, streaming_table.is_infinite(), ) @@ -833,7 +843,7 @@ fn all_alias_free_columns(exprs: &[(Arc, String)]) -> bool { /// ensure that all expressions are `Column` expressions without aliases. fn new_projections_for_columns( projection: &ProjectionExec, - source: &Option>, + source: &[usize], ) -> Vec { projection .expr() @@ -841,7 +851,7 @@ fn new_projections_for_columns( .filter_map(|(expr, _)| { expr.as_any() .downcast_ref::() - .and_then(|expr| source.as_ref().map(|proj| proj[expr.index()])) + .map(|expr| source[expr.index()]) }) .collect() } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 98390ac271d0..ac3b7ebaeac1 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -58,6 +58,7 @@ use crate::physical_plan::joins::{ use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use crate::physical_plan::memory::MemoryExec; use crate::physical_plan::projection::ProjectionExec; +use crate::physical_plan::recursive_query::RecursiveQueryExec; use crate::physical_plan::repartition::RepartitionExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::union::UnionExec; @@ -87,8 +88,8 @@ use datafusion_expr::expr::{ use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; use datafusion_expr::{ - DescribeTable, DmlStatement, ScalarFunctionDefinition, StringifiedPlan, WindowFrame, - WindowFrameBound, WriteOp, + DescribeTable, DmlStatement, RecursiveQuery, ScalarFunctionDefinition, + StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp, }; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; @@ -719,14 +720,12 @@ impl DefaultPhysicalPlanner { } let logical_input_schema = input.schema(); - let physical_input_schema = input_exec.schema(); let window_expr = window_expr .iter() .map(|e| { create_window_expr( e, logical_input_schema, - &physical_input_schema, session_state.execution_props(), ) }) @@ -896,7 +895,7 @@ impl DefaultPhysicalPlanner { let filter = FilterExec::try_new(runtime_expr, physical_input)?; Ok(Arc::new(filter.with_default_selectivity(selectivity)?)) } - LogicalPlan::Union(Union { inputs, .. 
}) => { + LogicalPlan::Union(Union { inputs, schema: _ }) => { let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state).await?; Ok(Arc::new(UnionExec::new(physical_plans))) @@ -1290,6 +1289,11 @@ impl DefaultPhysicalPlanner { Ok(plan) } } + LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term, recursive_term, is_distinct,.. }) => { + let static_term = self.create_initial_plan(static_term, session_state).await?; + let recursive_term = self.create_initial_plan(recursive_term, session_state).await?; + Ok(Arc::new(RecursiveQueryExec::try_new(name.clone(), static_term, recursive_term, *is_distinct)?)) + } }; exec_plan }.boxed() @@ -1526,7 +1530,7 @@ fn get_physical_expr_pair( /// queries like: /// OVER (ORDER BY a RANGES BETWEEN 3 PRECEDING AND 5 PRECEDING) /// OVER (ORDER BY a RANGES BETWEEN INTERVAL '3 DAY' PRECEDING AND '5 DAY' PRECEDING) are rejected -pub fn is_window_valid(window_frame: &WindowFrame) -> bool { +pub fn is_window_frame_bound_valid(window_frame: &WindowFrame) -> bool { match (&window_frame.start_bound, &window_frame.end_bound) { (WindowFrameBound::Following(_), WindowFrameBound::Preceding(_)) | (WindowFrameBound::Following(_), WindowFrameBound::CurrentRow) @@ -1546,10 +1550,10 @@ pub fn create_window_expr_with_name( e: &Expr, name: impl Into, logical_input_schema: &DFSchema, - physical_input_schema: &Schema, execution_props: &ExecutionProps, ) -> Result> { let name = name.into(); + let physical_input_schema: &Schema = &logical_input_schema.into(); match e { Expr::WindowFunction(WindowFunction { fun, @@ -1572,7 +1576,8 @@ pub fn create_window_expr_with_name( create_physical_sort_expr(e, logical_input_schema, execution_props) }) .collect::>>()?; - if !is_window_valid(window_frame) { + + if !is_window_frame_bound_valid(window_frame) { return plan_err!( "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", window_frame.start_bound, window_frame.end_bound @@ -1598,7 +1603,6 @@ pub fn create_window_expr_with_name( pub fn create_window_expr( e: &Expr, logical_input_schema: &DFSchema, - physical_input_schema: &Schema, execution_props: &ExecutionProps, ) -> Result> { // unpack aliased logical expressions, e.g. "sum(col) over () as total" @@ -1606,13 +1610,7 @@ pub fn create_window_expr( Expr::Alias(Alias { expr, name, .. 
}) => (name.clone(), expr.as_ref()), _ => (e.display_name()?, e), }; - create_window_expr_with_name( - e, - name, - logical_input_schema, - physical_input_schema, - execution_props, - ) + create_window_expr_with_name(e, name, logical_input_schema, execution_props) } type AggregateExprWithOptionalArgs = ( diff --git a/datafusion/core/tests/data/recursive_cte/balance.csv b/datafusion/core/tests/data/recursive_cte/balance.csv new file mode 100644 index 000000000000..a77c742dd2e5 --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/balance.csv @@ -0,0 +1,5 @@ +time,name,account_balance +1,John,100 +1,Tim,200 +2,John,300 +2,Tim,400 \ No newline at end of file diff --git a/datafusion/core/tests/data/recursive_cte/growth.csv b/datafusion/core/tests/data/recursive_cte/growth.csv new file mode 100644 index 000000000000..912208bad2eb --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/growth.csv @@ -0,0 +1,4 @@ +name,account_growth +John,3 +Tim,20 +Eliza,150 \ No newline at end of file diff --git a/datafusion/core/tests/data/recursive_cte/prices.csv b/datafusion/core/tests/data/recursive_cte/prices.csv new file mode 100644 index 000000000000..b294ecfad774 --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/prices.csv @@ -0,0 +1,101 @@ +Index,product,price,prices_row_num +1,Holden,334.8,1 +2,Mercedes-Benz,623.22,2 +3,Aston Martin,363.48,3 +4,GMC,615.67,4 +5,Lincoln,521.13,5 +6,Mitsubishi,143.05,6 +7,Infiniti,861.82,7 +8,Ford,330.57,8 +9,GMC,136.87,9 +10,Toyota,106.29,10 +11,Pontiac,686.95,11 +12,Ford,197.48,12 +13,Honda,774.42,13 +14,Dodge,854.26,14 +15,Bentley,628.82,15 +16,Chevrolet,756.82,16 +17,Volkswagen,438.51,17 +18,Mazda,156.15,18 +19,Hyundai,322.43,19 +20,Oldsmobile,979.95,20 +21,Geo,359.59,21 +22,Ford,960.75,22 +23,Subaru,106.75,23 +24,Pontiac,13.4,24 +25,Mercedes-Benz,858.46,25 +26,Subaru,55.72,26 +27,BMW,316.69,27 +28,Chevrolet,290.32,28 +29,Mercury,296.8,29 +30,Dodge,410.78,30 +31,Oldsmobile,18.07,31 +32,Subaru,442.22,32 +33,Dodge,93.29,33 +34,Honda,282.9,34 +35,Chevrolet,750.87,35 +36,Lexus,249.82,36 +37,Ford,732.33,37 +38,Toyota,680.78,38 +39,Nissan,657.01,39 +40,Mazda,200.76,40 +41,Nissan,251.44,41 +42,Buick,714.44,42 +43,Ford,436.2,43 +44,Volvo,865.53,44 +45,Saab,471.52,45 +46,Mercedes-Benz,51.13,46 +47,Chrysler,943.52,47 +48,Lamborghini,181.6,48 +49,Hyundai,634.89,49 +50,Ford,757.58,50 +51,Porsche,294.64,51 +52,Ford,261.34,52 +53,Chrysler,822.01,53 +54,Audi,430.68,54 +55,Mitsubishi,69.12,55 +56,Mazda,723.16,56 +57,Mazda,711.46,57 +58,Land Rover,435.15,58 +59,Buick,189.58,59 +60,GMC,651.92,60 +61,Mazda,491.37,61 +62,BMW,346.18,62 +63,Ford,456.25,63 +64,Ford,10.65,64 +65,Mazda,985.39,65 +66,Mercedes-Benz,955.79,66 +67,Honda,550.95,67 +68,Mitsubishi,127.6,68 +69,Mercedes-Benz,840.65,69 +70,Infiniti,647.45,70 +71,Bentley,827.26,71 +72,Lincoln,822.22,72 +73,Plymouth,970.55,73 +74,Ford,595.05,74 +75,Maybach,808.46,75 +76,Chevrolet,341.48,76 +77,Jaguar,759.03,77 +78,Land Rover,625.01,78 +79,Lincoln,289.13,79 +80,Suzuki,285.24,80 +81,GMC,253.4,81 +82,Oldsmobile,174.76,82 +83,Lincoln,434.17,83 +84,Dodge,887.38,84 +85,Mercedes-Benz,308.65,85 +86,GMC,182.71,86 +87,Ford,619.62,87 +88,Lexus,228.63,88 +89,Hyundai,901.06,89 +90,Chevrolet,615.65,90 +91,GMC,460.19,91 +92,Mercedes-Benz,729.28,92 +93,Dodge,414.69,93 +94,Maserati,300.83,94 +95,Suzuki,503.64,95 +96,Audi,275.05,96 +97,Ford,303.25,97 +98,Lotus,101.01,98 +99,Lincoln,721.05,99 +100,Kia,833.31,100 \ No newline at end of file diff --git a/datafusion/core/tests/data/recursive_cte/sales.csv 
b/datafusion/core/tests/data/recursive_cte/sales.csv new file mode 100644 index 000000000000..12299c39d635 --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/sales.csv @@ -0,0 +1,10 @@ +region_id,salesperson_id,sale_amount +101,1,1000 +102,2,500 +101,2,700 +103,3,800 +102,4,300 +101,4,400 +102,5,600 +103,6,500 +101,7,900 \ No newline at end of file diff --git a/datafusion/core/tests/data/recursive_cte/salespersons.csv b/datafusion/core/tests/data/recursive_cte/salespersons.csv new file mode 100644 index 000000000000..dc941c450246 --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/salespersons.csv @@ -0,0 +1,8 @@ +salesperson_id,manager_id +1, +2,1 +3,1 +4,2 +5,2 +6,3 +7,3 \ No newline at end of file diff --git a/datafusion/core/tests/data/recursive_cte/time.csv b/datafusion/core/tests/data/recursive_cte/time.csv new file mode 100644 index 000000000000..21026bd41a4a --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/time.csv @@ -0,0 +1,5 @@ +time,other +1,foo +2,bar +4,baz +5,qux diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index fe56fc22ea8c..2d4203464300 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -267,6 +267,26 @@ async fn test_fn_initcap() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_fn_instr() -> Result<()> { + let expr = instr(col("a"), lit("b")); + + let expected = [ + "+-------------------------+", + "| instr(test.a,Utf8(\"b\")) |", + "+-------------------------+", + "| 2 |", + "| 2 |", + "| 0 |", + "| 5 |", + "+-------------------------+", + ]; + + assert_fn_batches!(expr, expected); + + Ok(()) +} + #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_left() -> Result<()> { @@ -634,6 +654,26 @@ async fn test_fn_starts_with() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_fn_ends_with() -> Result<()> { + let expr = ends_with(col("a"), lit("DEF")); + + let expected = [ + "+-------------------------------+", + "| ends_with(test.a,Utf8(\"DEF\")) |", + "+-------------------------------+", + "| true |", + "| false |", + "| false |", + "| false |", + "+-------------------------------+", + ]; + + assert_fn_batches!(expr, expected); + + Ok(()) +} + #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_strpos() -> Result<()> { diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 588b4647e5c1..89ab04dfee89 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -34,18 +34,19 @@ use std::sync::Arc; use datafusion::dataframe::DataFrame; use datafusion::datasource::MemTable; use datafusion::error::Result; -use datafusion::execution::context::SessionContext; +use datafusion::execution::context::{SessionContext, SessionState}; use datafusion::prelude::JoinType; use datafusion::prelude::{CsvReadOptions, ParquetReadOptions}; use datafusion::test_util::parquet_test_data; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; use datafusion_common::{assert_contains, DataFusionError, ScalarValue, UnnestOptions}; use datafusion_execution::config::SessionConfig; +use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{GroupingSet, Sort}; use datafusion_expr::{ - array_agg, avg, col, count, exists, expr, in_subquery, lit, max, out_ref_col, - scalar_subquery, sum, wildcard, AggregateFunction, Expr, ExprSchemable, WindowFrame, - 
WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + array_agg, avg, cast, col, count, exists, expr, in_subquery, lit, max, out_ref_col, + scalar_subquery, sum, when, wildcard, AggregateFunction, Expr, ExprSchemable, + WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_physical_expr::var_provider::{VarProvider, VarType}; @@ -1430,6 +1431,60 @@ async fn unnest_analyze_metrics() -> Result<()> { Ok(()) } +#[tokio::test] +async fn consecutive_projection_same_schema() -> Result<()> { + let config = SessionConfig::new(); + let runtime = Arc::new(RuntimeEnv::default()); + let state = SessionState::new_with_config_rt(config, runtime); + let ctx = SessionContext::new_with_state(state); + + let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + + let batch = + RecordBatch::try_new(schema, vec![Arc::new(Int32Array::from(vec![0, 1]))]) + .unwrap(); + + let df = ctx.read_batch(batch).unwrap(); + df.clone().show().await.unwrap(); + + // Add `t` column full of nulls + let df = df + .with_column("t", cast(Expr::Literal(ScalarValue::Null), DataType::Int32)) + .unwrap(); + df.clone().show().await.unwrap(); + + let df = df + // (case when id = 1 then 10 else t) as t + .with_column( + "t", + when(col("id").eq(lit(1)), lit(10)) + .otherwise(col("t")) + .unwrap(), + ) + .unwrap() + // (case when id = 1 then 10 else t) as t2 + .with_column( + "t2", + when(col("id").eq(lit(1)), lit(10)) + .otherwise(col("t")) + .unwrap(), + ) + .unwrap(); + + let results = df.collect().await?; + let expected = [ + "+----+----+----+", + "| id | t | t2 |", + "+----+----+----+", + "| 0 | | |", + "| 1 | 10 | 10 |", + "+----+----+----+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + async fn create_test_table(name: &str) -> Result { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 6e5c5f8eb95e..7358ec288432 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -227,14 +227,14 @@ fn get_random_function( rng: &mut StdRng, is_linear: bool, ) -> (WindowFunctionDefinition, Vec>, String) { - let mut args = if is_linear { + let arg = if is_linear { // In linear test for the test version with WindowAggExec we use insert SortExecs to the plan to be able to generate // same result with BoundedWindowAggExec which doesn't use any SortExec. To make result // non-dependent on table order. We should use column a in the window function // (Given that we do not use ROWS for the window frame. ROWS also introduces dependency to the table order.). 
- vec![col("a", schema).unwrap()] + col("a", schema).unwrap() } else { - vec![col("x", schema).unwrap()] + col("x", schema).unwrap() }; let mut window_fn_map = HashMap::new(); // HashMap values consists of tuple first element is WindowFunction, second is additional argument @@ -243,28 +243,28 @@ fn get_random_function( "sum", ( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Sum), - vec![], + vec![arg.clone()], ), ); window_fn_map.insert( "count", ( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Count), - vec![], + vec![arg.clone()], ), ); window_fn_map.insert( "min", ( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), - vec![], + vec![arg.clone()], ), ); window_fn_map.insert( "max", ( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), - vec![], + vec![arg.clone()], ), ); if !is_linear { @@ -305,6 +305,7 @@ fn get_random_function( BuiltInWindowFunction::Lead, ), vec![ + arg.clone(), lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), ], @@ -317,6 +318,7 @@ fn get_random_function( BuiltInWindowFunction::Lag, ), vec![ + arg.clone(), lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), ], @@ -329,7 +331,7 @@ fn get_random_function( WindowFunctionDefinition::BuiltInWindowFunction( BuiltInWindowFunction::FirstValue, ), - vec![], + vec![arg.clone()], ), ); window_fn_map.insert( @@ -338,7 +340,7 @@ fn get_random_function( WindowFunctionDefinition::BuiltInWindowFunction( BuiltInWindowFunction::LastValue, ), - vec![], + vec![arg.clone()], ), ); window_fn_map.insert( @@ -347,23 +349,26 @@ fn get_random_function( WindowFunctionDefinition::BuiltInWindowFunction( BuiltInWindowFunction::NthValue, ), - vec![lit(ScalarValue::Int64(Some(rng.gen_range(1..10))))], + vec![ + arg.clone(), + lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + ], ), ); let rand_fn_idx = rng.gen_range(0..window_fn_map.len()); let fn_name = window_fn_map.keys().collect::>()[rand_fn_idx]; - let (window_fn, new_args) = window_fn_map.values().collect::>()[rand_fn_idx]; + let (window_fn, args) = window_fn_map.values().collect::>()[rand_fn_idx]; + let mut args = args.clone(); if let WindowFunctionDefinition::AggregateFunction(f) = window_fn { - let a = args[0].clone(); - let dt = a.data_type(schema.as_ref()).unwrap(); - let sig = f.signature(); - let coerced = coerce_types(f, &[dt], &sig).unwrap(); - args[0] = cast(a, schema, coerced[0].clone()).unwrap(); - } - - for new_arg in new_args { - args.push(new_arg.clone()); + if !args.is_empty() { + // Do type coercion first argument + let a = args[0].clone(); + let dt = a.data_type(schema.as_ref()).unwrap(); + let sig = f.signature(); + let coerced = coerce_types(f, &[dt], &sig).unwrap(); + args[0] = cast(a, schema, coerced[0].clone()).unwrap(); + } } (window_fn.clone(), args, fn_name.to_string()) @@ -482,7 +487,6 @@ async fn run_window_test( let session_config = SessionConfig::new().with_batch_size(50); let ctx = SessionContext::new_with_config(session_config); let (window_fn, args, fn_name) = get_random_function(&schema, &mut rng, is_linear); - let window_frame = get_random_window_frame(&mut rng, is_linear); let mut orderby_exprs = vec![]; for column in &orderby_columns { @@ -532,6 +536,7 @@ async fn run_window_test( if is_linear { exec1 = Arc::new(SortExec::new(sort_keys.clone(), exec1)) as _; } + let usual_window_exec = Arc::new( WindowAggExec::try_new( vec![create_window_expr( diff --git 
a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 0602b4d4c525..b056db6a0bd3 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -66,7 +66,10 @@ enum Scenario { Int32Range, Float64, Decimal, + DecimalBloomFilterInt32, + DecimalBloomFilterInt64, DecimalLargePrecision, + DecimalLargePrecisionBloomFilter, PeriodsInColumnNames, } @@ -549,6 +552,22 @@ fn create_data_batch(scenario: Scenario) -> Vec { make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), ] } + Scenario::DecimalBloomFilterInt32 => { + // decimal record batch + vec![ + make_decimal_batch(vec![100, 200, 300, 400, 500], 6, 2), + make_decimal_batch(vec![100, 200, 300, 400, 600], 6, 2), + make_decimal_batch(vec![100, 200, 300, 400, 600], 6, 2), + ] + } + Scenario::DecimalBloomFilterInt64 => { + // decimal record batch + vec![ + make_decimal_batch(vec![100, 200, 300, 400, 500], 9, 2), + make_decimal_batch(vec![100, 200, 300, 400, 600], 9, 2), + make_decimal_batch(vec![100, 200, 300, 400, 600], 9, 2), + ] + } Scenario::DecimalLargePrecision => { // decimal record batch with large precision, // and the data will stored as FIXED_LENGTH_BYTE_ARRAY @@ -558,6 +577,15 @@ fn create_data_batch(scenario: Scenario) -> Vec { make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 38, 2), ] } + Scenario::DecimalLargePrecisionBloomFilter => { + // decimal record batch with large precision, + // and the data will stored as FIXED_LENGTH_BYTE_ARRAY + vec![ + make_decimal_batch(vec![100000, 200000, 300000, 400000, 500000], 38, 5), + make_decimal_batch(vec![-100000, 200000, 300000, 400000, 600000], 38, 5), + make_decimal_batch(vec![100000, 200000, 300000, 400000, 600000], 38, 5), + ] + } Scenario::PeriodsInColumnNames => { vec![ // all frontend diff --git a/datafusion/core/tests/parquet/row_group_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs index 2bc5bd3f1ca7..449a311777dc 100644 --- a/datafusion/core/tests/parquet/row_group_pruning.rs +++ b/datafusion/core/tests/parquet/row_group_pruning.rs @@ -25,125 +25,164 @@ use itertools::Itertools; use crate::parquet::Unit::RowGroup; use crate::parquet::{ContextWithParquet, Scenario}; use datafusion_expr::{col, lit}; - -async fn test_prune( - case_data_type: Scenario, - sql: &str, +struct RowGroupPruningTest { + scenario: Scenario, + query: String, expected_errors: Option, - expected_row_group_pruned: Option, + expected_row_group_pruned_by_statistics: Option, + expected_row_group_pruned_by_bloom_filter: Option, expected_results: usize, -) { - let output = ContextWithParquet::new(case_data_type, RowGroup) - .await - .query(sql) - .await; - - println!("{}", output.description()); - assert_eq!(output.predicate_evaluation_errors(), expected_errors); - assert_eq!(output.row_groups_pruned(), expected_row_group_pruned); - assert_eq!( - output.result_rows, - expected_results, - "{}", - output.description() - ); } - -/// check row group pruning by bloom filter and statistics independently -async fn test_prune_verbose( - case_data_type: Scenario, - sql: &str, - expected_errors: Option, - expected_row_group_pruned_sbbf: Option, - expected_row_group_pruned_statistics: Option, - expected_results: usize, -) { - let output = ContextWithParquet::new(case_data_type, RowGroup) - .await - .query(sql) - .await; - - println!("{}", output.description()); - assert_eq!(output.predicate_evaluation_errors(), expected_errors); - assert_eq!( - output.row_groups_pruned_bloom_filter(), - expected_row_group_pruned_sbbf - ); - assert_eq!( - 
output.row_groups_pruned_statistics(), - expected_row_group_pruned_statistics - ); - assert_eq!( - output.result_rows, - expected_results, - "{}", - output.description() - ); +impl RowGroupPruningTest { + // Start building the test configuration + fn new() -> Self { + Self { + scenario: Scenario::Timestamps, // or another default + query: String::new(), + expected_errors: None, + expected_row_group_pruned_by_statistics: None, + expected_row_group_pruned_by_bloom_filter: None, + expected_results: 0, + } + } + + // Set the scenario for the test + fn with_scenario(mut self, scenario: Scenario) -> Self { + self.scenario = scenario; + self + } + + // Set the SQL query for the test + fn with_query(mut self, query: &str) -> Self { + self.query = query.to_string(); + self + } + + // Set the expected errors for the test + fn with_expected_errors(mut self, errors: Option) -> Self { + self.expected_errors = errors; + self + } + + // Set the expected pruned row groups by statistics + fn with_pruned_by_stats(mut self, pruned_by_stats: Option) -> Self { + self.expected_row_group_pruned_by_statistics = pruned_by_stats; + self + } + + // Set the expected pruned row groups by bloom filter + fn with_pruned_by_bloom_filter(mut self, pruned_by_bf: Option) -> Self { + self.expected_row_group_pruned_by_bloom_filter = pruned_by_bf; + self + } + + // Set the expected rows for the test + fn with_expected_rows(mut self, rows: usize) -> Self { + self.expected_results = rows; + self + } + + // Execute the test with the current configuration + async fn test_row_group_prune(self) { + let output = ContextWithParquet::new(self.scenario, RowGroup) + .await + .query(&self.query) + .await; + + println!("{}", output.description()); + assert_eq!(output.predicate_evaluation_errors(), self.expected_errors); + assert_eq!( + output.row_groups_pruned_statistics(), + self.expected_row_group_pruned_by_statistics + ); + assert_eq!( + output.row_groups_pruned_bloom_filter(), + self.expected_row_group_pruned_by_bloom_filter + ); + assert_eq!( + output.result_rows, + self.expected_results, + "{}", + output.description() + ); + } } #[tokio::test] async fn prune_timestamps_nanos() { - test_prune( - Scenario::Timestamps, - "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')", - Some(0), - Some(1), - 10, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Timestamps) + .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(10) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_timestamps_micros() { - test_prune( - Scenario::Timestamps, - "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')", - Some(0), - Some(1), - 10, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Timestamps) + .with_query( + "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')", + ) + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(10) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_timestamps_millis() { - test_prune( - Scenario::Timestamps, - "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02 01:01:11Z')", - Some(0), - Some(1), - 10, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Timestamps) + .with_query( + "SELECT * FROM t where micros < 
to_timestamp_millis('2020-01-02 01:01:11Z')", + ) + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(10) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_timestamps_seconds() { - test_prune( - Scenario::Timestamps, - "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')", - Some(0), - Some(1), - 10, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Timestamps) + .with_query( + "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')", + ) + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(10) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_date32() { - test_prune( - Scenario::Dates, - "SELECT * FROM t where date32 < cast('2020-01-02' as date)", - Some(0), - Some(3), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Dates) + .with_query("SELECT * FROM t where date32 < cast('2020-01-02' as date)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_date64() { // work around for not being able to cast Date32 to Date64 automatically + let date = "2020-01-02" .parse::() .unwrap() @@ -168,14 +207,15 @@ async fn prune_date64() { #[tokio::test] async fn prune_disabled() { - test_prune( - Scenario::Timestamps, - "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')", - Some(0), - Some(1), - 10, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Timestamps) + .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(10) + .test_row_group_prune() + .await; // test without pruning let query = "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')"; @@ -201,217 +241,233 @@ async fn prune_disabled() { #[tokio::test] async fn prune_int32_lt() { - test_prune( - Scenario::Int32, - "SELECT * FROM t where i < 1", - Some(0), - Some(1), - 11, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i < 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; + // result of sql "SELECT * FROM t where i < 1" is same as // "SELECT * FROM t where -i > -1" - test_prune( - Scenario::Int32, - "SELECT * FROM t where -i > -1", - Some(0), - Some(1), - 11, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where -i > -1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_eq() { - test_prune( - Scenario::Int32, - "SELECT * FROM t where i = 1", - Some(0), - Some(3), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i = 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_scalar_fun_and_eq() { - test_prune( - 
Scenario::Int32, - "SELECT * FROM t where abs(i) = 1 and i = 1", - Some(0), - Some(3), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i = 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_scalar_fun() { - test_prune( - Scenario::Int32, - "SELECT * FROM t where abs(i) = 1", - Some(0), - Some(0), - 3, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where abs(i) = 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(3) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_complex_expr() { - test_prune( - Scenario::Int32, - "SELECT * FROM t where i+1 = 1", - Some(0), - Some(0), - 2, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i+1 = 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_complex_expr_subtract() { - test_prune( - Scenario::Int32, - "SELECT * FROM t where 1-i > 1", - Some(0), - Some(0), - 9, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where 1-i > 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(9) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_f64_lt() { - test_prune( - Scenario::Float64, - "SELECT * FROM t where f < 1", - Some(0), - Some(1), - 11, - ) - .await; - test_prune( - Scenario::Float64, - "SELECT * FROM t where -f > -1", - Some(0), - Some(1), - 11, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where f < 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where -f > -1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_f64_scalar_fun_and_gt() { // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1" // only use "f >= 0" to prune - test_prune( - Scenario::Float64, - "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1", - Some(0), - Some(2), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(2)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_f64_scalar_fun() { // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported - test_prune( - Scenario::Float64, - "SELECT * FROM t where abs(f-1) <= 0.000001", - Some(0), - Some(0), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where abs(f-1) <= 0.000001") + 
.with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_f64_complex_expr() { // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported - test_prune( - Scenario::Float64, - "SELECT * FROM t where f+1 > 1.1", - Some(0), - Some(0), - 9, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where f+1 > 1.1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(9) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_f64_complex_expr_subtract() { // result of sql "SELECT * FROM t where 1-f > 1" is not supported - test_prune( - Scenario::Float64, - "SELECT * FROM t where 1-f > 1", - Some(0), - Some(0), - 9, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Float64) + .with_query("SELECT * FROM t where 1-f > 1") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(9) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_eq_in_list() { // result of sql "SELECT * FROM t where in (1)" - test_prune( - Scenario::Int32, - "SELECT * FROM t where i in (1)", - Some(0), - Some(3), - 1, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i in (1)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_eq_in_list_2() { // result of sql "SELECT * FROM t where in (1000)", prune all // test whether statistics works - test_prune_verbose( - Scenario::Int32, - "SELECT * FROM t where i in (1000)", - Some(0), - Some(0), - Some(4), - 0, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i in (1000)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(0) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_eq_large_in_list() { // result of sql "SELECT * FROM t where i in (2050...2582)", prune all - // test whether sbbf works - test_prune_verbose( - Scenario::Int32Range, - format!( - "SELECT * FROM t where i in ({})", - (200050..200082).join(",") + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32Range) + .with_query( + format!( + "SELECT * FROM t where i in ({})", + (200050..200082).join(",") + ) + .as_str(), ) - .as_str(), - Some(0), - Some(1), - // we don't support pruning by statistics for in_list with more than 20 elements currently - Some(0), - 0, - ) - .await; + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(1)) + .with_expected_rows(0) + .test_row_group_prune() + .await; } #[tokio::test] async fn prune_int32_eq_in_list_negated() { // result of sql "SELECT * FROM t where not in (1)" prune nothing - test_prune( - Scenario::Int32, - "SELECT * FROM t where i not in (1)", - Some(0), - Some(0), - 19, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Int32) + .with_query("SELECT * FROM t where i not in (1)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(19) + .test_row_group_prune() + 
.await; } #[tokio::test] @@ -419,42 +475,42 @@ async fn prune_decimal_lt() { // The data type of decimal_col is decimal(9,2) // There are three row groups: // [1.00, 6.00], [-5.00,6.00], [20.00,60.00] - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col < 4", - Some(0), - Some(1), - 6, - ) - .await; - // compare with the casted decimal value - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))", - Some(0), - Some(1), - 8, - ) - .await; - - // The data type of decimal_col is decimal(38,2) - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col < 4", - Some(0), - Some(1), - 6, - ) - .await; - // compare with the casted decimal value - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))", - Some(0), - Some(1), - 8, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col < 4") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(6) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(8) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecision) + .with_query("SELECT * FROM t where decimal_col < 4") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(6) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecision) + .with_query("SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(8) + .test_row_group_prune() + .await; } #[tokio::test] @@ -462,40 +518,44 @@ async fn prune_decimal_eq() { // The data type of decimal_col is decimal(9,2) // There are three row groups: // [1.00, 6.00], [-5.00,6.00], [20.00,60.00] - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col = 4", - Some(0), - Some(1), - 2, - ) - .await; - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col = 4.00", - Some(0), - Some(1), - 2, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col = 4") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col = 4.00") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecision) + .with_query("SELECT * FROM t where decimal_col = 4") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecision) + .with_query("SELECT * FROM t where 
decimal_col = 4.00") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; // The data type of decimal_col is decimal(38,2) - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col = 4", - Some(0), - Some(1), - 2, - ) - .await; - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col = 4.00", - Some(0), - Some(1), - 2, - ) - .await; } #[tokio::test] @@ -503,40 +563,75 @@ async fn prune_decimal_in_list() { // The data type of decimal_col is decimal(9,2) // There are three row groups: // [1.00, 6.00], [-5.00,6.00], [20.00,60.00] - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col in (4,3,2,123456789123)", - Some(0), - Some(1), - 5, - ) - .await; - test_prune( - Scenario::Decimal, - "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)", - Some(0), - Some(1), - 6, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col in (4,3,2,123456789123)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(5) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(6) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::Decimal) + .with_query("SELECT * FROM t where decimal_col in (4,3,2,123456789123)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(5) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecision) + .with_query("SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(6) + .test_row_group_prune() + .await; - // The data type of decimal_col is decimal(38,2) - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col in (4,3,2,123456789123)", - Some(0), - Some(1), - 5, - ) - .await; - test_prune( - Scenario::DecimalLargePrecision, - "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)", - Some(0), - Some(1), - 6, - ) - .await; + // test data -> r1: {1,2,3,4,5}, r2: {1,2,3,4,6}, r3: {1,2,3,4,6} + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalBloomFilterInt32) + .with_query("SELECT * FROM t where decimal_col in (5)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(2)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + + // test data -> r1: {1,2,3,4,5}, r2: {1,2,3,4,6}, r3: {1,2,3,4,6} + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalBloomFilterInt64) + .with_query("SELECT * FROM t where decimal_col in (5)") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(2)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + + // test data -> r1: {1,2,3,4,5}, r2: {1,2,3,4,6}, r3: {1,2,3,4,6} + RowGroupPruningTest::new() + .with_scenario(Scenario::DecimalLargePrecisionBloomFilter) + .with_query("SELECT * FROM t where decimal_col in (5)") + 
.with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_by_bloom_filter(Some(2)) + .with_expected_rows(1) + .test_row_group_prune() + .await; } #[tokio::test] @@ -545,29 +640,31 @@ async fn prune_periods_in_column_names() { // name = "HTTP GET / DISPATCH", service.name = ['frontend', 'frontend'], // name = "HTTP PUT / DISPATCH", service.name = ['backend', 'frontend'], // name = "HTTP GET / DISPATCH", service.name = ['backend', 'backend' ], - test_prune( - Scenario::PeriodsInColumnNames, - // use double quotes to use column named "service.name" - "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend'", - Some(0), - Some(1), // prune out last row group - 7, - ) - .await; - test_prune( - Scenario::PeriodsInColumnNames, - "SELECT \"name\", \"service.name\" FROM t WHERE \"name\" != 'HTTP GET / DISPATCH'", - Some(0), - Some(2), // prune out first and last row group - 5, - ) - .await; - test_prune( - Scenario::PeriodsInColumnNames, - "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend' AND \"name\" != 'HTTP GET / DISPATCH'", - Some(0), - Some(2), // prune out middle and last row group - 2, - ) - .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::PeriodsInColumnNames) + .with_query( "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend'") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(7) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::PeriodsInColumnNames) + .with_query( "SELECT \"name\", \"service.name\" FROM t WHERE \"name\" != 'HTTP GET / DISPATCH'") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(2)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(5) + .test_row_group_prune() + .await; + RowGroupPruningTest::new() + .with_scenario(Scenario::PeriodsInColumnNames) + .with_query( "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend' AND \"name\" != 'HTTP GET / DISPATCH'") + .with_expected_errors(Some(0)) + .with_pruned_by_stats(Some(2)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; } diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 0cc102002ec3..f7d5205db0d3 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -20,65 +20,6 @@ use datafusion::test_util::register_unbounded_file_with_ordering; use super::*; -#[tokio::test] -#[ignore] -/// TODO: need to repair. 
Wrong Test: ambiguous column name: a -async fn nestedjoin_with_alias() -> Result<()> { - // repro case for https://github.com/apache/arrow-datafusion/issues/2867 - let sql = "select * from ((select 1 as a, 2 as b) c INNER JOIN (select 1 as a, 3 as d) e on c.a = e.a) f;"; - let expected = [ - "+---+---+---+---+", - "| a | b | a | d |", - "+---+---+---+---+", - "| 1 | 2 | 1 | 3 |", - "+---+---+---+---+", - ]; - let ctx = SessionContext::new(); - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - - Ok(()) -} - -#[tokio::test] -async fn join_partitioned() -> Result<()> { - // self join on partition id (workaround for duplicate column name) - let results = execute_with_partition( - "SELECT 1 FROM test JOIN (SELECT c1 AS id1 FROM test) AS a ON c1=id1", - 4, - ) - .await?; - - assert_eq!( - results.iter().map(|b| b.num_rows()).sum::(), - 4 * 10 * 10 - ); - - Ok(()) -} - -#[tokio::test] -#[ignore = "Test ignored, will be enabled after fixing the NAAJ bug"] -// https://github.com/apache/arrow-datafusion/issues/4211 -async fn null_aware_left_anti_join() -> Result<()> { - let test_repartition_joins = vec![true, false]; - for repartition_joins in test_repartition_joins { - let ctx = create_left_semi_anti_join_context_with_null_ids( - "t1_id", - "t2_id", - repartition_joins, - ) - .unwrap(); - - let sql = "SELECT t1_id, t1_name FROM t1 WHERE t1_id NOT IN (SELECT t2_id FROM t2) ORDER BY t1_id"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = ["++", "++"]; - assert_batches_eq!(expected, &actual); - } - - Ok(()) -} - #[tokio::test] async fn join_change_in_planner() -> Result<()> { let config = SessionConfig::new().with_target_partitions(8); diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 981bdf34f539..246191e48ad2 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -22,16 +22,15 @@ use arrow::{ util::display::array_value_to_string, }; -use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan}; +use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; use datafusion::prelude::*; use datafusion::test_util; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; -use datafusion::{datasource::MemTable, physical_plan::collect}; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; use datafusion_common::{assert_contains, assert_not_contains}; use object_store::path::Path; @@ -72,84 +71,9 @@ pub mod create_drop; pub mod explain_analyze; pub mod expr; pub mod joins; -pub mod partitioned_csv; -pub mod repartition; pub mod select; mod sql_api; -fn create_left_semi_anti_join_context_with_null_ids( - column_left: &str, - column_right: &str, - repartition_joins: bool, -) -> Result { - let ctx = SessionContext::new_with_config( - SessionConfig::new() - .with_repartition_joins(repartition_joins) - .with_target_partitions(2) - .with_batch_size(4096), - ); - - let t1_schema = Arc::new(Schema::new(vec![ - Field::new(column_left, DataType::UInt32, true), - Field::new("t1_name", DataType::Utf8, true), - Field::new("t1_int", DataType::UInt32, true), - ])); - let t1_data = RecordBatch::try_new( - t1_schema, - vec![ - Arc::new(UInt32Array::from(vec![ - Some(11), - Some(11), - Some(22), - Some(33), - Some(44), - None, - ])), - 
Arc::new(StringArray::from(vec![ - Some("a"), - Some("a"), - Some("b"), - Some("c"), - Some("d"), - Some("e"), - ])), - Arc::new(UInt32Array::from(vec![1, 1, 2, 3, 4, 0])), - ], - )?; - ctx.register_batch("t1", t1_data)?; - - let t2_schema = Arc::new(Schema::new(vec![ - Field::new(column_right, DataType::UInt32, true), - Field::new("t2_name", DataType::Utf8, true), - Field::new("t2_int", DataType::UInt32, true), - ])); - let t2_data = RecordBatch::try_new( - t2_schema, - vec![ - Arc::new(UInt32Array::from(vec![ - Some(11), - Some(11), - Some(22), - Some(44), - Some(55), - None, - ])), - Arc::new(StringArray::from(vec![ - Some("z"), - Some("z"), - Some("y"), - Some("x"), - Some("w"), - Some("v"), - ])), - Arc::new(UInt32Array::from(vec![3, 3, 1, 3, 3, 0])), - ], - )?; - ctx.register_batch("t2", t2_data)?; - - Ok(ctx) -} - async fn register_aggregate_csv_by_sql(ctx: &SessionContext) { let testdata = datafusion::test_util::arrow_test_data(); @@ -323,21 +247,6 @@ async fn register_alltypes_parquet(ctx: &SessionContext) { .unwrap(); } -/// Return a new table provider that has a single Int32 column with -/// values between `seq_start` and `seq_end` -pub fn table_with_sequence( - seq_start: i32, - seq_end: i32, -) -> Result> { - let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); - let arr = Arc::new(Int32Array::from((seq_start..=seq_end).collect::>())); - let partitions = vec![vec![RecordBatch::try_new( - schema.clone(), - vec![arr as ArrayRef], - )?]]; - Ok(Arc::new(MemTable::try_new(schema, partitions)?)) -} - pub struct ExplainNormalizer { replacements: Vec<(String, String)>, } diff --git a/datafusion/core/tests/sql/partitioned_csv.rs b/datafusion/core/tests/sql/partitioned_csv.rs deleted file mode 100644 index b77557a66cd8..000000000000 --- a/datafusion/core/tests/sql/partitioned_csv.rs +++ /dev/null @@ -1,77 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utility functions for creating and running with a partitioned csv dataset. 
- -use std::{io::Write, sync::Arc}; - -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::{ - error::Result, - prelude::{CsvReadOptions, SessionConfig, SessionContext}, -}; -use tempfile::TempDir; - -/// Generate CSV partitions within the supplied directory -fn populate_csv_partitions( - tmp_dir: &TempDir, - partition_count: usize, - file_extension: &str, -) -> Result { - // define schema for data source (csv file) - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::UInt32, false), - Field::new("c2", DataType::UInt64, false), - Field::new("c3", DataType::Boolean, false), - ])); - - // generate a partitioned file - for partition in 0..partition_count { - let filename = format!("partition-{partition}.{file_extension}"); - let file_path = tmp_dir.path().join(filename); - let mut file = std::fs::File::create(file_path)?; - - // generate some data - for i in 0..=10 { - let data = format!("{},{},{}\n", partition, i, i % 2 == 0); - file.write_all(data.as_bytes())?; - } - } - - Ok(schema) -} - -/// Generate a partitioned CSV file and register it with an execution context -pub async fn create_ctx( - tmp_dir: &TempDir, - partition_count: usize, -) -> Result { - let ctx = - SessionContext::new_with_config(SessionConfig::new().with_target_partitions(8)); - - let schema = populate_csv_partitions(tmp_dir, partition_count, ".csv")?; - - // register csv file with the execution context - ctx.register_csv( - "test", - tmp_dir.path().to_str().unwrap(), - CsvReadOptions::new().schema(&schema), - ) - .await?; - - Ok(ctx) -} diff --git a/datafusion/core/tests/sql/repartition.rs b/datafusion/core/tests/sql/repartition.rs deleted file mode 100644 index 332f18e941aa..000000000000 --- a/datafusion/core/tests/sql/repartition.rs +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use arrow::array::UInt32Array; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::record_batch::RecordBatch; -use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion::physical_plan::repartition::RepartitionExec; -use datafusion::physical_plan::{ExecutionPlan, Partitioning}; -use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion::test_util::UnboundedExec; -use datafusion_common::Result; -use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::PhysicalExpr; -use futures::StreamExt; -use std::sync::Arc; - -/// See -#[tokio::test] -async fn unbounded_repartition() -> Result<()> { - let config = SessionConfig::new(); - let ctx = SessionContext::new_with_config(config); - let task = ctx.task_ctx(); - let schema = Arc::new(Schema::new(vec![Field::new("a2", DataType::UInt32, false)])); - let batch = RecordBatch::try_new( - Arc::clone(&schema), - vec![Arc::new(UInt32Array::from(vec![1]))], - )?; - let input = Arc::new(UnboundedExec::new(None, batch.clone(), 1)); - let on: Vec> = vec![Arc::new(Column::new("a2", 0))]; - let plan = Arc::new(RepartitionExec::try_new(input, Partitioning::Hash(on, 3))?); - let plan = Arc::new(CoalescePartitionsExec::new(plan.clone())); - let mut stream = plan.execute(0, task)?; - - // Note: `tokio::time::timeout` does NOT help here because in the mentioned issue, the whole runtime is blocked by a - // CPU-spinning thread. Using a multithread runtime with multiple threads is NOT a solution since this would not - // trigger the bug (the bug is not specific to a single-thread RT though, it's just the only way to trigger it reliably). - let batch_actual = stream - .next() - .await - .expect("not terminated") - .expect("no error in stream"); - assert_eq!(batch_actual, batch); - Ok(()) -} diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index cbdea9d72948..667d3eeab31e 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -20,508 +20,34 @@ use datafusion_common::ScalarValue; use tempfile::TempDir; #[tokio::test] -async fn query_get_indexed_field() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![Field::new_list( - "some_list", - Field::new("item", DataType::Int64, true), - false, - )])); - let builder = PrimitiveBuilder::::with_capacity(3); - let mut lb = ListBuilder::new(builder); - for int_vec in [[0, 1, 2], [4, 5, 6], [7, 8, 9]] { - let builder = lb.values(); - for int in int_vec { - builder.append_value(int); - } - lb.append(true); - } - - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(lb.finish())])?; - - ctx.register_batch("ints", data)?; - - // Original column is micros, convert to millis and check timestamp - let sql = "SELECT some_list[1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 4 |", - "| 7 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_nested_get_indexed_field() -> Result<()> { - let ctx = SessionContext::new(); - let nested_dt = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); - // Nested schema of { "some_list": [[i64]] } - let schema = Arc::new(Schema::new(vec![Field::new( - "some_list", - DataType::List(Arc::new(Field::new("item", nested_dt.clone(), true))), - false, - )])); - - let builder = PrimitiveBuilder::::with_capacity(3); - let 
nested_lb = ListBuilder::new(builder); - let mut lb = ListBuilder::new(nested_lb); - for int_vec_vec in [ - [[0, 1], [2, 3], [3, 4]], - [[5, 6], [7, 8], [9, 10]], - [[11, 12], [13, 14], [15, 16]], - ] { - let nested_builder = lb.values(); - for int_vec in int_vec_vec { - let builder = nested_builder.values(); - for int in int_vec { - builder.append_value(int); - } - nested_builder.append(true); - } - lb.append(true); - } - - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(lb.finish())])?; - - ctx.register_batch("ints", data)?; - - // Original column is micros, convert to millis and check timestamp - let sql = "SELECT some_list[1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------+", - "| i0 |", - "+----------+", - "| [0, 1] |", - "| [5, 6] |", - "| [11, 12] |", - "+----------+", - ]; - assert_batches_eq!(expected, &actual); - let sql = "SELECT some_list[1][1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 5 |", - "| 11 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_nested_get_indexed_field_on_struct() -> Result<()> { - let ctx = SessionContext::new(); - let nested_dt = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); - // Nested schema of { "some_struct": { "bar": [i64] } } - let struct_fields = vec![Field::new("bar", nested_dt.clone(), true)]; - let schema = Arc::new(Schema::new(vec![Field::new( - "some_struct", - DataType::Struct(struct_fields.clone().into()), - false, - )])); - - let builder = PrimitiveBuilder::::with_capacity(3); - let nested_lb = ListBuilder::new(builder); - let mut sb = StructBuilder::new(struct_fields, vec![Box::new(nested_lb)]); - for int_vec in [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] { - let lb = sb.field_builder::>(0).unwrap(); - for int in int_vec { - lb.values().append_value(int); - } - lb.append(true); - sb.append(true); - } - let s = sb.finish(); - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(s)])?; - - ctx.register_batch("structs", data)?; - - // Original column is micros, convert to millis and check timestamp - let sql = "SELECT some_struct['bar'] as l0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| l0 |", - "+----------------+", - "| [0, 1, 2, 3] |", - "| [4, 5, 6, 7] |", - "| [8, 9, 10, 11] |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - // Access to field of struct by CompoundIdentifier - let sql = "SELECT some_struct.bar as l0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| l0 |", - "+----------------+", - "| [0, 1, 2, 3] |", - "| [4, 5, 6, 7] |", - "| [8, 9, 10, 11] |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = "SELECT some_struct['bar'][1] as i0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 4 |", - "| 8 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_on_string_dictionary() -> Result<()> { - // Test to ensure DataFusion can operate on dictionary types - // Use StringDictionary (32 bit indexes = keys) - let d1: DictionaryArray = - vec![Some("one"), None, 
Some("three")].into_iter().collect(); - - let d2: DictionaryArray = vec![Some("blarg"), None, Some("three")] - .into_iter() - .collect(); - - let d3: StringArray = vec![Some("XYZ"), None, Some("three")].into_iter().collect(); - - let batch = RecordBatch::try_from_iter(vec![ - ("d1", Arc::new(d1) as ArrayRef), - ("d2", Arc::new(d2) as ArrayRef), - ("d3", Arc::new(d3) as ArrayRef), - ]) - .unwrap(); - - let ctx = SessionContext::new(); - ctx.register_batch("test", batch)?; - - // Basic SELECT - let sql = "SELECT d1 FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| |", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // basic filtering - let sql = "SELECT d1 FROM test WHERE d1 IS NOT NULL"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with constant - let sql = "SELECT d1 FROM test WHERE d1 = 'three'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with another dictionary column - let sql = "SELECT d1 FROM test WHERE d1 = d2"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // order comparison with another dictionary column - let sql = "SELECT d1 FROM test WHERE d1 <= d2"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with a non dictionary column - let sql = "SELECT d1 FROM test WHERE d1 = d3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // filtering with constant - let sql = "SELECT d1 FROM test WHERE d1 = 'three'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // Expression evaluation - let sql = "SELECT concat(d1, '-foo') FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+------------------------------+", - "| concat(test.d1,Utf8(\"-foo\")) |", - "+------------------------------+", - "| one-foo |", - "| -foo |", - "| three-foo |", - "+------------------------------+", - ]; - assert_batches_eq!(expected, &actual); - - // Expression evaluation with two dictionaries - let sql = "SELECT concat(d1, d2) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------------------------+", - "| concat(test.d1,test.d2) |", - "+-------------------------+", - "| oneblarg |", - "| |", - "| threethree |", - "+-------------------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation - let sql = "SELECT COUNT(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| COUNT(test.d1) |", - "+----------------+", - "| 2 |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation min - let sql = 
"SELECT MIN(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+--------------+", - "| MIN(test.d1) |", - "+--------------+", - "| one |", - "+--------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation max - let sql = "SELECT MAX(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+--------------+", - "| MAX(test.d1) |", - "+--------------+", - "| three |", - "+--------------+", - ]; - assert_batches_eq!(expected, &actual); - - // grouping - let sql = "SELECT d1, COUNT(*) FROM test group by d1"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+----------+", - "| d1 | COUNT(*) |", - "+-------+----------+", - "| | 1 |", - "| one | 1 |", - "| three | 1 |", - "+-------+----------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - - // window functions - let sql = "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+-----+", - "| d1 | rn1 |", - "+-------+-----+", - "| | 1 |", - "| one | 1 |", - "| three | 1 |", - "+-------+-----+", - ]; - assert_batches_sorted_eq!(expected, &actual); - - Ok(()) -} - -#[tokio::test] -async fn sort_on_window_null_string() -> Result<()> { - let d1: DictionaryArray = - vec![Some("one"), None, Some("three")].into_iter().collect(); - let d2: StringArray = vec![Some("ONE"), None, Some("THREE")].into_iter().collect(); - let d3: LargeStringArray = - vec![Some("One"), None, Some("Three")].into_iter().collect(); - - let batch = RecordBatch::try_from_iter(vec![ - ("d1", Arc::new(d1) as ArrayRef), - ("d2", Arc::new(d2) as ArrayRef), - ("d3", Arc::new(d3) as ArrayRef), - ]) - .unwrap(); - - let ctx = - SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)); - ctx.register_batch("test", batch)?; - - let sql = - "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test order by d1 asc"; - - let actual = execute_to_batches(&ctx, sql).await; - // NULLS LAST - let expected = [ - "+-------+-----+", - "| d1 | rn1 |", - "+-------+-----+", - "| one | 1 |", - "| three | 1 |", - "| | 1 |", - "+-------+-----+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = - "SELECT d2, row_number() OVER (partition by d2) as rn1 FROM test ORDER BY d2 asc"; - let actual = execute_to_batches(&ctx, sql).await; - // NULLS LAST - let expected = [ - "+-------+-----+", - "| d2 | rn1 |", - "+-------+-----+", - "| ONE | 1 |", - "| THREE | 1 |", - "| | 1 |", - "+-------+-----+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = - "SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as rn1 FROM test ORDER BY d2 desc"; - - let actual = execute_to_batches(&ctx, sql).await; - // NULLS FIRST - let expected = [ - "+-------+-----+", - "| d2 | rn1 |", - "+-------+-----+", - "| | 1 |", - "| THREE | 1 |", - "| ONE | 1 |", - "+-------+-----+", - ]; - assert_batches_eq!(expected, &actual); - - // FIXME sort on LargeUtf8 String has bug. 
- // let sql = - // "SELECT d3, row_number() OVER (partition by d3) as rn1 FROM test"; - // let actual = execute_to_batches(&ctx, sql).await; - // let expected = vec![ - // "+-------+-----+", - // "| d3 | rn1 |", - // "+-------+-----+", - // "| | 1 |", - // "| One | 1 |", - // "| Three | 1 |", - // "+-------+-----+", - // ]; - // assert_batches_eq!(expected, &actual); - - Ok(()) -} - -// Test prepare statement from sql to final result -// This test is equivalent with the test parallel_query_with_filter below but using prepare statement -#[tokio::test] -async fn test_prepare_statement() -> Result<()> { +async fn test_list_query_parameters() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?; - - // sql to statement then to prepare logical plan with parameters - // c1 defined as UINT32, c2 defined as UInt64 but the params are Int32 and Float64 - let dataframe = - ctx.sql("PREPARE my_plan(INT, DOUBLE) AS SELECT c1, c2 FROM test WHERE c1 > $2 AND c1 < $1").await?; - - // prepare logical plan to logical plan without parameters - let param_values = vec![ScalarValue::Int32(Some(3)), ScalarValue::Float64(Some(0.0))]; - let dataframe = dataframe.with_param_values(param_values)?; - let results = dataframe.collect().await?; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; + let results = ctx + .sql("SELECT * FROM test WHERE c1 = $1") + .await? + .with_param_values(vec![ScalarValue::from(3i32)])? + .collect() + .await?; let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 10 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 2 | 1 |", - "| 2 | 10 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "+----+----+", + "+----+----+-------+", + "| c1 | c2 | c3 |", + "+----+----+-------+", + "| 3 | 1 | false |", + "| 3 | 10 | true |", + "| 3 | 2 | true |", + "| 3 | 3 | false |", + "| 3 | 4 | true |", + "| 3 | 5 | false |", + "| 3 | 6 | true |", + "| 3 | 7 | false |", + "| 3 | 8 | true |", + "| 3 | 9 | false |", + "+----+----+-------+", ]; assert_batches_sorted_eq!(expected, &results); - Ok(()) } @@ -529,7 +55,7 @@ async fn test_prepare_statement() -> Result<()> { async fn test_named_query_parameters() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; // sql to statement then to logical plan with parameters // c1 defined as UINT32, c2 defined as UInt64 @@ -572,16 +98,24 @@ async fn test_named_query_parameters() -> Result<()> { Ok(()) } +// Test prepare statement from sql to final result +// This test is equivalent with the test parallel_query_with_filter below but using prepare statement #[tokio::test] -async fn parallel_query_with_filter() -> Result<()> { +async fn test_prepare_statement() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; + + // sql to statement then to prepare logical plan with parameters + // c1 defined as UINT32, c2 defined as UInt64 but the params are Int32 and Float64 + let dataframe = + ctx.sql("PREPARE my_plan(INT, DOUBLE) AS SELECT c1, c2 
FROM test WHERE c1 > $2 AND c1 < $1").await?; + + // prepare logical plan to logical plan without parameters + let param_values = vec![ScalarValue::Int32(Some(3)), ScalarValue::Float64(Some(0.0))]; + let dataframe = dataframe.with_param_values(param_values)?; + let results = dataframe.collect().await?; - let dataframe = ctx - .sql("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3") - .await?; - let results = dataframe.collect().await.unwrap(); let expected = vec![ "+----+----+", "| c1 | c2 |", @@ -614,50 +148,106 @@ async fn parallel_query_with_filter() -> Result<()> { } #[tokio::test] -async fn boolean_literal() -> Result<()> { - let results = - execute_with_partition("SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true", 4) - .await?; - +async fn prepared_statement_type_coercion() -> Result<()> { + let ctx = SessionContext::new(); + let signed_ints: Int32Array = vec![-1, 0, 1].into(); + let unsigned_ints: UInt64Array = vec![1, 2, 3].into(); + let batch = RecordBatch::try_from_iter(vec![ + ("signed", Arc::new(signed_ints) as ArrayRef), + ("unsigned", Arc::new(unsigned_ints) as ArrayRef), + ])?; + ctx.register_batch("test", batch)?; + let results = ctx.sql("PREPARE my_plan(BIGINT, INT, TEXT) AS SELECT signed, unsigned FROM test WHERE $1 >= signed AND signed <= $2 AND unsigned = $3") + .await? + .with_param_values(vec![ + ScalarValue::from(1_i64), + ScalarValue::from(-1_i32), + ScalarValue::from("1"), + ])? + .collect() + .await?; let expected = [ - "+----+------+", - "| c1 | c3 |", - "+----+------+", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "+----+------+", + "+--------+----------+", + "| signed | unsigned |", + "+--------+----------+", + "| -1 | 1 |", + "+--------+----------+", ]; assert_batches_sorted_eq!(expected, &results); - Ok(()) } #[tokio::test] -async fn unprojected_filter() { - let config = SessionConfig::new(); - let ctx = SessionContext::new_with_config(config); - let df = ctx.read_table(table_with_sequence(1, 3).unwrap()).unwrap(); - - let df = df - .filter(col("i").gt(lit(2))) - .unwrap() - .select(vec![col("i") + col("i")]) - .unwrap(); - - let plan = df.clone().into_optimized_plan().unwrap(); - println!("{}", plan.display_indent()); - - let results = df.collect().await.unwrap(); +async fn prepared_statement_invalid_types() -> Result<()> { + let ctx = SessionContext::new(); + let signed_ints: Int32Array = vec![-1, 0, 1].into(); + let unsigned_ints: UInt64Array = vec![1, 2, 3].into(); + let batch = RecordBatch::try_from_iter(vec![ + ("signed", Arc::new(signed_ints) as ArrayRef), + ("unsigned", Arc::new(unsigned_ints) as ArrayRef), + ])?; + ctx.register_batch("test", batch)?; + let results = ctx + .sql("PREPARE my_plan(INT) AS SELECT signed FROM test WHERE signed = $1") + .await? 
+ .with_param_values(vec![ScalarValue::from("1")]); + assert_eq!( + results.unwrap_err().strip_backtrace(), + "Error during planning: Expected parameter of type Int32, got Utf8 at index 0" + ); + Ok(()) +} +#[tokio::test] +async fn test_parameter_type_coercion() -> Result<()> { + let ctx = SessionContext::new(); + let signed_ints: Int32Array = vec![-1, 0, 1].into(); + let unsigned_ints: UInt64Array = vec![1, 2, 3].into(); + let batch = RecordBatch::try_from_iter(vec![ + ("signed", Arc::new(signed_ints) as ArrayRef), + ("unsigned", Arc::new(unsigned_ints) as ArrayRef), + ])?; + ctx.register_batch("test", batch)?; + let results = ctx.sql("SELECT signed, unsigned FROM test WHERE $foo >= signed AND signed <= $bar AND unsigned <= $baz AND unsigned = $str") + .await? + .with_param_values(vec![ + ("foo", ScalarValue::from(1_u64)), + ("bar", ScalarValue::from(-1_i64)), + ("baz", ScalarValue::from(2_i32)), + ("str", ScalarValue::from("1")), + ])? + .collect().await?; let expected = [ - "+-----------------------+", - "| ?table?.i + ?table?.i |", - "+-----------------------+", - "| 6 |", - "+-----------------------+", + "+--------+----------+", + "| signed | unsigned |", + "+--------+----------+", + "| -1 | 1 |", + "+--------+----------+", ]; assert_batches_sorted_eq!(expected, &results); + Ok(()) +} + +#[tokio::test] +async fn test_parameter_invalid_types() -> Result<()> { + let ctx = SessionContext::new(); + let list_array = ListArray::from_iter_primitive::(vec![Some(vec![ + Some(1), + Some(2), + Some(3), + ])]); + let batch = + RecordBatch::try_from_iter(vec![("list", Arc::new(list_array) as ArrayRef)])?; + ctx.register_batch("test", batch)?; + let results = ctx + .sql("SELECT list FROM test WHERE list = $1") + .await? + .with_param_values(vec![ScalarValue::from(4_i32)])? + .collect() + .await; + assert_eq!( + results.unwrap_err().strip_backtrace(), + "Arrow error: Invalid argument error: Invalid comparison operation: List(Field { name: \"item\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) == List(Field { name: \"item\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })" +); + Ok(()) } diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 5882718acefd..5dbac0322fc0 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -19,7 +19,7 @@ //! 
user defined aggregate functions use arrow::{array::AsArray, datatypes::Fields}; -use arrow_array::Int32Array; +use arrow_array::{types::UInt64Type, Int32Array, PrimitiveArray}; use arrow_schema::Schema; use std::sync::{ atomic::{AtomicBool, Ordering}, @@ -45,7 +45,9 @@ use datafusion::{ use datafusion_common::{ assert_contains, cast::as_primitive_array, exec_err, DataFusionError, }; -use datafusion_expr::{create_udaf, SimpleAggregateUDF}; +use datafusion_expr::{ + create_udaf, AggregateUDFImpl, GroupsAccumulator, SimpleAggregateUDF, +}; use datafusion_physical_expr::expressions::AvgAccumulator; /// Test to show the contents of the setup @@ -297,6 +299,25 @@ async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_groups_accumulator() -> Result<()> { + let ctx = SessionContext::new(); + let arr = Int32Array::from(vec![1]); + let batch = RecordBatch::try_from_iter(vec![("a", Arc::new(arr) as _)])?; + ctx.register_batch("t", batch).unwrap(); + + let udaf = AggregateUDF::from(TestGroupsAccumulator { + signature: Signature::exact(vec![DataType::Float64], Volatility::Immutable), + result: 1, + }); + ctx.register_udaf(udaf.clone()); + + let sql_df = ctx.sql("SELECT geo_mean(a) FROM t group by a").await?; + sql_df.show().await?; + + Ok(()) +} + /// Returns an context with a table "t" and the "first" and "time_sum" /// aggregate functions registered. /// @@ -435,7 +456,7 @@ impl TimeSum { } impl Accumulator for TimeSum { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } @@ -457,7 +478,7 @@ impl Accumulator for TimeSum { self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { println!("Evaluating to {}", self.sum); Ok(ScalarValue::TimestampNanosecond(Some(self.sum), None)) } @@ -582,14 +603,14 @@ impl FirstSelector { } impl Accumulator for FirstSelector { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let state = self.to_state().into_iter().collect::>(); Ok(state) } /// produce the output structure - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.to_scalar()) } @@ -621,3 +642,106 @@ impl Accumulator for FirstSelector { std::mem::size_of_val(self) } } + +#[derive(Debug, Clone)] +struct TestGroupsAccumulator { + signature: Signature, + result: u64, +} + +impl AggregateUDFImpl for TestGroupsAccumulator { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "geo_mean" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::UInt64) + } + + fn accumulator(&self, _arg: &DataType) -> Result> { + // should use groups accumulator + panic!("accumulator shouldn't invoke"); + } + + fn state_type(&self, _return_type: &DataType) -> Result> { + Ok(vec![DataType::UInt64]) + } + + fn groups_accumulator_supported(&self) -> bool { + true + } + + fn create_groups_accumulator(&self) -> Result> { + Ok(Box::new(self.clone())) + } +} + +impl Accumulator for TestGroupsAccumulator { + fn update_batch(&mut self, _values: &[ArrayRef]) -> Result<()> { + Ok(()) + } + + fn evaluate(&mut self) -> Result { + Ok(ScalarValue::from(self.result)) + } + + fn size(&self) -> usize { + std::mem::size_of::() + } + + fn state(&mut self) -> Result> { + Ok(vec![ScalarValue::from(self.result)]) + } + + fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> { + Ok(()) + } +} + +impl GroupsAccumulator for TestGroupsAccumulator { 
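+ // Test double: every method below either ignores its input or returns the
+ // fixed `self.result`, so `test_groups_accumulator` only verifies that
+ // execution is routed through `create_groups_accumulator` (and never through
+ // `accumulator`, which panics).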
+ fn update_batch( + &mut self, + _values: &[ArrayRef], + _group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + _total_num_groups: usize, + ) -> Result<()> { + Ok(()) + } + + fn evaluate(&mut self, _emit_to: datafusion_expr::EmitTo) -> Result { + Ok(Arc::new(PrimitiveArray::::new( + vec![self.result].into(), + None, + )) as ArrayRef) + } + + fn state(&mut self, _emit_to: datafusion_expr::EmitTo) -> Result> { + Ok(vec![Arc::new(PrimitiveArray::::new( + vec![self.result].into(), + None, + )) as ArrayRef]) + } + + fn merge_batch( + &mut self, + _values: &[ArrayRef], + _group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + _total_num_groups: usize, + ) -> Result<()> { + Ok(()) + } + + fn size(&self) -> usize { + std::mem::size_of::() + } +} diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index fe88ea6cf115..f1e32591fb90 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -19,10 +19,7 @@ use arrow::compute::kernels::numeric::add; use arrow_array::{ArrayRef, Float64Array, Int32Array, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; use datafusion::prelude::*; -use datafusion::{ - execution::registry::FunctionRegistry, - physical_plan::functions::make_scalar_function, test_util, -}; +use datafusion::{execution::registry::FunctionRegistry, test_util}; use datafusion_common::cast::as_float64_array; use datafusion_common::{assert_batches_eq, cast::as_int32_array, Result, ScalarValue}; use datafusion_expr::{ @@ -87,12 +84,18 @@ async fn scalar_udf() -> Result<()> { ctx.register_batch("t", batch)?; - let myfunc = |args: &[ArrayRef]| { - let l = as_int32_array(&args[0])?; - let r = as_int32_array(&args[1])?; - Ok(Arc::new(add(l, r)?) as ArrayRef) - }; - let myfunc = make_scalar_function(myfunc); + let myfunc = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(l) = &args[0] else { + panic!("should be array") + }; + let ColumnarValue::Array(r) = &args[1] else { + panic!("should be array") + }; + + let l = as_int32_array(l)?; + let r = as_int32_array(r)?; + Ok(ColumnarValue::from(Arc::new(add(l, r)?) 
as ArrayRef)) + }); ctx.register_udf(create_udf( "my_add", @@ -163,11 +166,14 @@ async fn scalar_udf_zero_params() -> Result<()> { ctx.register_batch("t", batch)?; // create function just returns 100 regardless of inp - let myfunc = |args: &[ArrayRef]| { - let num_rows = args[0].len(); - Ok(Arc::new((0..num_rows).map(|_| 100).collect::()) as ArrayRef) - }; - let myfunc = make_scalar_function(myfunc); + let myfunc = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Scalar(_) = &args[0] else { + panic!("expect scalar") + }; + Ok(ColumnarValue::Array( + Arc::new((0..1).map(|_| 100).collect::()) as ArrayRef, + )) + }); ctx.register_udf(create_udf( "get_100", @@ -248,7 +254,7 @@ async fn udaf_as_window_func() -> Result<()> { struct MyAccumulator; impl Accumulator for MyAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { unimplemented!() } @@ -260,7 +266,7 @@ async fn udaf_as_window_func() -> Result<()> { unimplemented!() } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { unimplemented!() } @@ -307,8 +313,12 @@ async fn case_sensitive_identifiers_user_defined_functions() -> Result<()> { let batch = RecordBatch::try_from_iter(vec![("i", Arc::new(arr) as _)])?; ctx.register_batch("t", batch).unwrap(); - let myfunc = |args: &[ArrayRef]| Ok(Arc::clone(&args[0])); - let myfunc = make_scalar_function(myfunc); + let myfunc = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(array) = &args[0] else { + panic!("should be array") + }; + Ok(ColumnarValue::from(Arc::clone(array))) + }); ctx.register_udf(create_udf( "MY_FUNC", @@ -348,8 +358,12 @@ async fn test_user_defined_functions_with_alias() -> Result<()> { let batch = RecordBatch::try_from_iter(vec![("i", Arc::new(arr) as _)])?; ctx.register_batch("t", batch).unwrap(); - let myfunc = |args: &[ArrayRef]| Ok(Arc::clone(&args[0])); - let myfunc = make_scalar_function(myfunc); + let myfunc = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(array) = &args[0] else { + panic!("should be array") + }; + Ok(ColumnarValue::from(Arc::clone(array))) + }); let udf = create_udf( "dummy", diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index 55555014f2ef..58ed1ebff04c 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -25,30 +25,60 @@ pub mod proxy; pub use pool::*; -/// The pool of memory on which [`MemoryReservation`]s record their -/// memory reservations. +/// Tracks and potentially limits memory use across operators during execution. /// -/// DataFusion is a streaming query engine, processing most queries -/// without buffering the entire input. However, certain operations -/// such as sorting and grouping/joining with a large number of -/// distinct groups/keys, can require buffering intermediate results -/// and for large datasets this can require large amounts of memory. +/// # Memory Management Overview /// -/// In order to avoid allocating memory until the OS or the container -/// system kills the process, DataFusion operators only allocate -/// memory they are able to reserve from the configured -/// [`MemoryPool`]. Once the memory tracked by the pool is exhausted, -/// operators must either free memory by spilling to local disk or -/// error. +/// DataFusion is a streaming query engine, processing most queries without +/// buffering the entire input. Most operators require a fixed amount of memory +/// based on the schema and target batch size. 
However, certain operations such +/// as sorting and grouping/joining require buffering intermediate results, +/// which can require memory proportional to the number of input rows. /// -/// A `MemoryPool` can be shared by concurrently executing plans in -/// the same process to control memory usage in a multi-tenant system. +/// Rather than tracking all allocations, DataFusion takes a pragmatic approach: +/// Intermediate memory used as data streams through the system is not accounted for +/// (it is assumed to be "small") but the large consumers of memory must register +/// and constrain their use. This design trades off the additional code +/// complexity of memory tracking against the ability to limit resource usage. /// -/// The following memory pool implementations are available: +/// When limiting memory with a `MemoryPool` you should typically reserve some +/// overhead (e.g. 10%) for the "small" memory allocations that are not tracked. /// -/// * [`UnboundedMemoryPool`] -/// * [`GreedyMemoryPool`] -/// * [`FairSpillPool`] +/// +/// # Memory Management Design +/// +/// As explained above, DataFusion's design ONLY limits operators that require +/// "large" amounts of memory (proportional to number of input rows), such as +/// `GroupByHashExec`. It does NOT track and limit memory used internally by +/// other operators such as `ParquetExec` or the `RecordBatch`es that flow +/// between operators. +/// +/// In order to avoid allocating memory until the OS or the container system +/// kills the process, DataFusion `ExecutionPlan`s (operators) that consume +/// large amounts of memory must first request their desired allocation from a +/// [`MemoryPool`] before allocating more. The request is typically managed via +/// a [`MemoryReservation`]. +/// +/// If the allocation is successful, the operator should proceed and allocate +/// the desired memory. If the allocation fails, the operator must either first +/// free memory (e.g. by spilling to local disk) and try again, or error. +/// +/// Note that a `MemoryPool` can be shared by concurrently executing plans, +/// which can be used to control memory usage in a multi-tenant system. +/// +/// # Implementing `MemoryPool` +/// +/// You can implement a custom allocation policy by implementing the +/// [`MemoryPool`] trait and configuring a `SessionContext` appropriately. +/// However, DataFusion comes with the following simple memory pool implementations that +/// handle many common cases: +/// +/// * [`UnboundedMemoryPool`]: no memory limits (the default) +/// +/// * [`GreedyMemoryPool`]: Limits memory usage to a fixed size using a "first +/// come first served" policy +/// +/// * [`FairSpillPool`]: Limits memory usage to a fixed size, allocating memory +/// to all spilling operators fairly pub trait MemoryPool: Send + Sync + std::fmt::Debug { /// Registers a new [`MemoryConsumer`] /// diff --git a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs index 7626f8bef162..c0c58a87dcc6 100644 --- a/datafusion/execution/src/object_store.rs +++ b/datafusion/execution/src/object_store.rs @@ -180,7 +180,8 @@ impl DefaultObjectStoreRegistry { /// Default without any backend registered.
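As a rough illustration of the reserve-then-allocate flow described in these docs (a sketch only; it assumes the `GreedyMemoryPool`, `MemoryConsumer` and `MemoryReservation` APIs exported from this module), an operator might do:

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};

fn reservation_example() -> Result<()> {
    // Limit all registered consumers to 10 MB, first come first served.
    let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(10 * 1024 * 1024));

    // An operator registers itself and asks for memory before allocating.
    let mut reservation = MemoryConsumer::new("ExampleSortExec").register(&pool);
    reservation.try_grow(1024 * 1024)?; // ok: within the limit

    // If a later `try_grow` fails, the operator should spill or error,
    // and release what it holds when done.
    reservation.free();
    Ok(())
}
```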
#[cfg(target_arch = "wasm32")] pub fn new() -> Self { - Self::default() + let object_stores: DashMap> = DashMap::new(); + Self { object_stores } } } diff --git a/datafusion/expr/src/accumulator.rs b/datafusion/expr/src/accumulator.rs index 32de88b3d99f..523e4e21a640 100644 --- a/datafusion/expr/src/accumulator.rs +++ b/datafusion/expr/src/accumulator.rs @@ -56,11 +56,18 @@ pub trait Accumulator: Send + Sync + Debug { /// running sum. fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()>; - /// Returns the final aggregate value. + /// Returns the final aggregate value, consuming the internal state. /// /// For example, the `SUM` accumulator maintains a running sum, /// and `evaluate` will produce that running sum as its output. - fn evaluate(&self) -> Result; + /// + /// After this call, the accumulator's internal state should be + /// equivalent to when it was first created. + /// + /// This function gets `&mut self` to allow for the accumulator to build + /// arrow compatible internal state that can be returned without copying + /// when possible (for example distinct strings) + fn evaluate(&mut self) -> Result; /// Returns the allocated size required for this accumulator, in /// bytes, including `Self`. @@ -72,7 +79,15 @@ pub trait Accumulator: Send + Sync + Debug { /// the `capacity` should be used not the `len`. fn size(&self) -> usize; - /// Returns the intermediate state of the accumulator. + /// Returns the intermediate state of the accumulator, consuming the + /// intermediate state. + /// + /// After this call, the accumulator's internal state should be + /// equivalent to when it was first created. + /// + /// This function gets `&mut self` to allow for the accumulator to build + /// arrow compatible internal state that can be returned without copying + /// when possible (for example distinct strings). /// /// Intermediate state is used for "multi-phase" grouping in /// DataFusion, where an aggregate is computed in parallel with @@ -129,7 +144,7 @@ pub trait Accumulator: Send + Sync + Debug { /// Note that [`ScalarValue::List`] can be used to pass multiple /// values if the number of intermediate values is not known at /// planning time (e.g. for `MEDIAN`) - fn state(&self) -> Result>; + fn state(&mut self) -> Result>; /// Updates the accumulator's state from an `Array` containing one /// or more intermediate values. 
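To make the new `&mut self` signatures concrete, a minimal hypothetical accumulator that keeps a running `i64` sum could be written like this (a sketch, not an accumulator shipped by DataFusion):

```rust
use arrow_array::ArrayRef;
use datafusion_common::{cast::as_int64_array, Result, ScalarValue};
use datafusion_expr::Accumulator;

#[derive(Debug, Default)]
struct I64Sum {
    sum: i64,
}

impl Accumulator for I64Sum {
    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
        let array = as_int64_array(&values[0])?;
        self.sum += array.iter().flatten().sum::<i64>();
        Ok(())
    }

    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
        // The intermediate state has the same shape as the input.
        self.update_batch(states)
    }

    // `evaluate` and `state` now take `&mut self`, so an implementation may
    // hand over internal buffers without copying them.
    fn evaluate(&mut self) -> Result<ScalarValue> {
        Ok(ScalarValue::Int64(Some(self.sum)))
    }

    fn state(&mut self) -> Result<Vec<ScalarValue>> {
        Ok(vec![ScalarValue::Int64(Some(self.sum))])
    }

    fn size(&self) -> usize {
        std::mem::size_of_val(self)
    }
}
```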
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 6f64642f60d9..e86d6172cecd 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -221,8 +221,12 @@ pub enum BuiltinScalarFunction { DateTrunc, /// date_bin DateBin, + /// ends_with + EndsWith, /// initcap InitCap, + /// InStr + InStr, /// left Left, /// lpad @@ -446,7 +450,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::DatePart => Volatility::Immutable, BuiltinScalarFunction::DateTrunc => Volatility::Immutable, BuiltinScalarFunction::DateBin => Volatility::Immutable, + BuiltinScalarFunction::EndsWith => Volatility::Immutable, BuiltinScalarFunction::InitCap => Volatility::Immutable, + BuiltinScalarFunction::InStr => Volatility::Immutable, BuiltinScalarFunction::Left => Volatility::Immutable, BuiltinScalarFunction::Lpad => Volatility::Immutable, BuiltinScalarFunction::Lower => Volatility::Immutable, @@ -708,6 +714,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::InitCap => { utf8_to_str_type(&input_expr_types[0], "initcap") } + BuiltinScalarFunction::InStr => { + utf8_to_int_type(&input_expr_types[0], "instr") + } BuiltinScalarFunction::Left => utf8_to_str_type(&input_expr_types[0], "left"), BuiltinScalarFunction::Lower => { utf8_to_str_type(&input_expr_types[0], "lower") @@ -795,6 +804,7 @@ impl BuiltinScalarFunction { true, )))), BuiltinScalarFunction::StartsWith => Ok(Boolean), + BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { utf8_to_int_type(&input_expr_types[0], "strpos") } @@ -977,7 +987,10 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { Signature::any(3, self.volatility()) } - BuiltinScalarFunction::ArraySlice => Signature::any(3, self.volatility()), + BuiltinScalarFunction::ArraySlice => { + Signature::variadic_any(self.volatility()) + } + BuiltinScalarFunction::ArrayToString => { Signature::variadic_any(self.volatility()) } @@ -1053,67 +1066,13 @@ impl BuiltinScalarFunction { vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], self.volatility(), ), - BuiltinScalarFunction::ToTimestamp => Signature::uniform( - 1, - vec![ - Int64, - Float64, - Timestamp(Nanosecond, None), - Timestamp(Microsecond, None), - Timestamp(Millisecond, None), - Timestamp(Second, None), - Utf8, - ], - self.volatility(), - ), - BuiltinScalarFunction::ToTimestampMillis => Signature::uniform( - 1, - vec![ - Int64, - Timestamp(Nanosecond, None), - Timestamp(Microsecond, None), - Timestamp(Millisecond, None), - Timestamp(Second, None), - Utf8, - ], - self.volatility(), - ), - BuiltinScalarFunction::ToTimestampMicros => Signature::uniform( - 1, - vec![ - Int64, - Timestamp(Nanosecond, None), - Timestamp(Microsecond, None), - Timestamp(Millisecond, None), - Timestamp(Second, None), - Utf8, - ], - self.volatility(), - ), - BuiltinScalarFunction::ToTimestampNanos => Signature::uniform( - 1, - vec![ - Int64, - Timestamp(Nanosecond, None), - Timestamp(Microsecond, None), - Timestamp(Millisecond, None), - Timestamp(Second, None), - Utf8, - ], - self.volatility(), - ), - BuiltinScalarFunction::ToTimestampSeconds => Signature::uniform( - 1, - vec![ - Int64, - Timestamp(Nanosecond, None), - Timestamp(Microsecond, None), - Timestamp(Millisecond, None), - Timestamp(Second, None), - Utf8, - ], - self.volatility(), - ), + BuiltinScalarFunction::ToTimestamp + | BuiltinScalarFunction::ToTimestampSeconds + | BuiltinScalarFunction::ToTimestampMillis + | 
BuiltinScalarFunction::ToTimestampMicros + | BuiltinScalarFunction::ToTimestampNanos => { + Signature::variadic_any(self.volatility()) + } BuiltinScalarFunction::FromUnixtime => { Signature::uniform(1, vec![Int64], self.volatility()) } @@ -1262,17 +1221,19 @@ impl BuiltinScalarFunction { ], self.volatility(), ), - BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith => { - Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), - ], - self.volatility(), - ) - } + + BuiltinScalarFunction::EndsWith + | BuiltinScalarFunction::InStr + | BuiltinScalarFunction::Strpos + | BuiltinScalarFunction::StartsWith => Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8, LargeUtf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], + self.volatility(), + ), BuiltinScalarFunction::Substr => Signature::one_of( vec![ @@ -1524,7 +1485,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Concat => &["concat"], BuiltinScalarFunction::ConcatWithSeparator => &["concat_ws"], BuiltinScalarFunction::Chr => &["chr"], + BuiltinScalarFunction::EndsWith => &["ends_with"], BuiltinScalarFunction::InitCap => &["initcap"], + BuiltinScalarFunction::InStr => &["instr"], BuiltinScalarFunction::Left => &["left"], BuiltinScalarFunction::Lower => &["lower"], BuiltinScalarFunction::Lpad => &["lpad"], diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index a03e3d2d24a9..f4b1cd03db1f 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -133,11 +133,11 @@ impl BuiltInWindowFunction { match self { BuiltInWindowFunction::RowNumber | BuiltInWindowFunction::Rank - | BuiltInWindowFunction::DenseRank => Ok(DataType::UInt64), + | BuiltInWindowFunction::DenseRank + | BuiltInWindowFunction::Ntile => Ok(DataType::UInt64), BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => { Ok(DataType::Float64) } - BuiltInWindowFunction::Ntile => Ok(DataType::UInt64), BuiltInWindowFunction::Lag | BuiltInWindowFunction::Lead | BuiltInWindowFunction::FirstValue diff --git a/datafusion/expr/src/columnar_value.rs b/datafusion/expr/src/columnar_value.rs index 7a2883928169..58c534b50aad 100644 --- a/datafusion/expr/src/columnar_value.rs +++ b/datafusion/expr/src/columnar_value.rs @@ -37,6 +37,18 @@ pub enum ColumnarValue { Scalar(ScalarValue), } +impl From for ColumnarValue { + fn from(value: ArrayRef) -> Self { + ColumnarValue::Array(value) + } +} + +impl From for ColumnarValue { + fn from(value: ScalarValue) -> Self { + ColumnarValue::Scalar(value) + } +} + impl ColumnarValue { pub fn data_type(&self) -> DataType { match self { diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 40d40692e593..c5d158d87638 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1266,6 +1266,54 @@ impl Expr { Ok(Transformed::Yes(expr)) }) } + + /// Returns true if some of this `exprs` subexpressions may not be evaluated + /// and thus any side effects (like divide by zero) may not be encountered + pub fn short_circuits(&self) -> bool { + match self { + Expr::ScalarFunction(ScalarFunction { func_def, .. }) => { + matches!(func_def, ScalarFunctionDefinition::BuiltIn(fun) if *fun == BuiltinScalarFunction::Coalesce) + } + Expr::BinaryExpr(BinaryExpr { op, .. }) => { + matches!(op, Operator::And | Operator::Or) + } + Expr::Case { .. 
} => true, + // Use explicit pattern match instead of a default + // implementation, so that in the future if someone adds + // new Expr types, they will check here as well + Expr::AggregateFunction(..) + | Expr::Alias(..) + | Expr::Between(..) + | Expr::Cast(..) + | Expr::Column(..) + | Expr::Exists(..) + | Expr::GetIndexedField(..) + | Expr::GroupingSet(..) + | Expr::InList(..) + | Expr::InSubquery(..) + | Expr::IsFalse(..) + | Expr::IsNotFalse(..) + | Expr::IsNotNull(..) + | Expr::IsNotTrue(..) + | Expr::IsNotUnknown(..) + | Expr::IsNull(..) + | Expr::IsTrue(..) + | Expr::IsUnknown(..) + | Expr::Like(..) + | Expr::ScalarSubquery(..) + | Expr::ScalarVariable(_, _) + | Expr::SimilarTo(..) + | Expr::Not(..) + | Expr::Negative(..) + | Expr::OuterReferenceColumn(_, _) + | Expr::TryCast(..) + | Expr::Wildcard { .. } + | Expr::WindowFunction(..) + | Expr::Literal(..) + | Expr::Sort(..) + | Expr::Placeholder(..) => false, + } + } } // modifies expr if it is a placeholder with datatype of right @@ -1869,10 +1917,14 @@ mod test { let exp2 = col("a") + lit(2); let exp3 = !(col("a") + lit(2)); - assert!(exp1 < exp2); - assert!(exp2 > exp1); - assert!(exp2 > exp3); - assert!(exp3 < exp2); + // Since comparisons are done using hash value of the expression + // expr < expr2 may return false, or true. There is no guaranteed result. + // The only guarantee is "<" operator should have the opposite result of ">=" operator + let greater_or_equal = exp1 >= exp2; + assert_eq!(exp1 < exp2, !greater_or_equal); + + let greater_or_equal = exp3 >= exp2; + assert_eq!(exp3 < exp2, !greater_or_equal); } #[test] diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 834420e413b0..006b5f10f10d 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -731,7 +731,7 @@ scalar_expr!( scalar_expr!( ArraySlice, array_slice, - array offset length, + array begin end stride, "returns a slice of the array." ); scalar_expr!( @@ -798,6 +798,7 @@ scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input scalar_expr!(Encode, encode, input encoding, "encode the `input`, using the `encoding`. encoding can be base64 or hex"); scalar_expr!(Decode, decode, input encoding, "decode the`input`, using the `encoding`. encoding can be base64 or hex"); scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase"); +scalar_expr!(InStr, instr, string substring, "returns the position of the first occurrence of `substring` in `string`"); scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`"); scalar_expr!(Lower, lower, string, "convert the string to lower case"); scalar_expr!( @@ -830,6 +831,7 @@ scalar_expr!(SHA512, sha512, string, "SHA-512 hash"); scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); scalar_expr!(StringToArray, string_to_array, string delimiter null_string, "splits a `string` based on a `delimiter` and returns an array of parts. 
Any parts matching the optional `null_string` will be replaced with `NULL`"); scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); +scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); scalar_expr!(Substr, substr, string position, "substring from the `position` to the end"); scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters"); @@ -885,29 +887,30 @@ nary_scalar_expr!( scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date"); scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision"); scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval"); -scalar_expr!( +nary_scalar_expr!( + ToTimestamp, + to_timestamp, + "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`" +); +nary_scalar_expr!( ToTimestampMillis, to_timestamp_millis, - date, - "converts a string to a `Timestamp(Milliseconds, None)`" + "converts a string and optional formats to a `Timestamp(Milliseconds, None)`" ); -scalar_expr!( +nary_scalar_expr!( ToTimestampMicros, to_timestamp_micros, - date, - "converts a string to a `Timestamp(Microseconds, None)`" + "converts a string and optional formats to a `Timestamp(Microseconds, None)`" ); -scalar_expr!( +nary_scalar_expr!( ToTimestampNanos, to_timestamp_nanos, - date, - "converts a string to a `Timestamp(Nanoseconds, None)`" + "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`" ); -scalar_expr!( +nary_scalar_expr!( ToTimestampSeconds, to_timestamp_seconds, - date, - "converts a string to a `Timestamp(Seconds, None)`" + "converts a string and optional formats to a `Timestamp(Seconds, None)`" ); scalar_expr!( FromUnixtime, @@ -1371,6 +1374,7 @@ mod test { test_scalar_expr!(Gcd, gcd, arg_1, arg_2); test_scalar_expr!(Lcm, lcm, arg_1, arg_2); test_scalar_expr!(InitCap, initcap, string); + test_scalar_expr!(InStr, instr, string, substring); test_scalar_expr!(Left, left, string, count); test_scalar_expr!(Lower, lower, string); test_nary_scalar_expr!(Lpad, lpad, string, count); @@ -1409,6 +1413,7 @@ mod test { test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); test_scalar_expr!(StringToArray, string_to_array, expr, delimiter, null_value); test_scalar_expr!(StartsWith, starts_with, string, characters); + test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); test_scalar_expr!(Substr, substr, string, position); test_scalar_expr!(Substr, substring, string, position, count); diff --git a/datafusion/expr/src/groups_accumulator.rs b/datafusion/expr/src/groups_accumulator.rs new file mode 100644 index 000000000000..6580de19bc68 --- /dev/null +++ b/datafusion/expr/src/groups_accumulator.rs @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Vectorized [`GroupsAccumulator`] + +use arrow_array::{ArrayRef, BooleanArray}; +use datafusion_common::Result; + +/// Describes how many rows should be emitted during grouping. +#[derive(Debug, Clone, Copy)] +pub enum EmitTo { + /// Emit all groups + All, + /// Emit only the first `n` groups and shift all existing group + /// indexes down by `n`. + /// + /// For example, if `n=10`, group_index `0, 1, ... 9` are emitted + /// and group indexes '`10, 11, 12, ...` become `0, 1, 2, ...`. + First(usize), +} + +impl EmitTo { + /// Removes the number of rows from `v` required to emit the right + /// number of rows, returning a `Vec` with elements taken, and the + /// remaining values in `v`. + /// + /// This avoids copying if Self::All + pub fn take_needed(&self, v: &mut Vec) -> Vec { + match self { + Self::All => { + // Take the entire vector, leave new (empty) vector + std::mem::take(v) + } + Self::First(n) => { + // get end n+1,.. values into t + let mut t = v.split_off(*n); + // leave n+1,.. in v + std::mem::swap(v, &mut t); + t + } + } + } +} + +/// `GroupAccumulator` implements a single aggregate (e.g. AVG) and +/// stores the state for *all* groups internally. +/// +/// Each group is assigned a `group_index` by the hash table and each +/// accumulator manages the specific state, one per group_index. +/// +/// group_indexes are contiguous (there aren't gaps), and thus it is +/// expected that each GroupAccumulator will use something like `Vec<..>` +/// to store the group states. +pub trait GroupsAccumulator: Send { + /// Updates the accumulator's state from its arguments, encoded as + /// a vector of [`ArrayRef`]s. + /// + /// * `values`: the input arguments to the accumulator + /// + /// * `group_indices`: To which groups do the rows in `values` + /// belong, group id) + /// + /// * `opt_filter`: if present, only update aggregate state using + /// `values[i]` if `opt_filter[i]` is true + /// + /// * `total_num_groups`: the number of groups (the largest + /// group_index is thus `total_num_groups - 1`). + /// + /// Note that subsequent calls to update_batch may have larger + /// total_num_groups as new groups are seen. + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()>; + + /// Returns the final aggregate value for each group as a single + /// `RecordBatch`, resetting the internal state. + /// + /// The rows returned *must* be in group_index order: The value + /// for group_index 0, followed by 1, etc. Any group_index that + /// did not have values, should be null. + /// + /// For example, a `SUM` accumulator maintains a running sum for + /// each group, and `evaluate` will produce that running sum as + /// its output for all groups, in group_index order + /// + /// If `emit_to`` is [`EmitTo::All`], the accumulator should + /// return all groups and release / reset its internal state + /// equivalent to when it was first created. 
+ /// + /// If `emit_to` is [`EmitTo::First`], only the first `n` groups + /// should be emitted and the state for those first groups + /// removed. State for the remaining groups must be retained for + /// future use. The group_indices on subsequent calls to + /// `update_batch` or `merge_batch` will be shifted down by + /// `n`. See [`EmitTo::First`] for more details. + fn evaluate(&mut self, emit_to: EmitTo) -> Result; + + /// Returns the intermediate aggregate state for this accumulator, + /// used for multi-phase grouping, resetting its internal state. + /// + /// For example, `AVG` might return two arrays: `SUM` and `COUNT` + /// but the `MIN` aggregate would just return a single array. + /// + /// Note more sophisticated internal state can be passed as + /// single `StructArray` rather than multiple arrays. + /// + /// See [`Self::evaluate`] for details on the required output + /// order and `emit_to`. + fn state(&mut self, emit_to: EmitTo) -> Result>; + + /// Merges intermediate state (the output from [`Self::state`]) + /// into this accumulator's values. + /// + /// For some aggregates (such as `SUM`), `merge_batch` is the same + /// as `update_batch`, but for some aggregates (such as `COUNT`, + /// where the partial counts must be summed) the operations + /// differ. See [`Self::state`] for more details on how state is + /// used and merged. + /// + /// * `values`: arrays produced from calling `state` previously to the accumulator + /// + /// Other arguments are the same as for [`Self::update_batch`]; + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()>; + + /// Amount of memory used to store the state of this accumulator, + /// in bytes. This function is called once per batch, so it should + /// be `O(n)` to compute, not `O(num_groups)` + fn size(&self) -> usize; +} diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 21647f384159..c29535456327 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -47,6 +47,7 @@ pub mod expr_rewriter; pub mod expr_schema; pub mod field_util; pub mod function; +pub mod groups_accumulator; pub mod interval_arithmetic; pub mod logical_plan; pub mod tree_node; @@ -70,6 +71,7 @@ pub use function::{ AccumulatorFactoryFunction, PartitionEvaluatorFactory, ReturnTypeFunction, ScalarFunctionImplementation, StateTypeFunction, }; +pub use groups_accumulator::{EmitTo, GroupsAccumulator}; pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral}; pub use logical_plan::*; pub use nullif::SUPPORTED_NULLIF_TYPES; diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 847fbbbf61c7..eb5e5bd42634 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -55,6 +55,8 @@ use datafusion_common::{ ScalarValue, TableReference, ToDFSchema, UnnestOptions, }; +use super::plan::RecursiveQuery; + /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -121,6 +123,28 @@ impl LogicalPlanBuilder { })) } + /// Convert a regular plan into a recursive query. + /// `is_distinct` indicates whether the recursive term should be de-duplicated (`UNION`) after each iteration or not (`UNION ALL`). + pub fn to_recursive_query( + &self, + name: String, + recursive_term: LogicalPlan, + is_distinct: bool, + ) -> Result { + // TODO: we need to do a bunch of validation here. Maybe more. 
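+        // As one illustrative (not exhaustive) check such validation could
+        // perform: for a query like
+        //
+        //   WITH RECURSIVE nodes AS (
+        //       SELECT 1 AS id                          -- static term
+        //       UNION ALL
+        //       SELECT id + 1 FROM nodes WHERE id < 10  -- recursive term
+        //   )
+        //   SELECT * FROM nodes
+        //
+        // the static and recursive terms should produce the same number of
+        // columns with compatible types, since the static term's schema
+        // becomes the schema of the whole recursive query.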
+ if is_distinct { + return Err(DataFusionError::NotImplemented( + "Recursive queries with a distinct 'UNION' (in which the previous iteration's results will be de-duplicated) is not supported".to_string(), + )); + } + Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery { + name, + static_term: Arc::new(self.plan.clone()), + recursive_term: Arc::new(recursive_term), + is_distinct, + }))) + } + /// Create a values list based relation, and the schema is inferred from data, consuming /// `value`. See the [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html) /// documentation for more details. diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index bc722dd69ace..f6e6000897a5 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -36,8 +36,8 @@ pub use plan::{ projection_schema, Aggregate, Analyze, CrossJoin, DescribeTable, Distinct, DistinctOn, EmptyRelation, Explain, Extension, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Prepare, Projection, - Repartition, Sort, StringifiedPlan, Subquery, SubqueryAlias, TableScan, - ToStringifiedPlan, Union, Unnest, Values, Window, + RecursiveQuery, Repartition, Sort, StringifiedPlan, Subquery, SubqueryAlias, + TableScan, ToStringifiedPlan, Union, Unnest, Values, Window, }; pub use statement::{ SetVariable, Statement, TransactionAccessMode, TransactionConclusion, TransactionEnd, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 93a38fb40df5..aee3a59dd2da 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -154,6 +154,8 @@ pub enum LogicalPlan { /// Unnest a column that contains a nested list type such as an /// ARRAY. This is used to implement SQL `UNNEST` Unnest(Unnest), + /// A variadic query (e.g. "Recursive CTEs") + RecursiveQuery(RecursiveQuery), } impl LogicalPlan { @@ -191,6 +193,10 @@ impl LogicalPlan { LogicalPlan::Copy(CopyTo { input, .. }) => input.schema(), LogicalPlan::Ddl(ddl) => ddl.schema(), LogicalPlan::Unnest(Unnest { schema, .. }) => schema, + LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { + // we take the schema of the static term as the schema of the entire recursive query + static_term.schema() + } } } @@ -243,6 +249,10 @@ impl LogicalPlan { | LogicalPlan::TableScan(_) => { vec![self.schema()] } + LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { + // return only the schema of the static term + static_term.all_schemas() + } // return children schemas LogicalPlan::Limit(_) | LogicalPlan::Subquery(_) @@ -384,6 +394,7 @@ impl LogicalPlan { .try_for_each(f), // plans without expressions LogicalPlan::EmptyRelation(_) + | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Subquery(_) | LogicalPlan::SubqueryAlias(_) | LogicalPlan::Limit(_) @@ -430,6 +441,11 @@ impl LogicalPlan { LogicalPlan::Ddl(ddl) => ddl.inputs(), LogicalPlan::Unnest(Unnest { input, .. }) => vec![input], LogicalPlan::Prepare(Prepare { input, .. }) => vec![input], + LogicalPlan::RecursiveQuery(RecursiveQuery { + static_term, + recursive_term, + .. + }) => vec![static_term, recursive_term], // plans without inputs LogicalPlan::TableScan { .. } | LogicalPlan::Statement { .. } @@ -510,6 +526,9 @@ impl LogicalPlan { cross.left.head_output_expr() } } + LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. 
}) => { + static_term.head_output_expr() + } LogicalPlan::Union(union) => Ok(Some(Expr::Column( union.schema.fields()[0].qualified_column(), ))), @@ -835,6 +854,14 @@ impl LogicalPlan { }; Ok(LogicalPlan::Distinct(distinct)) } + LogicalPlan::RecursiveQuery(RecursiveQuery { + name, is_distinct, .. + }) => Ok(LogicalPlan::RecursiveQuery(RecursiveQuery { + name: name.clone(), + static_term: Arc::new(inputs[0].clone()), + recursive_term: Arc::new(inputs[1].clone()), + is_distinct: *is_distinct, + })), LogicalPlan::Analyze(a) => { assert!(expr.is_empty()); assert_eq!(inputs.len(), 1); @@ -1073,6 +1100,7 @@ impl LogicalPlan { }), LogicalPlan::TableScan(TableScan { fetch, .. }) => *fetch, LogicalPlan::EmptyRelation(_) => Some(0), + LogicalPlan::RecursiveQuery(_) => None, LogicalPlan::Subquery(_) => None, LogicalPlan::SubqueryAlias(SubqueryAlias { input, .. }) => input.max_rows(), LogicalPlan::Limit(Limit { fetch, .. }) => *fetch, @@ -1215,9 +1243,8 @@ impl LogicalPlan { ) -> Result { expr.transform(&|expr| { match &expr { - Expr::Placeholder(Placeholder { id, data_type }) => { - let value = param_values - .get_placeholders_with_values(id, data_type.as_ref())?; + Expr::Placeholder(Placeholder { id, .. }) => { + let value = param_values.get_placeholders_with_values(id)?; // Replace the placeholder with the value Ok(Transformed::Yes(Expr::Literal(value))) } @@ -1408,6 +1435,11 @@ impl LogicalPlan { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self.0 { LogicalPlan::EmptyRelation(_) => write!(f, "EmptyRelation"), + LogicalPlan::RecursiveQuery(RecursiveQuery { + is_distinct, .. + }) => { + write!(f, "RecursiveQuery: is_distinct={}", is_distinct) + } LogicalPlan::Values(Values { ref values, .. }) => { let str_values: Vec<_> = values .iter() @@ -1718,6 +1750,42 @@ pub struct EmptyRelation { pub schema: DFSchemaRef, } +/// A variadic query operation, Recursive CTE. +/// +/// # Recursive Query Evaluation +/// +/// From the [Postgres Docs]: +/// +/// 1. Evaluate the non-recursive term. For `UNION` (but not `UNION ALL`), +/// discard duplicate rows. Include all remaining rows in the result of the +/// recursive query, and also place them in a temporary working table. +// +/// 2. So long as the working table is not empty, repeat these steps: +/// +/// * Evaluate the recursive term, substituting the current contents of the +/// working table for the recursive self-reference. For `UNION` (but not `UNION +/// ALL`), discard duplicate rows and rows that duplicate any previous result +/// row. Include all remaining rows in the result of the recursive query, and +/// also place them in a temporary intermediate table. +/// +/// * Replace the contents of the working table with the contents of the +/// intermediate table, then empty the intermediate table. +/// +/// [Postgres Docs]: https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-RECURSIVE +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct RecursiveQuery { + /// Name of the query + pub name: String, + /// The static term (initial contents of the working table) + pub static_term: Arc, + /// The recursive term (evaluated on the contents of the working table until + /// it returns an empty set) + pub recursive_term: Arc, + /// Should the output of the recursive term be deduplicated (`UNION`) or + /// not (`UNION ALL`). + pub is_distinct: bool, +} + /// Values expression. See /// [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html) /// documentation for more details. 
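A quick illustration of the new API (not part of the patch itself): the sketch below builds a `RecursiveQuery` node through the `LogicalPlanBuilder::to_recursive_query` method added above. The empty-relation terms and the `numbers` name are placeholders standing in for a real CTE's static and recursive terms.

```rust
use datafusion_common::Result;
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};

/// Hypothetical sketch: wire two placeholder plans together as a
/// recursive query and check the resulting plan node.
fn recursive_query_sketch() -> Result<LogicalPlan> {
    // Stand-ins for the real static and recursive terms of a CTE.
    let static_term = LogicalPlanBuilder::empty(true);
    let recursive_term = LogicalPlanBuilder::empty(true).build()?;

    // `is_distinct = false` corresponds to `UNION ALL`; passing `true`
    // (plain `UNION`) is rejected with a `NotImplemented` error.
    let plan = static_term
        .to_recursive_query("numbers".to_string(), recursive_term, false)?
        .build()?;

    // The node reports the static term's schema as the schema of the
    // whole recursive query.
    assert!(matches!(&plan, LogicalPlan::RecursiveQuery(_)));
    Ok(plan)
}
```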
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs index 57888a11d426..a10312e23446 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr/src/operator.rs @@ -69,6 +69,14 @@ pub enum Operator { RegexNotMatch, /// Case insensitive regex not match RegexNotIMatch, + /// Case sensitive pattern match + LikeMatch, + /// Case insensitive pattern match + ILikeMatch, + /// Case sensitive pattern not match + NotLikeMatch, + /// Case insensitive pattern not match + NotILikeMatch, /// Bitwise and, like `&` BitwiseAnd, /// Bitwise or, like `|` @@ -100,6 +108,10 @@ impl Operator { Operator::GtEq => Some(Operator::Lt), Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom), Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom), + Operator::LikeMatch => Some(Operator::NotLikeMatch), + Operator::ILikeMatch => Some(Operator::NotILikeMatch), + Operator::NotLikeMatch => Some(Operator::LikeMatch), + Operator::NotILikeMatch => Some(Operator::ILikeMatch), Operator::Plus | Operator::Minus | Operator::Multiply @@ -192,6 +204,10 @@ impl Operator { | Operator::RegexIMatch | Operator::RegexNotMatch | Operator::RegexNotIMatch + | Operator::LikeMatch + | Operator::ILikeMatch + | Operator::NotLikeMatch + | Operator::NotILikeMatch | Operator::BitwiseAnd | Operator::BitwiseOr | Operator::BitwiseXor @@ -221,6 +237,10 @@ impl Operator { | Operator::RegexNotMatch | Operator::RegexIMatch | Operator::RegexNotIMatch + | Operator::LikeMatch + | Operator::ILikeMatch + | Operator::NotLikeMatch + | Operator::NotILikeMatch | Operator::BitwiseAnd | Operator::BitwiseOr | Operator::BitwiseShiftLeft @@ -253,6 +273,10 @@ impl fmt::Display for Operator { Operator::RegexIMatch => "~*", Operator::RegexNotMatch => "!~", Operator::RegexNotIMatch => "!~*", + Operator::LikeMatch => "~~", + Operator::ILikeMatch => "~~*", + Operator::NotLikeMatch => "!~~", + Operator::NotILikeMatch => "!~~*", Operator::IsDistinctFrom => "IS DISTINCT FROM", Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM", Operator::BitwiseAnd => "&", diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 6bacc1870079..70015c699296 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -101,6 +101,13 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result ) }) } + LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => { + regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| { + plan_datafusion_err!( + "Cannot infer common argument type for regex operation {lhs} {op} {rhs}" + ) + }) + } BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight | BitwiseShiftLeft => { bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| { plan_datafusion_err!( diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 4983f6247d24..444a4f1e8099 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -17,12 +17,13 @@ //! 
[`AggregateUDF`]: User Defined Aggregate Functions +use crate::groups_accumulator::GroupsAccumulator; use crate::{Accumulator, Expr}; use crate::{ AccumulatorFactoryFunction, ReturnTypeFunction, Signature, StateTypeFunction, }; use arrow::datatypes::DataType; -use datafusion_common::Result; +use datafusion_common::{not_impl_err, DataFusionError, Result}; use std::any::Any; use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; @@ -163,6 +164,16 @@ impl AggregateUDF { pub fn state_type(&self, return_type: &DataType) -> Result> { self.inner.state_type(return_type) } + + /// See [`AggregateUDFImpl::groups_accumulator_supported`] for more details. + pub fn groups_accumulator_supported(&self) -> bool { + self.inner.groups_accumulator_supported() + } + + /// See [`AggregateUDFImpl::create_groups_accumulator`] for more details. + pub fn create_groups_accumulator(&self) -> Result> { + self.inner.create_groups_accumulator() + } } impl From for AggregateUDF @@ -250,6 +261,22 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Return the type used to serialize the [`Accumulator`]'s intermediate state. /// See [`Accumulator::state()`] for more details fn state_type(&self, return_type: &DataType) -> Result>; + + /// If the aggregate expression has a specialized + /// [`GroupsAccumulator`] implementation. If this returns true, + /// `[Self::create_groups_accumulator`] will be called. + fn groups_accumulator_supported(&self) -> bool { + false + } + + /// Return a specialized [`GroupsAccumulator`] that manages state + /// for all groups. + /// + /// For maximum performance, a [`GroupsAccumulator`] should be + /// implemented in addition to [`Accumulator`]. + fn create_groups_accumulator(&self) -> Result> { + not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") + } } /// Implementation of [`AggregateUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 40c2c4705362..02479c0765bd 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -911,6 +911,7 @@ pub fn can_hash(data_type: &DataType) -> bool { } DataType::List(_) => true, DataType::LargeList(_) => true, + DataType::FixedSizeList(_, _) => true, _ => false, } } diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index b350d41d3fe3..6aec52ad70d1 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -44,7 +44,7 @@ async-trait = { workspace = true } chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } -datafusion-physical-expr = { path = "../physical-expr", version = "34.0.0", default-features = false } +datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } itertools = { workspace = true } log = { workspace = true } @@ -52,5 +52,5 @@ regex-syntax = "0.8.0" [dev-dependencies] ctor = { workspace = true } -datafusion-sql = { path = "../sql", version = "34.0.0" } +datafusion-sql = { path = "../sql", version = "35.0.0" } env_logger = "0.10.0" diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 8c4e907e6734..c0dad2ef4006 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -44,7 +44,7 @@ use datafusion_expr::type_coercion::other::{ use datafusion_expr::type_coercion::{is_datetime, 
is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ - is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, + is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, type_coercion, AggregateFunction, BuiltinScalarFunction, Expr, ExprSchemable, LogicalPlan, Operator, Projection, ScalarFunctionDefinition, Signature, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -176,6 +176,10 @@ impl TreeNodeRewriter for TypeCoercionRewriter { negated, ))) } + Expr::Not(expr) => { + let expr = not(get_casted_expr_for_bool_op(&expr, &self.schema)?); + Ok(expr) + } Expr::IsTrue(expr) => { let expr = is_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); Ok(expr) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index fc867df23c36..fe71171ce545 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -365,6 +365,7 @@ impl OptimizerRule for CommonSubexprEliminate { | LogicalPlan::Dml(_) | LogicalPlan::Copy(_) | LogicalPlan::Unnest(_) + | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Prepare(_) => { // apply the optimization to all inputs of the plan utils::optimize_children(self, plan, config)? @@ -615,8 +616,8 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { fn pre_visit(&mut self, expr: &Expr) -> Result { // related to https://github.com/apache/arrow-datafusion/issues/8814 - // If the expr contain volatile expression or is a case expression, skip it. - if matches!(expr, Expr::Case(..)) || is_volatile_expression(expr)? { + // If the expr contain volatile expression or is a short-circuit expression, skip it. + if expr.short_circuits() || is_volatile_expression(expr)? { return Ok(VisitRecursion::Skip); } self.visit_stack @@ -695,7 +696,13 @@ struct CommonSubexprRewriter<'a> { impl TreeNodeRewriter for CommonSubexprRewriter<'_> { type N = Expr; - fn pre_visit(&mut self, _: &Expr) -> Result { + fn pre_visit(&mut self, expr: &Expr) -> Result { + // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to generate + // the `id_array`, which records the expr's identifier used to rewrite expr. So if we + // skip an expr in `ExprIdentifierVisitor`, we should skip it here, too. + if expr.short_circuits() || is_volatile_expression(expr)? { + return Ok(RewriteRecursion::Stop); + } if self.curr_index >= self.id_array.len() || self.max_series_number > self.id_array[self.curr_index].0 { @@ -1248,12 +1255,11 @@ mod test { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(table_scan) - .filter(lit(1).gt(col("a")).and(lit(1).gt(col("a"))))? + .filter((lit(1) + col("a") - lit(10)).gt(lit(1) + col("a")))? 
.build()?; let expected = "Projection: test.a, test.b, test.c\ - \n Filter: Int32(1) > test.atest.aInt32(1) AS Int32(1) > test.a AND Int32(1) > test.atest.aInt32(1) AS Int32(1) > test.a\ - \n Projection: Int32(1) > test.a AS Int32(1) > test.atest.aInt32(1), test.a, test.b, test.c\ + \n Filter: Int32(1) + test.atest.aInt32(1) AS Int32(1) + test.a - Int32(10) > Int32(1) + test.atest.aInt32(1) AS Int32(1) + test.a\n Projection: Int32(1) + test.a AS Int32(1) + test.atest.aInt32(1), test.a, test.b, test.c\ \n TableScan: test"; assert_optimized_plan_eq(expected, &plan); diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index d9c45510972c..103599564252 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -163,6 +163,7 @@ fn optimize_projections( .collect::>() } LogicalPlan::EmptyRelation(_) + | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) | LogicalPlan::Extension(_) @@ -217,6 +218,22 @@ fn optimize_projections( // Only use the absolutely necessary aggregate expressions required // by the parent: let mut new_aggr_expr = get_at_indices(&aggregate.aggr_expr, &aggregate_reqs); + + // Aggregations always need at least one aggregate expression. + // With a nested count, we don't require any column as input, but + // still need to create a correct aggregate, which may be optimized + // out later. As an example, consider the following query: + // + // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) + // + // which always returns 1. + if new_aggr_expr.is_empty() + && new_group_bys.is_empty() + && !aggregate.aggr_expr.is_empty() + { + new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; + } + let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter()); let schema = aggregate.input.schema(); let necessary_indices = indices_referred_by_exprs(schema, all_exprs_iter)?; @@ -237,21 +254,6 @@ fn optimize_projections( let (aggregate_input, _) = add_projection_on_top_if_helpful(aggregate_input, necessary_exprs)?; - // Aggregations always need at least one aggregate expression. - // With a nested count, we don't require any column as input, but - // still need to create a correct aggregate, which may be optimized - // out later. As an example, consider the following query: - // - // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) - // - // which always returns 1. - if new_aggr_expr.is_empty() - && new_group_bys.is_empty() - && !aggregate.aggr_expr.is_empty() - { - new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; - } - // Create a new aggregate plan with the updated input and only the // absolutely necessary fields: return Aggregate::try_new( @@ -866,7 +868,9 @@ fn rewrite_projection_given_requirements( return if let Some(input) = optimize_projections(&proj.input, config, &required_indices)? { - if &projection_schema(&input, &exprs_used)? == input.schema() { + if &projection_schema(&input, &exprs_used)? 
== input.schema() + && exprs_used.iter().all(is_expr_trivial) + { Ok(Some(input)) } else { Projection::try_new(exprs_used, Arc::new(input)) @@ -898,7 +902,7 @@ mod tests { use datafusion_common::{Result, TableReference}; use datafusion_expr::{ binary_expr, col, count, lit, logical_plan::builder::LogicalPlanBuilder, not, - table_scan, try_cast, Expr, Like, LogicalPlan, Operator, + table_scan, try_cast, when, Expr, Like, LogicalPlan, Operator, }; fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> { @@ -1162,4 +1166,25 @@ mod tests { \n TableScan: test projection=[a]"; assert_optimized_plan_equal(&plan, expected) } + + // Test outer projection isn't discarded despite the same schema as inner + // https://github.com/apache/arrow-datafusion/issues/8942 + #[test] + fn test_derived_column() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), lit(0).alias("d")])? + .project(vec![ + col("a"), + when(col("a").eq(lit(1)), lit(10)) + .otherwise(col("d"))? + .alias("d"), + ])? + .build()?; + + let expected = "Projection: test.a, CASE WHEN test.a = Int32(1) THEN Int32(10) ELSE d END AS d\ + \n Projection: test.a, Int32(0) AS d\ + \n TableScan: test projection=[a]"; + assert_optimized_plan_equal(&plan, expected) + } } diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 2cb59d511ccf..f53e70ab6489 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -375,10 +375,10 @@ impl Optimizer { let new_inputs = result .into_iter() - .enumerate() - .map(|(i, o)| match o { + .zip(inputs) + .map(|(new_plan, old_plan)| match new_plan { Some(plan) => plan, - None => (*(inputs.get(i).unwrap())).clone(), + None => old_plan.clone(), }) .collect::>(); diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 3ba343003e33..1c1228949171 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -19,8 +19,10 @@ use std::ops::Not; -use super::or_in_list_simplifier::OrInListSimplifier; use super::utils::*; +use super::{ + inlist_simplifier::InListSimplifier, or_in_list_simplifier::OrInListSimplifier, +}; use crate::analyzer::type_coercion::TypeCoercionRewriter; use crate::simplify_expressions::guarantees::GuaranteeRewriter; use crate::simplify_expressions::regex::simplify_regex_expr; @@ -33,11 +35,10 @@ use arrow::{ }; use datafusion_common::{ cast::{as_large_list_array, as_list_array}, - plan_err, tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{ - exec_err, internal_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, + internal_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; use datafusion_expr::{ and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, @@ -133,6 +134,7 @@ impl ExprSimplifier { let mut simplifier = Simplifier::new(&self.info); let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?; let mut or_in_list_simplifier = OrInListSimplifier::new(); + let mut inlist_simplifier = InListSimplifier::new(); let mut guarantee_rewriter = GuaranteeRewriter::new(&self.guarantees); // TODO iterate until no changes are made during rewrite @@ -142,12 +144,17 @@ impl ExprSimplifier { expr.rewrite(&mut const_evaluator)? 
.rewrite(&mut simplifier)? .rewrite(&mut or_in_list_simplifier)? + .rewrite(&mut inlist_simplifier)? .rewrite(&mut guarantee_rewriter)? // run both passes twice to try an minimize simplifications that we missed .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier) } + pub fn canonicalize(&self, expr: Expr) -> Result { + let mut canonicalizer = Canonicalizer::new(); + expr.rewrite(&mut canonicalizer) + } /// Apply type coercion to an [`Expr`] so that it can be /// evaluated as a [`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr). /// @@ -224,6 +231,51 @@ impl ExprSimplifier { } } +/// Canonicalize any BinaryExprs that are not in canonical form +/// +/// ` ` is rewritten to ` ` +/// +/// ` ` is rewritten so that the name of `col1` sorts higher +/// than `col2` (`b > a` would be canonicalized to `a < b`) +struct Canonicalizer {} + +impl Canonicalizer { + fn new() -> Self { + Self {} + } +} + +impl TreeNodeRewriter for Canonicalizer { + type N = Expr; + + fn mutate(&mut self, expr: Expr) -> Result { + let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else { + return Ok(expr); + }; + match (left.as_ref(), right.as_ref(), op.swap()) { + // + (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op)) + if right_col > left_col => + { + Ok(Expr::BinaryExpr(BinaryExpr { + left: right, + op: swapped_op, + right: left, + })) + } + // + (Expr::Literal(_a), Expr::Column(_b), Some(swapped_op)) => { + Ok(Expr::BinaryExpr(BinaryExpr { + left: right, + op: swapped_op, + right: left, + })) + } + _ => Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })), + } + } +} + #[allow(rustdoc::private_intra_doc_links)] /// Partially evaluate `Expr`s so constant subtrees are evaluated at plan time. /// @@ -249,6 +301,14 @@ struct ConstEvaluator<'a> { input_batch: RecordBatch, } +/// The simplify result of ConstEvaluator +enum ConstSimplifyResult { + // Expr was simplifed and contains the new expression + Simplified(ScalarValue), + // Evalaution encountered an error, contains the original expression + SimplifyRuntimeError(DataFusionError, Expr), +} + impl<'a> TreeNodeRewriter for ConstEvaluator<'a> { type N = Expr; @@ -281,7 +341,17 @@ impl<'a> TreeNodeRewriter for ConstEvaluator<'a> { fn mutate(&mut self, expr: Expr) -> Result { match self.can_evaluate.pop() { - Some(true) => Ok(Expr::Literal(self.evaluate_to_scalar(expr)?)), + // Certain expressions such as `CASE` and `COALESCE` are short circuiting + // and may not evalute all their sub expressions. 
Thus if + // if any error is countered during simplification, return the original + // so that normal evaluation can occur + Some(true) => { + let result = self.evaluate_to_scalar(expr); + match result { + ConstSimplifyResult::Simplified(s) => Ok(Expr::Literal(s)), + ConstSimplifyResult::SimplifyRuntimeError(_, expr) => Ok(expr), + } + } Some(false) => Ok(expr), _ => internal_err!("Failed to pop can_evaluate"), } @@ -376,29 +446,40 @@ impl<'a> ConstEvaluator<'a> { } /// Internal helper to evaluates an Expr - pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> Result { + pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult { if let Expr::Literal(s) = expr { - return Ok(s); + return ConstSimplifyResult::Simplified(s); } let phys_expr = - create_physical_expr(&expr, &self.input_schema, self.execution_props)?; - let col_val = phys_expr.evaluate(&self.input_batch)?; + match create_physical_expr(&expr, &self.input_schema, self.execution_props) { + Ok(e) => e, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; + let col_val = match phys_expr.evaluate(&self.input_batch) { + Ok(v) => v, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; match col_val { ColumnarValue::Array(a) => { if a.len() != 1 { - exec_err!( - "Could not evaluate the expression, found a result of length {}", - a.len() + ConstSimplifyResult::SimplifyRuntimeError( + DataFusionError::Execution(format!("Could not evaluate the expression, found a result of length {}", a.len())), + expr, ) } else if as_list_array(&a).is_ok() || as_large_list_array(&a).is_ok() { - Ok(ScalarValue::List(a.as_list().to_owned().into())) + ConstSimplifyResult::Simplified(ScalarValue::List( + a.as_list().to_owned().into(), + )) } else { // Non-ListArray - ScalarValue::try_from_array(&a, 0) + match ScalarValue::try_from_array(&a, 0) { + Ok(s) => ConstSimplifyResult::Simplified(s), + Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr), + } } } - ColumnarValue::Scalar(s) => Ok(s), + ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s), } } } @@ -796,18 +877,6 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { op: Divide, right, }) if is_null(&right) => *right, - // A / 0 -> Divide by zero error if A is not null and not floating - // (float / 0 -> inf | -inf | NAN) - Expr::BinaryExpr(BinaryExpr { - left, - op: Divide, - right, - }) if !info.nullable(&left)? - && !info.get_data_type(&left)?.is_floating() - && is_zero(&right) => - { - return plan_err!("Divide by zero"); - } // // Rules for Modulo @@ -836,21 +905,6 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { { lit(0) } - // A % 0 --> Divide by zero Error (if A is not floating and not null) - // A % 0 --> NAN (if A is floating and not null) - Expr::BinaryExpr(BinaryExpr { - left, - op: Modulo, - right, - }) if !info.nullable(&left)? && is_zero(&right) => { - match info.get_data_type(&left)? 
{ - DataType::Float32 => lit(f32::NAN), - DataType::Float64 => lit(f64::NAN), - _ => { - return plan_err!("Divide by zero"); - } - } - } // // Rules for BitwiseAnd @@ -1317,15 +1371,12 @@ mod tests { array::{ArrayRef, Int32Array}, datatypes::{DataType, Field, Schema}, }; - use datafusion_common::{ - assert_contains, cast::as_int32_array, plan_datafusion_err, DFField, ToDFSchema, - }; + use datafusion_common::{assert_contains, cast::as_int32_array, DFField, ToDFSchema}; use datafusion_expr::{interval_arithmetic::Interval, *}; - use datafusion_physical_expr::{ - execution_props::ExecutionProps, functions::make_scalar_function, - }; + use datafusion_physical_expr::execution_props::ExecutionProps; use chrono::{DateTime, TimeZone, Utc}; + use datafusion_physical_expr::functions::columnar_values_to_array; // ------------------------------ // --- ExprSimplifier tests ----- @@ -1438,7 +1489,9 @@ mod tests { let input_types = vec![DataType::Int32, DataType::Int32]; let return_type = Arc::new(DataType::Int32); - let fun = |args: &[ArrayRef]| { + let fun = Arc::new(|args: &[ColumnarValue]| { + let args = columnar_values_to_array(args)?; + let arg0 = as_int32_array(&args[0])?; let arg1 = as_int32_array(&args[1])?; @@ -1456,10 +1509,9 @@ mod tests { }) .collect::(); - Ok(Arc::new(array) as ArrayRef) - }; + Ok(ColumnarValue::from(Arc::new(array) as ArrayRef)) + }); - let fun = make_scalar_function(fun); Arc::new(create_udf( "udf_add", input_types, @@ -1590,6 +1642,58 @@ mod tests { // --- Simplifier tests ----- // ------------------------------ + #[test] + fn test_simplify_canonicalize() { + { + let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1))); + let expected = col("c2").gt(lit(1)); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1"))); + let expected = col("c2").gt(col("c1")); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1") + .eq(lit(1)) + .and(lit(1).eq(col("c1"))) + .and(col("c1").eq(lit(3))); + let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3))); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1") + .eq(col("c2")) + .and(col("c1").gt(lit(5))) + .and(col("c2").eq(col("c1"))); + let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5))); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1") + .eq(lit(1)) + .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2")))); + let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3))); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5))); + let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5))); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5))); + let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5))); + assert_eq!(simplify(expr), expected); + } + { + let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2"))); + let expected = col("c2").lt(col("c1")); + assert_eq!(simplify(expr), expected); + } + } + #[test] fn test_simplify_or_true() { let expr_a = col("c2").or(lit(true)); @@ -1773,27 +1877,6 @@ mod tests { assert_eq!(simplify(expr), expected); } - #[test] - fn test_simplify_divide_zero_by_zero() { - // 0 / 0 -> Divide by zero - let expr = lit(0) / lit(0); - let err = try_simplify(expr).unwrap_err(); - - let _expected = plan_datafusion_err!("Divide by zero"); - - assert!(matches!(err, ref _expected), "{err}"); - } - - #[test] - fn test_simplify_divide_by_zero() { - // A / 0 -> 
DivideByZeroError - let expr = col("c2_non_null") / lit(0); - assert_eq!( - try_simplify(expr).unwrap_err().strip_backtrace(), - "Error during planning: Divide by zero" - ); - } - #[test] fn test_simplify_modulo_by_null() { let null = lit(ScalarValue::Null); @@ -1818,6 +1901,26 @@ mod tests { assert_eq!(simplify(expr), expected); } + #[test] + fn test_simplify_divide_zero_by_zero() { + // because divide by 0 maybe occur in short-circuit expression + // so we should not simplify this, and throw error in runtime + let expr = lit(0) / lit(0); + let expected = expr.clone(); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_divide_by_zero() { + // because divide by 0 maybe occur in short-circuit expression + // so we should not simplify this, and throw error in runtime + let expr = col("c2_non_null") / lit(0); + let expected = expr.clone(); + + assert_eq!(simplify(expr), expected); + } + #[test] fn test_simplify_modulo_by_one_non_null() { let expr = col("c2_non_null") % lit(1); @@ -2212,11 +2315,12 @@ mod tests { #[test] fn test_simplify_modulo_by_zero_non_null() { + // because modulo by 0 maybe occur in short-circuit expression + // so we should not simplify this, and throw error in runtime. let expr = col("c2_non_null") % lit(0); - assert_eq!( - try_simplify(expr).unwrap_err().strip_backtrace(), - "Error during planning: Divide by zero" - ); + let expected = expr.clone(); + + assert_eq!(simplify(expr), expected); } #[test] @@ -2785,7 +2889,8 @@ mod tests { let simplifier = ExprSimplifier::new( SimplifyContext::new(&execution_props).with_schema(schema), ); - simplifier.simplify(expr) + let cano = simplifier.canonicalize(expr)?; + simplifier.simplify(cano) } fn simplify(expr: Expr) -> Expr { @@ -3182,11 +3287,118 @@ mod tests { col("c1").eq(subquery1).or(col("c1").eq(subquery2)) ); - // c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> - // c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) + // 1. c1 IN (1,2,3,4) AND c1 IN (5,6,7,8) -> false + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( + in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false), + ); + assert_eq!(simplify(expr.clone()), lit(false)); + + // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4 + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( + in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false), + ); + assert_eq!(simplify(expr.clone()), col("c1").eq(lit(4))); + + // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), ); + assert_eq!(simplify(expr.clone()), lit(true)); + + // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7) + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and( + in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true), + ); + assert_eq!( + simplify(expr.clone()), + in_list( + col("c1"), + vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)], + true + ) + ); + + // 5. c1 IN (1,2,3,4) OR c1 IN (2,3,4,5) -> c1 IN (1,2,3,4,5) + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or( + in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false), + ); + assert_eq!( + simplify(expr.clone()), + in_list( + col("c1"), + vec![lit(1), lit(2), lit(3), lit(4), lit(5)], + false + ) + ); + + // 6. 
c1 IN (1,2,3) AND c1 NOT INT (1,2,3,4,5) -> false + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list( + col("c1"), + vec![lit(1), lit(2), lit(3), lit(4), lit(5)], + true, + )); + assert_eq!(simplify(expr.clone()), lit(false)); + + // 7. c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5 + let expr = + in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list( + col("c1"), + vec![lit(1), lit(2), lit(3), lit(4), lit(5)], + false, + )); + assert_eq!(simplify(expr.clone()), col("c1").eq(lit(5))); + + // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4) + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( + in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), + ); + assert_eq!( + simplify(expr.clone()), + in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false) + ); + + // inlist with more than two expressions + // c1 IN (1,2,3,4,5,6) AND c1 IN (1,3,5,6) AND c1 IN (3,6) -> c1 = 3 OR c1 = 6 + let expr = in_list( + col("c1"), + vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)], + false, + ) + .and(in_list( + col("c1"), + vec![lit(1), lit(3), lit(5), lit(6)], + false, + )) + .and(in_list(col("c1"), vec![lit(3), lit(6)], false)); + assert_eq!( + simplify(expr.clone()), + col("c1").eq(lit(3)).or(col("c1").eq(lit(6))) + ); + + // c1 NOT IN (1,2,3,4) AND c1 IN (5,6,7,8) AND c1 NOT IN (3,4,5,6) AND c1 IN (8,9,10) -> c1 = 8 + let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and( + in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false) + .and(in_list( + col("c1"), + vec![lit(3), lit(4), lit(5), lit(6)], + true, + )) + .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)), + ); + assert_eq!(simplify(expr.clone()), col("c1").eq(lit(8))); + + // Contains non-InList expression + // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) + let expr = + in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1") + .not_eq(lit(5)) + .or(in_list( + col("c1"), + vec![lit(6), lit(7), lit(8), lit(9)], + true, + ))); + // TODO: Further simplify this expression + // assert_eq!(simplify(expr.clone()), lit(true)); assert_eq!(simplify(expr.clone()), expr); } @@ -3366,4 +3578,22 @@ mod tests { let output = simplify_with_guarantee(expr.clone(), guarantees); assert_eq!(&output, &expr_x); } + + #[test] + fn test_expression_partial_simplify_1() { + // (1 + 2) + (4 / 0) -> 3 + (4 / 0) + let expr = (lit(1) + lit(2)) + (lit(4) / lit(0)); + let expected = (lit(3)) + (lit(4) / lit(0)); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_expression_partial_simplify_2() { + // (1 > 2) and (4 / 0) -> false + let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0)); + let expected = lit(false); + + assert_eq!(simplify(expr), expected); + } } diff --git a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs new file mode 100644 index 000000000000..fa95f1688e6f --- /dev/null +++ b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module implements a rule that simplifies the values for `InList`s + +use std::collections::HashSet; + +use datafusion_common::tree_node::TreeNodeRewriter; +use datafusion_common::Result; +use datafusion_expr::expr::InList; +use datafusion_expr::{lit, BinaryExpr, Expr, Operator}; + +/// Simplify expressions that is guaranteed to be true or false to a literal boolean expression +/// +/// Rules: +/// If both expressions are `IN` or `NOT IN`, then we can apply intersection or union on both lists +/// Intersection: +/// 1. `a in (1,2,3) AND a in (4,5) -> a in (), which is false` +/// 2. `a in (1,2,3) AND a in (2,3,4) -> a in (2,3)` +/// 3. `a not in (1,2,3) OR a not in (3,4,5,6) -> a not in (3)` +/// Union: +/// 4. `a not int (1,2,3) AND a not in (4,5,6) -> a not in (1,2,3,4,5,6)` +/// # This rule is handled by `or_in_list_simplifier.rs` +/// 5. `a in (1,2,3) OR a in (4,5,6) -> a in (1,2,3,4,5,6)` +/// If one of the expressions is `IN` and another one is `NOT IN`, then we apply exception on `In` expression +/// 6. `a in (1,2,3,4) AND a not in (1,2,3,4,5) -> a in (), which is false` +/// 7. `a not in (1,2,3,4) AND a in (1,2,3,4,5) -> a = 5` +/// 8. `a in (1,2,3,4) AND a not in (5,6,7,8) -> a in (1,2,3,4)` +pub(super) struct InListSimplifier {} + +impl InListSimplifier { + pub(super) fn new() -> Self { + Self {} + } +} + +impl TreeNodeRewriter for InListSimplifier { + type N = Expr; + + fn mutate(&mut self, expr: Expr) -> Result { + if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = &expr { + if let (Expr::InList(l1), Operator::And, Expr::InList(l2)) = + (left.as_ref(), op, right.as_ref()) + { + if l1.expr == l2.expr && !l1.negated && !l2.negated { + return inlist_intersection(l1, l2, false); + } else if l1.expr == l2.expr && l1.negated && l2.negated { + return inlist_union(l1, l2, true); + } else if l1.expr == l2.expr && !l1.negated && l2.negated { + return inlist_except(l1, l2); + } else if l1.expr == l2.expr && l1.negated && !l2.negated { + return inlist_except(l2, l1); + } + } else if let (Expr::InList(l1), Operator::Or, Expr::InList(l2)) = + (left.as_ref(), op, right.as_ref()) + { + if l1.expr == l2.expr && l1.negated && l2.negated { + return inlist_intersection(l1, l2, true); + } + } + } + + Ok(expr) + } +} + +fn inlist_union(l1: &InList, l2: &InList, negated: bool) -> Result { + let mut seen: HashSet = HashSet::new(); + let list = l1 + .list + .iter() + .chain(l2.list.iter()) + .filter(|&e| seen.insert(e.to_owned())) + .cloned() + .collect::>(); + let merged_inlist = InList { + expr: l1.expr.clone(), + list, + negated, + }; + Ok(Expr::InList(merged_inlist)) +} + +fn inlist_intersection(l1: &InList, l2: &InList, negated: bool) -> Result { + let l1_set: HashSet = l1.list.iter().cloned().collect(); + let intersect_list: Vec = l2 + .list + .iter() + .filter(|x| l1_set.contains(x)) + .cloned() + .collect(); + // e in () is always false + // e not in () is always true + if 
intersect_list.is_empty() { + return Ok(lit(negated)); + } + let merged_inlist = InList { + expr: l1.expr.clone(), + list: intersect_list, + negated, + }; + Ok(Expr::InList(merged_inlist)) +} + +fn inlist_except(l1: &InList, l2: &InList) -> Result { + let l2_set: HashSet = l2.list.iter().cloned().collect(); + let except_list: Vec = l1 + .list + .iter() + .filter(|x| !l2_set.contains(x)) + .cloned() + .collect(); + if except_list.is_empty() { + return Ok(lit(false)); + } + let merged_inlist = InList { + expr: l1.expr.clone(), + list: except_list, + negated: false, + }; + Ok(Expr::InList(merged_inlist)) +} diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index 2cf6ed166cdd..44ba5b3e3b84 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -18,6 +18,7 @@ pub mod context; pub mod expr_simplifier; mod guarantees; +mod inlist_simplifier; mod or_in_list_simplifier; mod regex; pub mod simplify_exprs; diff --git a/datafusion/optimizer/src/simplify_expressions/or_in_list_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/or_in_list_simplifier.rs index cebaaccc41c7..fd5c9ecaf82c 100644 --- a/datafusion/optimizer/src/simplify_expressions/or_in_list_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/or_in_list_simplifier.rs @@ -18,6 +18,7 @@ //! This module implements a rule that simplifies OR expressions into IN list expressions use std::borrow::Cow; +use std::collections::HashSet; use datafusion_common::tree_node::TreeNodeRewriter; use datafusion_common::Result; @@ -52,9 +53,14 @@ impl TreeNodeRewriter for OrInListSimplifier { { let lhs = lhs.into_owned(); let rhs = rhs.into_owned(); - let mut list = vec![]; - list.extend(lhs.list); - list.extend(rhs.list); + let mut seen: HashSet = HashSet::new(); + let list = lhs + .list + .into_iter() + .chain(rhs.list) + .filter(|e| seen.insert(e.to_owned())) + .collect::>(); + let merged_inlist = InList { expr: lhs.expr, list, diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 43a41b1185a3..7265b17dd0f3 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -93,16 +93,34 @@ impl SimplifyExpressions { .map(|input| Self::optimize_internal(input, execution_props)) .collect::>>()?; - let expr = plan - .expressions() - .into_iter() - .map(|e| { - // TODO: unify with `rewrite_preserving_name` - let original_name = e.name_for_alias()?; - let new_e = simplifier.simplify(e)?; - new_e.alias_if_changed(original_name) - }) - .collect::>>()?; + let expr = match plan { + // Canonicalize step won't reorder expressions in a Join on clause. + // The left and right expressions in a Join on clause are not commutative, + // since the order of the columns must match the order of the children. + LogicalPlan::Join(_) => { + plan.expressions() + .into_iter() + .map(|e| { + // TODO: unify with `rewrite_preserving_name` + let original_name = e.name_for_alias()?; + let new_e = simplifier.simplify(e)?; + new_e.alias_if_changed(original_name) + }) + .collect::>>()? 
+ } + _ => { + plan.expressions() + .into_iter() + .map(|e| { + // TODO: unify with `rewrite_preserving_name` + let original_name = e.name_for_alias()?; + let cano_e = simplifier.canonicalize(e)?; + let new_e = simplifier.simplify(cano_e)?; + new_e.alias_if_changed(original_name) + }) + .collect::>>()? + } + }; plan.with_new_exprs(expr, &new_inputs) } @@ -138,28 +156,6 @@ mod tests { ExprSchemable, JoinType, }; - /// A macro to assert that one string is contained within another with - /// a nice error message if they are not. - /// - /// Usage: `assert_contains!(actual, expected)` - /// - /// Is a macro so test error - /// messages are on the same line as the failure; - /// - /// Both arguments must be convertable into Strings (Into) - macro_rules! assert_contains { - ($ACTUAL: expr, $EXPECTED: expr) => { - let actual_value: String = $ACTUAL.into(); - let expected_value: String = $EXPECTED.into(); - assert!( - actual_value.contains(&expected_value), - "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}", - expected_value, - actual_value - ); - }; - } - fn test_table_scan() -> LogicalPlan { let schema = Schema::new(vec![ Field::new("a", DataType::Boolean, false), @@ -425,18 +421,6 @@ mod tests { assert_optimized_plan_eq(&plan, expected) } - // expect optimizing will result in an error, returning the error string - fn get_optimized_plan_err(plan: &LogicalPlan, date_time: &DateTime) -> String { - let config = OptimizerContext::new().with_query_execution_start_time(*date_time); - let rule = SimplifyExpressions::new(); - - let err = rule - .try_optimize(plan, &config) - .expect_err("expected optimization to fail"); - - err.to_string() - } - fn get_optimized_plan_formatted( plan: &LogicalPlan, date_time: &DateTime, @@ -468,21 +452,6 @@ mod tests { Ok(()) } - #[test] - fn to_timestamp_expr_wrong_arg() -> Result<()> { - let table_scan = test_table_scan(); - let proj = vec![to_timestamp_expr("I'M NOT A TIMESTAMP")]; - let plan = LogicalPlanBuilder::from(table_scan) - .project(proj)? - .build()?; - - let expected = - "Error parsing timestamp from 'I'M NOT A TIMESTAMP': error parsing date"; - let actual = get_optimized_plan_err(&plan, &Utc::now()); - assert_contains!(actual, expected); - Ok(()) - } - #[test] fn cast_expr() -> Result<()> { let table_scan = test_table_scan(); @@ -498,20 +467,6 @@ mod tests { Ok(()) } - #[test] - fn cast_expr_wrong_arg() -> Result<()> { - let table_scan = test_table_scan(); - let proj = vec![Expr::Cast(Cast::new(Box::new(lit("")), DataType::Int32))]; - let plan = LogicalPlanBuilder::from(table_scan) - .project(proj)? - .build()?; - - let expected = "Cannot cast string '' to value of Int32 type"; - let actual = get_optimized_plan_err(&plan, &Utc::now()); - assert_contains!(actual, expected); - Ok(()) - } - #[test] fn multiple_now_expr() -> Result<()> { let table_scan = test_table_scan(); diff --git a/datafusion/physical-expr/src/aggregate/approx_distinct.rs b/datafusion/physical-expr/src/aggregate/approx_distinct.rs index b79a5611c334..66e1310695ad 100644 --- a/datafusion/physical-expr/src/aggregate/approx_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/approx_distinct.rs @@ -244,12 +244,12 @@ macro_rules! 
default_accumulator_impl { Ok(()) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let value = ScalarValue::from(&self.hll); Ok(vec![value]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::UInt64(Some(self.hll.count() as u64))) } diff --git a/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs b/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs index 15c0fb3ace4d..b3de7b0b4d36 100644 --- a/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs +++ b/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs @@ -377,7 +377,7 @@ impl ApproxPercentileAccumulator { } impl Accumulator for ApproxPercentileAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(self.digest.to_scalar_state().into_iter().collect()) } @@ -389,7 +389,7 @@ impl Accumulator for ApproxPercentileAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { if self.digest.count() == 0.0 { return exec_err!("aggregate function needs at least one non-null element"); } diff --git a/datafusion/physical-expr/src/aggregate/approx_percentile_cont_with_weight.rs b/datafusion/physical-expr/src/aggregate/approx_percentile_cont_with_weight.rs index ee5ef7228f4b..3fa715a59238 100644 --- a/datafusion/physical-expr/src/aggregate/approx_percentile_cont_with_weight.rs +++ b/datafusion/physical-expr/src/aggregate/approx_percentile_cont_with_weight.rs @@ -129,7 +129,7 @@ impl ApproxPercentileWithWeightAccumulator { } impl Accumulator for ApproxPercentileWithWeightAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { self.approx_percentile_cont_accumulator.state() } @@ -155,7 +155,7 @@ impl Accumulator for ApproxPercentileWithWeightAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { self.approx_percentile_cont_accumulator.evaluate() } diff --git a/datafusion/physical-expr/src/aggregate/array_agg.rs b/datafusion/physical-expr/src/aggregate/array_agg.rs index 91d5c867d312..5dc29f834feb 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg.rs @@ -153,11 +153,11 @@ impl Accumulator for ArrayAggAccumulator { Ok(()) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { // Transform Vec to ListArr let element_arrays: Vec<&dyn Array> = diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs index 2d263a42e0ff..a58856e398e3 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs @@ -129,7 +129,7 @@ impl DistinctArrayAggAccumulator { } impl Accumulator for DistinctArrayAggAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } @@ -163,7 +163,7 @@ impl Accumulator for DistinctArrayAggAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let values: Vec = self.values.iter().cloned().collect(); let arr = ScalarValue::new_list(&values, &self.datatype); Ok(ScalarValue::List(arr)) diff --git a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs index 34f8d20628dc..5263fa83a6eb 100644 --- 
a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs @@ -279,13 +279,13 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { Ok(()) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let mut result = vec![self.evaluate()?]; result.push(self.evaluate_orderings()?); Ok(result) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let values = self.values.clone(); let array = if self.reverse { ScalarValue::new_list_from_iter(values.into_iter().rev(), &self.datatypes[0]) diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs index 91f2fb952dce..57f8fa211e58 100644 --- a/datafusion/physical-expr/src/aggregate/average.rs +++ b/datafusion/physical-expr/src/aggregate/average.rs @@ -27,7 +27,7 @@ use std::sync::Arc; use crate::aggregate::groups_accumulator::accumulate::NullState; use crate::aggregate::utils::down_cast_any_ref; use crate::expressions::format_state_name; -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::compute::sum; use arrow::datatypes::{DataType, Decimal128Type, Float64Type, UInt64Type}; use arrow::{ @@ -41,9 +41,8 @@ use arrow_array::{ use arrow_buffer::{i256, ArrowNativeType}; use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::avg_return_type; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, EmitTo, GroupsAccumulator}; -use super::groups_accumulator::EmitTo; use super::utils::DecimalAverager; /// AVG aggregate expression @@ -239,7 +238,7 @@ pub struct AvgAccumulator { } impl Accumulator for AvgAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.count), ScalarValue::Float64(self.sum), @@ -277,7 +276,7 @@ impl Accumulator for AvgAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Float64( self.sum.map(|f| f / self.count as f64), )) @@ -315,7 +314,7 @@ impl Debug for DecimalAvgAccumulator { } impl Accumulator for DecimalAvgAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.count), ScalarValue::new_primitive::( @@ -357,7 +356,7 @@ impl Accumulator for DecimalAvgAccumulator Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let v = self .sum .map(|v| { diff --git a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs index 6c97d620616a..ad5e8a5ac78f 100644 --- a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs +++ b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs @@ -22,11 +22,11 @@ use datafusion_common::cast::as_list_array; use std::any::Any; use std::sync::Arc; -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::datatypes::DataType; use arrow::{array::ArrayRef, datatypes::Field}; use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, GroupsAccumulator}; use std::collections::HashSet; use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; @@ -190,11 +190,11 @@ where self.update_batch(states) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } - fn 
evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { ScalarValue::new_primitive::(self.value, &T::DATA_TYPE) } @@ -339,7 +339,7 @@ impl Accumulator for BitOrAccumulator where T::Native: std::ops::BitOr, { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } @@ -355,7 +355,7 @@ where self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { ScalarValue::new_primitive::(self.value, &T::DATA_TYPE) } @@ -500,7 +500,7 @@ impl Accumulator for BitXorAccumulator where T::Native: std::ops::BitXor, { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } @@ -516,7 +516,7 @@ where self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { ScalarValue::new_primitive::(self.value, &T::DATA_TYPE) } @@ -634,7 +634,7 @@ impl Accumulator for DistinctBitXorAccumulator where T::Native: std::ops::BitXor + std::hash::Hash + Eq, { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { // 1. Stores aggregate state in `ScalarValue::List` // 2. Constructs `ScalarValue::List` state from distinct numeric stored in hash set let state_out = { @@ -679,7 +679,7 @@ where Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let mut acc = T::Native::usize_as(0); for distinct_value in self.values.iter() { acc = acc ^ *distinct_value; diff --git a/datafusion/physical-expr/src/aggregate/bool_and_or.rs b/datafusion/physical-expr/src/aggregate/bool_and_or.rs index 9757d314b6aa..0a018fe086d2 100644 --- a/datafusion/physical-expr/src/aggregate/bool_and_or.rs +++ b/datafusion/physical-expr/src/aggregate/bool_and_or.rs @@ -17,7 +17,7 @@ //! Defines physical expressions that can evaluated at runtime during query execution -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::datatypes::DataType; use arrow::{ array::{ArrayRef, BooleanArray}, @@ -26,7 +26,7 @@ use arrow::{ use datafusion_common::{ downcast_value, internal_err, not_impl_err, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, GroupsAccumulator}; use std::any::Any; use std::sync::Arc; @@ -191,11 +191,11 @@ impl Accumulator for BoolAndAccumulator { self.update_batch(states) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ScalarValue::Boolean(self.acc)]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Boolean(self.acc)) } @@ -309,7 +309,7 @@ struct BoolOrAccumulator { } impl Accumulator for BoolOrAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ScalarValue::Boolean(self.acc)]) } @@ -328,7 +328,7 @@ impl Accumulator for BoolOrAccumulator { self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Boolean(self.acc)) } diff --git a/datafusion/physical-expr/src/aggregate/correlation.rs b/datafusion/physical-expr/src/aggregate/correlation.rs index 61f2db5c8ef9..4dca1e4a881e 100644 --- a/datafusion/physical-expr/src/aggregate/correlation.rs +++ b/datafusion/physical-expr/src/aggregate/correlation.rs @@ -149,7 +149,7 @@ impl CorrelationAccumulator { } impl Accumulator for CorrelationAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.covar.get_count()), ScalarValue::from(self.covar.get_mean1()), @@ -215,7 +215,7 @@ impl Accumulator 
for CorrelationAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let covar = self.covar.evaluate()?; let stddev1 = self.stddev1.evaluate()?; let stddev2 = self.stddev2.evaluate()?; @@ -519,7 +519,7 @@ mod tests { .collect::>>()?; accum1.update_batch(&values1)?; accum2.update_batch(&values2)?; - let state2 = get_accum_scalar_values_as_arrays(accum2.as_ref())?; + let state2 = get_accum_scalar_values_as_arrays(accum2.as_mut())?; accum1.merge_batch(&state2)?; accum1.evaluate() } diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs index 8e9ae5cea36b..3b0fe0efd3ca 100644 --- a/datafusion/physical-expr/src/aggregate/count.rs +++ b/datafusion/physical-expr/src/aggregate/count.rs @@ -23,7 +23,7 @@ use std::ops::BitAnd; use std::sync::Arc; use crate::aggregate::utils::down_cast_any_ref; -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::array::{Array, Int64Array}; use arrow::compute; use arrow::datatypes::DataType; @@ -34,12 +34,11 @@ use arrow_array::PrimitiveArray; use arrow_buffer::BooleanBuffer; use datafusion_common::{downcast_value, ScalarValue}; use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, EmitTo, GroupsAccumulator}; use crate::expressions::format_state_name; use super::groups_accumulator::accumulate::accumulate_indices; -use super::groups_accumulator::EmitTo; /// COUNT aggregate expression /// Returns the amount of non-null values of the given expression. @@ -295,7 +294,7 @@ impl CountAccumulator { } impl Accumulator for CountAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ScalarValue::Int64(Some(self.count))]) } @@ -320,7 +319,7 @@ impl Accumulator for CountAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Int64(Some(self.count))) } diff --git a/datafusion/physical-expr/src/aggregate/count_distinct.rs b/datafusion/physical-expr/src/aggregate/count_distinct.rs index 021c33fb94a7..ef1a248d5f82 100644 --- a/datafusion/physical-expr/src/aggregate/count_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/count_distinct.rs @@ -212,7 +212,7 @@ impl DistinctCountAccumulator { } impl Accumulator for DistinctCountAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let scalars = self.values.iter().cloned().collect::>(); let arr = ScalarValue::new_list(scalars.as_slice(), &self.state_data_type); Ok(vec![ScalarValue::List(arr)]) @@ -249,7 +249,7 @@ impl Accumulator for DistinctCountAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Int64(Some(self.values.len() as i64))) } @@ -288,7 +288,7 @@ where T: ArrowPrimitiveType + Send + Debug, T::Native: Eq + Hash, { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().cloned(), )) as ArrayRef; @@ -331,7 +331,7 @@ where }) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Int64(Some(self.values.len() as i64))) } @@ -374,7 +374,7 @@ impl Accumulator for FloatDistinctCountAccumulator where T: ArrowPrimitiveType + Send + Debug, { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().map(|v| v.0), )) as ArrayRef; @@ -418,7 +418,7 @@ 
where }) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Int64(Some(self.values.len() as i64))) } diff --git a/datafusion/physical-expr/src/aggregate/covariance.rs b/datafusion/physical-expr/src/aggregate/covariance.rs index 0f838eb6fa1c..45f9926975d3 100644 --- a/datafusion/physical-expr/src/aggregate/covariance.rs +++ b/datafusion/physical-expr/src/aggregate/covariance.rs @@ -260,7 +260,7 @@ impl CovarianceAccumulator { } impl Accumulator for CovarianceAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.count), ScalarValue::from(self.mean1), @@ -381,7 +381,7 @@ impl Accumulator for CovarianceAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let count = match self.stats_type { StatsType::Population => self.count, StatsType::Sample => { @@ -768,7 +768,7 @@ mod tests { .collect::>>()?; accum1.update_batch(&values1)?; accum2.update_batch(&values2)?; - let state2 = get_accum_scalar_values_as_arrays(accum2.as_ref())?; + let state2 = get_accum_scalar_values_as_arrays(accum2.as_mut())?; accum1.merge_batch(&state2)?; accum1.evaluate() } diff --git a/datafusion/physical-expr/src/aggregate/first_last.rs b/datafusion/physical-expr/src/aggregate/first_last.rs index 4afa8d0dd5ec..d2bf48551f0d 100644 --- a/datafusion/physical-expr/src/aggregate/first_last.rs +++ b/datafusion/physical-expr/src/aggregate/first_last.rs @@ -270,7 +270,7 @@ impl FirstValueAccumulator { } impl Accumulator for FirstValueAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let mut result = vec![self.first.clone()]; result.extend(self.orderings.iter().cloned()); result.push(ScalarValue::Boolean(Some(self.is_set))); @@ -336,7 +336,7 @@ impl Accumulator for FirstValueAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.first.clone()) } @@ -586,7 +586,7 @@ impl LastValueAccumulator { } impl Accumulator for LastValueAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let mut result = vec![self.last.clone()]; result.extend(self.orderings.clone()); result.push(ScalarValue::Boolean(Some(self.is_set))); @@ -655,7 +655,7 @@ impl Accumulator for LastValueAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.last.clone()) } diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs index 596265a737da..7080ea40039d 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs @@ -17,14 +17,13 @@ //! [`GroupsAccumulator`] helpers: [`NullState`] and [`accumulate_indices`] //! -//! [`GroupsAccumulator`]: crate::GroupsAccumulator +//! [`GroupsAccumulator`]: datafusion_expr::GroupsAccumulator use arrow::datatypes::ArrowPrimitiveType; use arrow_array::{Array, BooleanArray, PrimitiveArray}; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; -use crate::EmitTo; - +use datafusion_expr::EmitTo; /// Track the accumulator null state per row: if any values for that /// group were null and if any values have been seen at all for that group. /// @@ -49,7 +48,7 @@ use crate::EmitTo; /// had at least one value to accumulate so they do not need to track /// if they have seen values for a particular group. 
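The common thread in the hunks above is that `Accumulator::state` and `Accumulator::evaluate` now take `&mut self`, which lets accumulators hand over internal buffers instead of cloning them (as the `MedianAccumulator` hunk further down does with `std::mem::take`). Below is a minimal sketch of an accumulator written against the updated signatures; the type name and counting logic are illustrative, not taken from this diff:

```rust
use arrow::array::{ArrayRef, AsArray};
use arrow::datatypes::Int64Type;
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::Accumulator;

/// Illustrative accumulator that counts non-null input rows.
#[derive(Debug, Default)]
struct RowCountAccumulator {
    count: i64,
}

impl Accumulator for RowCountAccumulator {
    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
        // Count the non-null rows of the first argument.
        self.count += (values[0].len() - values[0].null_count()) as i64;
        Ok(())
    }

    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
        // Partial counts arrive as an Int64 column; sum them into the total.
        let partial = states[0].as_primitive::<Int64Type>();
        self.count += arrow::compute::sum(partial).unwrap_or(0);
        Ok(())
    }

    // Both methods now take `&mut self`, matching the signature changes above.
    fn state(&mut self) -> Result<Vec<ScalarValue>> {
        Ok(vec![self.evaluate()?])
    }

    fn evaluate(&mut self) -> Result<ScalarValue> {
        Ok(ScalarValue::Int64(Some(self.count)))
    }

    fn size(&self) -> usize {
        std::mem::size_of_val(self)
    }
}
```

The same pattern explains the test changes in this diff: helpers such as `get_accum_scalar_values_as_arrays` now need `accum.as_mut()` rather than `accum.as_ref()`, because producing state may consume or reuse the accumulator's buffers.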
/// -/// [`GroupsAccumulator`]: crate::GroupsAccumulator +/// [`GroupsAccumulator`]: datafusion_expr::GroupsAccumulator #[derive(Debug)] pub struct NullState { /// Have we seen any non-filtered input values for `group_index`? @@ -62,6 +61,12 @@ pub struct NullState { seen_values: BooleanBufferBuilder, } +impl Default for NullState { + fn default() -> Self { + Self::new() + } +} + impl NullState { pub fn new() -> Self { Self { diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs index c6fd17a69b39..9856e1c989b3 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs @@ -17,7 +17,6 @@ //! Adapter that makes [`GroupsAccumulator`] out of [`Accumulator`] -use super::{EmitTo, GroupsAccumulator}; use arrow::{ array::{AsArray, UInt32Builder}, compute, @@ -28,7 +27,7 @@ use datafusion_common::{ arrow_datafusion_err, utils::get_arrayref_at_indices, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, EmitTo, GroupsAccumulator}; /// An adapter that implements [`GroupsAccumulator`] for any [`Accumulator`] /// @@ -272,7 +271,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter { let results: Vec = states .into_iter() - .map(|state| { + .map(|mut state| { self.free_allocation(state.size()); state.accumulator.evaluate() }) @@ -293,7 +292,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter { // which we need to form into columns let mut results: Vec> = vec![]; - for state in states { + for mut state in states { self.free_allocation(state.size()); let accumulator_state = state.accumulator.state()?; results.resize_with(accumulator_state.len(), Vec::new); diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs index 21b6cc29e83d..f40c661a7a2f 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs @@ -21,10 +21,9 @@ use arrow::array::AsArray; use arrow_array::{ArrayRef, BooleanArray}; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder}; use datafusion_common::Result; +use datafusion_expr::{EmitTo, GroupsAccumulator}; -use crate::GroupsAccumulator; - -use super::{accumulate::NullState, EmitTo}; +use super::accumulate::NullState; /// An accumulator that implements a single operation over a /// [`BooleanArray`] where the accumulated state is also boolean (such diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs index d2e64d373be2..de090badd349 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs @@ -15,146 +15,10 @@ // specific language governing permissions and limitations // under the License. -//! Vectorized [`GroupsAccumulator`] - pub(crate) mod accumulate; mod adapter; +pub use accumulate::NullState; pub use adapter::GroupsAccumulatorAdapter; pub(crate) mod bool_op; pub(crate) mod prim_op; - -use arrow_array::{ArrayRef, BooleanArray}; -use datafusion_common::Result; - -/// Describes how many rows should be emitted during grouping. 
-#[derive(Debug, Clone, Copy)] -pub enum EmitTo { - /// Emit all groups - All, - /// Emit only the first `n` groups and shift all existing group - /// indexes down by `n`. - /// - /// For example, if `n=10`, group_index `0, 1, ... 9` are emitted - /// and group indexes '`10, 11, 12, ...` become `0, 1, 2, ...`. - First(usize), -} - -impl EmitTo { - /// Removes the number of rows from `v` required to emit the right - /// number of rows, returning a `Vec` with elements taken, and the - /// remaining values in `v`. - /// - /// This avoids copying if Self::All - pub fn take_needed(&self, v: &mut Vec) -> Vec { - match self { - Self::All => { - // Take the entire vector, leave new (empty) vector - std::mem::take(v) - } - Self::First(n) => { - // get end n+1,.. values into t - let mut t = v.split_off(*n); - // leave n+1,.. in v - std::mem::swap(v, &mut t); - t - } - } - } -} - -/// `GroupAccumulator` implements a single aggregate (e.g. AVG) and -/// stores the state for *all* groups internally. -/// -/// Each group is assigned a `group_index` by the hash table and each -/// accumulator manages the specific state, one per group_index. -/// -/// group_indexes are contiguous (there aren't gaps), and thus it is -/// expected that each GroupAccumulator will use something like `Vec<..>` -/// to store the group states. -pub trait GroupsAccumulator: Send { - /// Updates the accumulator's state from its arguments, encoded as - /// a vector of [`ArrayRef`]s. - /// - /// * `values`: the input arguments to the accumulator - /// - /// * `group_indices`: To which groups do the rows in `values` - /// belong, group id) - /// - /// * `opt_filter`: if present, only update aggregate state using - /// `values[i]` if `opt_filter[i]` is true - /// - /// * `total_num_groups`: the number of groups (the largest - /// group_index is thus `total_num_groups - 1`). - /// - /// Note that subsequent calls to update_batch may have larger - /// total_num_groups as new groups are seen. - fn update_batch( - &mut self, - values: &[ArrayRef], - group_indices: &[usize], - opt_filter: Option<&BooleanArray>, - total_num_groups: usize, - ) -> Result<()>; - - /// Returns the final aggregate value for each group as a single - /// `RecordBatch`, resetting the internal state. - /// - /// The rows returned *must* be in group_index order: The value - /// for group_index 0, followed by 1, etc. Any group_index that - /// did not have values, should be null. - /// - /// For example, a `SUM` accumulator maintains a running sum for - /// each group, and `evaluate` will produce that running sum as - /// its output for all groups, in group_index order - /// - /// If `emit_to`` is [`EmitTo::All`], the accumulator should - /// return all groups and release / reset its internal state - /// equivalent to when it was first created. - /// - /// If `emit_to` is [`EmitTo::First`], only the first `n` groups - /// should be emitted and the state for those first groups - /// removed. State for the remaining groups must be retained for - /// future use. The group_indices on subsequent calls to - /// `update_batch` or `merge_batch` will be shifted down by - /// `n`. See [`EmitTo::First`] for more details. - fn evaluate(&mut self, emit_to: EmitTo) -> Result; - - /// Returns the intermediate aggregate state for this accumulator, - /// used for multi-phase grouping, resetting its internal state. - /// - /// For example, `AVG` might return two arrays: `SUM` and `COUNT` - /// but the `MIN` aggregate would just return a single array. 
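`EmitTo` and its `take_needed` helper are removed from this module because they now live in `datafusion_expr` (the earlier hunks already switch to `use datafusion_expr::{EmitTo, GroupsAccumulator}`). A small sketch of the documented `First(n)` / `All` semantics, assuming `take_needed` kept the signature shown above after the move:

```rust
use datafusion_expr::EmitTo;

fn main() {
    // Emit the first two groups; the remaining group indexes shift down by 2.
    let mut group_sums = vec![10_i64, 20, 30, 40];
    let emitted = EmitTo::First(2).take_needed(&mut group_sums);
    assert_eq!(emitted, vec![10, 20]);
    assert_eq!(group_sums, vec![30, 40]);

    // EmitTo::All takes the whole vector in one move, without copying elements.
    let rest = EmitTo::All.take_needed(&mut group_sums);
    assert_eq!(rest, vec![30, 40]);
    assert!(group_sums.is_empty());
}
```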
- /// - /// Note more sophisticated internal state can be passed as - /// single `StructArray` rather than multiple arrays. - /// - /// See [`Self::evaluate`] for details on the required output - /// order and `emit_to`. - fn state(&mut self, emit_to: EmitTo) -> Result>; - - /// Merges intermediate state (the output from [`Self::state`]) - /// into this accumulator's values. - /// - /// For some aggregates (such as `SUM`), `merge_batch` is the same - /// as `update_batch`, but for some aggregates (such as `COUNT`, - /// where the partial counts must be summed) the operations - /// differ. See [`Self::state`] for more details on how state is - /// used and merged. - /// - /// * `values`: arrays produced from calling `state` previously to the accumulator - /// - /// Other arguments are the same as for [`Self::update_batch`]; - fn merge_batch( - &mut self, - values: &[ArrayRef], - group_indices: &[usize], - opt_filter: Option<&BooleanArray>, - total_num_groups: usize, - ) -> Result<()>; - - /// Amount of memory used to store the state of this accumulator, - /// in bytes. This function is called once per batch, so it should - /// be `O(n)` to compute, not `O(num_groups)` - fn size(&self) -> usize; -} diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs index 130d56271280..994f5447d7c0 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs @@ -21,10 +21,9 @@ use arrow::{array::AsArray, datatypes::ArrowPrimitiveType}; use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray}; use arrow_schema::DataType; use datafusion_common::Result; +use datafusion_expr::{EmitTo, GroupsAccumulator}; -use crate::GroupsAccumulator; - -use super::{accumulate::NullState, EmitTo}; +use super::accumulate::NullState; /// An accumulator that implements a single operation over /// [`ArrowPrimitiveType`] where the accumulated state is the same as diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs index 691b1c1752f4..94cc5c7fb76a 100644 --- a/datafusion/physical-expr/src/aggregate/median.rs +++ b/datafusion/physical-expr/src/aggregate/median.rs @@ -145,7 +145,7 @@ impl std::fmt::Debug for MedianAccumulator { } impl Accumulator for MedianAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let all_values = self .all_values .iter() @@ -171,9 +171,8 @@ impl Accumulator for MedianAccumulator { Ok(()) } - fn evaluate(&self) -> Result { - // TODO: evaluate could pass &mut self - let mut d = self.all_values.clone(); + fn evaluate(&mut self) -> Result { + let mut d = std::mem::take(&mut self.all_values); let cmp = |x: &T::Native, y: &T::Native| x.compare(*y); let len = d.len(); diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs index 7e3ef2a2abab..3573df3743ee 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/physical-expr/src/aggregate/min_max.rs @@ -22,7 +22,7 @@ use std::convert::TryFrom; use std::sync::Arc; use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::compute; use arrow::datatypes::{ DataType, Date32Type, Date64Type, Time32MillisecondType, Time32SecondType, @@ -47,7 +47,7 @@ use arrow_array::types::{ 
use datafusion_common::internal_err; use datafusion_common::ScalarValue; use datafusion_common::{downcast_value, DataFusionError, Result}; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, GroupsAccumulator}; use crate::aggregate::utils::down_cast_any_ref; use crate::expressions::format_state_name; @@ -764,11 +764,11 @@ impl Accumulator for MaxAccumulator { self.update_batch(states) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.max.clone()]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.max.clone()) } @@ -820,11 +820,11 @@ impl Accumulator for SlidingMaxAccumulator { self.update_batch(states) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.max.clone()]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.max.clone()) } @@ -1016,7 +1016,7 @@ impl MinAccumulator { } impl Accumulator for MinAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.min.clone()]) } @@ -1031,7 +1031,7 @@ impl Accumulator for MinAccumulator { self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.min.clone()) } @@ -1058,7 +1058,7 @@ impl SlidingMinAccumulator { } impl Accumulator for SlidingMinAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.min.clone()]) } @@ -1092,7 +1092,7 @@ impl Accumulator for SlidingMinAccumulator { self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(self.min.clone()) } diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs index 270a8e6f7705..2bb205ce90dc 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -19,13 +19,12 @@ use std::any::Any; use std::fmt::Debug; use std::sync::Arc; -use self::groups_accumulator::GroupsAccumulator; use crate::expressions::{NthValueAgg, OrderSensitiveArrayAgg}; use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::datatypes::Field; use datafusion_common::{not_impl_err, DataFusionError, Result}; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, GroupsAccumulator}; mod hyperloglog; mod tdigest; diff --git a/datafusion/physical-expr/src/aggregate/nth_value.rs b/datafusion/physical-expr/src/aggregate/nth_value.rs index 5a1ca90b7f5e..26a125485826 100644 --- a/datafusion/physical-expr/src/aggregate/nth_value.rs +++ b/datafusion/physical-expr/src/aggregate/nth_value.rs @@ -302,7 +302,7 @@ impl Accumulator for NthValueAccumulator { Ok(()) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { let mut result = vec![self.evaluate_values()]; if !self.ordering_req.is_empty() { result.push(self.evaluate_orderings()); @@ -310,7 +310,7 @@ impl Accumulator for NthValueAccumulator { Ok(result) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let n_required = self.n.unsigned_abs() as usize; let from_start = self.n > 0; let nth_value_idx = if from_start { diff --git a/datafusion/physical-expr/src/aggregate/regr.rs b/datafusion/physical-expr/src/aggregate/regr.rs index 6922cb131cac..36e7b7c9b3e4 100644 --- a/datafusion/physical-expr/src/aggregate/regr.rs +++ b/datafusion/physical-expr/src/aggregate/regr.rs @@ -251,7 +251,7 @@ impl RegrAccumulator { } impl Accumulator for RegrAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ 
ScalarValue::from(self.count), ScalarValue::from(self.mean_x), @@ -418,7 +418,7 @@ impl Accumulator for RegrAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let cov_pop_x_y = self.algo_const / self.count as f64; let var_pop_x = self.m2_x / self.count as f64; let var_pop_y = self.m2_y / self.count as f64; diff --git a/datafusion/physical-expr/src/aggregate/stddev.rs b/datafusion/physical-expr/src/aggregate/stddev.rs index 64e19ef502c7..dcc2b0e69c02 100644 --- a/datafusion/physical-expr/src/aggregate/stddev.rs +++ b/datafusion/physical-expr/src/aggregate/stddev.rs @@ -200,7 +200,7 @@ impl StddevAccumulator { } impl Accumulator for StddevAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.variance.get_count()), ScalarValue::from(self.variance.get_mean()), @@ -220,7 +220,7 @@ impl Accumulator for StddevAccumulator { self.variance.merge_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let variance = self.variance.evaluate()?; match variance { ScalarValue::Float64(e) => { @@ -459,7 +459,7 @@ mod tests { .collect::>>()?; accum1.update_batch(&values1)?; accum2.update_batch(&values2)?; - let state2 = get_accum_scalar_values_as_arrays(accum2.as_ref())?; + let state2 = get_accum_scalar_values_as_arrays(accum2.as_mut())?; accum1.merge_batch(&state2)?; accum1.evaluate() } diff --git a/datafusion/physical-expr/src/aggregate/string_agg.rs b/datafusion/physical-expr/src/aggregate/string_agg.rs index 7adc736932ad..7a1da6d62246 100644 --- a/datafusion/physical-expr/src/aggregate/string_agg.rs +++ b/datafusion/physical-expr/src/aggregate/string_agg.rs @@ -153,11 +153,11 @@ impl Accumulator for StringAggAccumulator { Ok(()) } - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::LargeUtf8(self.values.clone())) } diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs index 03f666cc4e5d..6cf2810ce588 100644 --- a/datafusion/physical-expr/src/aggregate/sum.rs +++ b/datafusion/physical-expr/src/aggregate/sum.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use super::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use crate::aggregate::utils::down_cast_any_ref; use crate::expressions::format_state_name; -use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr}; +use crate::{AggregateExpr, PhysicalExpr}; use arrow::compute::sum; use arrow::datatypes::DataType; use arrow::{array::ArrayRef, datatypes::Field}; @@ -35,7 +35,7 @@ use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType}; use arrow_buffer::ArrowNativeType; use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::sum_return_type; -use datafusion_expr::Accumulator; +use datafusion_expr::{Accumulator, GroupsAccumulator}; /// SUM aggregate expression #[derive(Debug, Clone)] @@ -191,7 +191,7 @@ impl SumAccumulator { } impl Accumulator for SumAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?]) } @@ -208,7 +208,7 @@ impl Accumulator for SumAccumulator { self.update_batch(states) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { ScalarValue::new_primitive::(self.sum, &self.data_type) } @@ -243,7 +243,7 @@ impl SlidingSumAccumulator { } impl Accumulator for SlidingSumAccumulator { - fn state(&self) 
-> Result> { + fn state(&mut self) -> Result> { Ok(vec![self.evaluate()?, self.count.into()]) } @@ -267,7 +267,7 @@ impl Accumulator for SlidingSumAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let v = (self.count != 0).then_some(self.sum); ScalarValue::new_primitive::(v, &self.data_type) } diff --git a/datafusion/physical-expr/src/aggregate/sum_distinct.rs b/datafusion/physical-expr/src/aggregate/sum_distinct.rs index 6dbb39224629..4c0f94b3a2bb 100644 --- a/datafusion/physical-expr/src/aggregate/sum_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/sum_distinct.rs @@ -140,7 +140,7 @@ impl DistinctSumAccumulator { } impl Accumulator for DistinctSumAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { // 1. Stores aggregate state in `ScalarValue::List` // 2. Constructs `ScalarValue::List` state from distinct numeric stored in hash set let state_out = { @@ -186,7 +186,7 @@ impl Accumulator for DistinctSumAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let mut acc = T::Native::usize_as(0); for distinct_value in self.values.iter() { acc = acc.add_wrapping(distinct_value.0) diff --git a/datafusion/physical-expr/src/aggregate/utils.rs b/datafusion/physical-expr/src/aggregate/utils.rs index 6dd586bfb8ce..60d59c16be5f 100644 --- a/datafusion/physical-expr/src/aggregate/utils.rs +++ b/datafusion/physical-expr/src/aggregate/utils.rs @@ -35,7 +35,7 @@ use datafusion_expr::Accumulator; /// Convert scalar values from an accumulator into arrays. pub fn get_accum_scalar_values_as_arrays( - accum: &dyn Accumulator, + accum: &mut dyn Accumulator, ) -> Result> { accum .state()? diff --git a/datafusion/physical-expr/src/aggregate/variance.rs b/datafusion/physical-expr/src/aggregate/variance.rs index d82c5ad5626f..94d7be4265d7 100644 --- a/datafusion/physical-expr/src/aggregate/variance.rs +++ b/datafusion/physical-expr/src/aggregate/variance.rs @@ -231,7 +231,7 @@ impl VarianceAccumulator { } impl Accumulator for VarianceAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ ScalarValue::from(self.count), ScalarValue::from(self.mean), @@ -302,7 +302,7 @@ impl Accumulator for VarianceAccumulator { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { let count = match self.stats_type { StatsType::Population => self.count, StatsType::Sample => { @@ -533,7 +533,7 @@ mod tests { .collect::>>()?; accum1.update_batch(&values1)?; accum2.update_batch(&values2)?; - let state2 = get_accum_scalar_values_as_arrays(accum2.as_ref())?; + let state2 = get_accum_scalar_values_as_arrays(accum2.as_mut())?; accum1.merge_batch(&state2)?; accum1.evaluate() } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index af6587631df5..a3dec2762c10 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use arrow::array::*; use arrow::buffer::OffsetBuffer; -use arrow::compute; +use arrow::compute::{self}; use arrow::datatypes::{DataType, Field, UInt64Type}; use arrow::row::{RowConverter, SortField}; use arrow_buffer::{ArrowNativeType, NullBuffer}; @@ -575,23 +575,31 @@ pub fn array_except(args: &[ArrayRef]) -> Result { /// /// See test cases in `array.slt` for more details. 
pub fn array_slice(args: &[ArrayRef]) -> Result { - if args.len() != 3 { - return exec_err!("array_slice needs three arguments"); + let args_len = args.len(); + if args_len != 3 && args_len != 4 { + return exec_err!("array_slice needs three or four arguments"); } + let stride = if args_len == 4 { + Some(as_int64_array(&args[3])?) + } else { + None + }; + + let from_array = as_int64_array(&args[1])?; + let to_array = as_int64_array(&args[2])?; + let array_data_type = args[0].data_type(); match array_data_type { DataType::List(_) => { let array = as_list_array(&args[0])?; - let from_array = as_int64_array(&args[1])?; - let to_array = as_int64_array(&args[2])?; - general_array_slice::(array, from_array, to_array) + general_array_slice::(array, from_array, to_array, stride) } DataType::LargeList(_) => { let array = as_large_list_array(&args[0])?; let from_array = as_int64_array(&args[1])?; let to_array = as_int64_array(&args[2])?; - general_array_slice::(array, from_array, to_array) + general_array_slice::(array, from_array, to_array, stride) } _ => exec_err!("array_slice does not support type: {:?}", array_data_type), } @@ -601,6 +609,7 @@ fn general_array_slice( array: &GenericListArray, from_array: &Int64Array, to_array: &Int64Array, + stride: Option<&Int64Array>, ) -> Result where i64: TryInto, @@ -652,7 +661,7 @@ where let adjusted_zero_index = if index < 0 { // array_slice in duckdb with negative to_index is python-like, so index itself is exclusive if let Ok(index) = index.try_into() { - index + len - O::usize_as(1) + index + len } else { return exec_err!("array_slice got invalid index: {}", index); } @@ -700,17 +709,67 @@ where }; if let (Some(from), Some(to)) = (from_index, to_index) { + let stride = stride.map(|s| s.value(row_index)); + // array_slice with stride in duckdb, return empty array if stride is not supported and from > to. 
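The new optional fourth argument is the stride referred to in the comment above. A rough usage sketch of the crate's `array_slice` kernel with a stride of 2; the module path, list values, and expected result follow the DuckDB-style semantics the comments describe and are illustrative rather than copied from the diff:

```rust
use std::sync::Arc;
use arrow::array::{ArrayRef, Int64Array, ListArray};
use arrow::datatypes::Int64Type;
use datafusion_common::Result;
use datafusion_physical_expr::array_expressions::array_slice;

fn main() -> Result<()> {
    // A single row holding the list [1, 2, 3, 4, 5].
    let list = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
        Some(1_i64),
        Some(2),
        Some(3),
        Some(4),
        Some(5),
    ])]);
    let from = Int64Array::from(vec![1_i64]);
    let to = Int64Array::from(vec![5_i64]);
    let stride = Int64Array::from(vec![2_i64]);

    let args: Vec<ArrayRef> = vec![
        Arc::new(list),
        Arc::new(from),
        Arc::new(to),
        Arc::new(stride),
    ];

    // With the optional stride this should keep every second element between
    // positions 1 and 5, i.e. [1, 3, 5] for the row above.
    let sliced = array_slice(&args)?;
    println!("{sliced:?}");
    Ok(())
}
```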
+ if stride.is_none() && from > to { + // return empty array + offsets.push(offsets[row_index]); + continue; + } + let stride = stride.unwrap_or(1); + if stride.is_zero() { + return exec_err!( + "array_slice got invalid stride: {:?}, it cannot be 0", + stride + ); + } else if from <= to && stride.is_negative() { + // return empty array + offsets.push(offsets[row_index]); + continue; + } + + let stride: O = stride.try_into().map_err(|_| { + internal_datafusion_err!("array_slice got invalid stride: {}", stride) + })?; + if from <= to { assert!(start + to <= end); - mutable.extend( - 0, - (start + from).to_usize().unwrap(), - (start + to + O::usize_as(1)).to_usize().unwrap(), - ); - offsets.push(offsets[row_index] + (to - from + O::usize_as(1))); + if stride.eq(&O::one()) { + // stride is default to 1 + mutable.extend( + 0, + (start + from).to_usize().unwrap(), + (start + to + O::usize_as(1)).to_usize().unwrap(), + ); + offsets.push(offsets[row_index] + (to - from + O::usize_as(1))); + continue; + } + let mut index = start + from; + let mut cnt = 0; + while index <= start + to { + mutable.extend( + 0, + index.to_usize().unwrap(), + index.to_usize().unwrap() + 1, + ); + index += stride; + cnt += 1; + } + offsets.push(offsets[row_index] + O::usize_as(cnt)); } else { + let mut index = start + from; + let mut cnt = 0; + while index >= start + to { + mutable.extend( + 0, + index.to_usize().unwrap(), + index.to_usize().unwrap() + 1, + ); + index += stride; + cnt += 1; + } // invalid range, return empty array - offsets.push(offsets[row_index]); + offsets.push(offsets[row_index] + O::usize_as(cnt)); } } else { // invalid range, return empty array @@ -741,7 +800,7 @@ where .map(|arr| arr.map_or(0, |arr| arr.len() as i64)) .collect::>(), ); - general_array_slice::(array, &from_array, &to_array) + general_array_slice::(array, &from_array, &to_array, None) } fn general_pop_back_list( @@ -757,7 +816,7 @@ where .map(|arr| arr.map_or(0, |arr| arr.len() as i64 - 1)) .collect::>(), ); - general_array_slice::(array, &from_array, &to_array) + general_array_slice::(array, &from_array, &to_array, None) } /// array_pop_front SQL function @@ -1031,12 +1090,6 @@ where let res = match list_array.value_type() { DataType::List(_) => concat_internal::(args)?, DataType::LargeList(_) => concat_internal::(args)?, - DataType::Null => { - return make_array(&[ - list_array.values().to_owned(), - element_array.to_owned(), - ]); - } data_type => { return generic_append_and_prepend::( list_array, diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 589bbc8a952b..d21d89c19d2e 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -17,7 +17,6 @@ //! 
DateTime expressions -use crate::datetime_expressions; use crate::expressions::cast_column; use arrow::compute::cast; use arrow::{ @@ -37,7 +36,9 @@ use arrow::{ use arrow_array::temporal_conversions::NANOSECONDS; use arrow_array::timezone::Tz; use arrow_array::types::ArrowTimestampType; +use arrow_array::GenericStringArray; use chrono::prelude::*; +use chrono::LocalResult::Single; use chrono::{Duration, Months, NaiveDate}; use datafusion_common::cast::{ as_date32_array, as_date64_array, as_generic_string_array, as_primitive_array, @@ -49,9 +50,96 @@ use datafusion_common::{ ScalarValue, }; use datafusion_expr::ColumnarValue; +use itertools::Either; use std::str::FromStr; use std::sync::Arc; +/// Error message if nanosecond conversion request beyond supported interval +const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"; + +/// Accepts a string with a `chrono` format and converts it to a +/// nanosecond precision timestamp. +/// +/// See [`chrono::format::strftime`] for the full set of supported formats. +/// +/// Implements the `to_timestamp` function to convert a string to a +/// timestamp, following the model of spark SQL’s to_`timestamp`. +/// +/// Internally, this function uses the `chrono` library for the +/// datetime parsing +/// +/// ## Timestamp Precision +/// +/// Function uses the maximum precision timestamps supported by +/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This +/// means the range of dates that timestamps can represent is ~1677 AD +/// to 2262 AM +/// +/// ## Timezone / Offset Handling +/// +/// Numerical values of timestamps are stored compared to offset UTC. +/// +/// Any timestamp in the formatting string is handled according to the rules +/// defined by `chrono`. +/// +/// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html +/// +#[inline] +pub(crate) fn string_to_timestamp_nanos_formatted( + s: &str, + format: &str, +) -> Result { + string_to_datetime_formatted(&Utc, s, format)? 
+ .naive_utc() + .timestamp_nanos_opt() + .ok_or_else(|| { + DataFusionError::Execution(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()) + }) +} + +/// Accepts a string and parses it using the [`chrono::format::strftime`] specifiers +/// relative to the provided `timezone` +/// +/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled +/// +/// * `2023-01-01 040506 America/Los_Angeles` +/// +/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error +/// will be returned +/// +/// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html +/// [IANA timezones]: https://www.iana.org/time-zones +pub(crate) fn string_to_datetime_formatted( + timezone: &T, + s: &str, + format: &str, +) -> Result, DataFusionError> { + let err = |err_ctx: &str| { + DataFusionError::Execution(format!( + "Error parsing timestamp from '{s}' using format '{format}': {err_ctx}" + )) + }; + + // attempt to parse the string assuming it has a timezone + let dt = DateTime::parse_from_str(s, format); + + if let Err(e) = &dt { + // no timezone or other failure, try without a timezone + let ndt = NaiveDateTime::parse_from_str(s, format); + if let Err(e) = &ndt { + return Err(err(&e.to_string())); + } + + if let Single(e) = &timezone.from_local_datetime(&ndt.unwrap()) { + Ok(e.to_owned()) + } else { + Err(err(&e.to_string())) + } + } else { + Ok(dt.unwrap().with_timezone(timezone)) + } +} + /// given a function `op` that maps a `&str` to a Result of an arrow native type, /// returns a `PrimitiveArray` after the application /// of the function to `args[0]`. @@ -84,7 +172,96 @@ where array.iter().map(|x| x.map(&op).transpose()).collect() } -// given an function that maps a `&str` to a arrow native type, +/// given a function `op` that maps `&str`, `&str` to the first successful Result +/// of an arrow native type, returns a `PrimitiveArray` after the application of the +/// function to `args` and the subsequence application of the `op2` function to any +/// successful result. This function calls the `op` function with the first and second +/// argument and if not successful continues with first and third, first and fourth, +/// etc until the result was successful or no more arguments are present. 
+/// # Errors +/// This function errors iff: +/// * the number of arguments is not > 1 or +/// * the array arguments are not castable to a `GenericStringArray` or +/// * the function `op` errors for all input +pub(crate) fn strings_to_primitive_function<'a, T, O, F, F2>( + args: &'a [ColumnarValue], + op: F, + op2: F2, + name: &str, +) -> Result> +where + O: ArrowPrimitiveType, + T: OffsetSizeTrait, + F: Fn(&'a str, &'a str) -> Result, + F2: Fn(O::Native) -> O::Native, +{ + if args.len() < 2 { + return internal_err!( + "{:?} args were supplied but {} takes 2 or more arguments", + args.len(), + name + ); + } + + // this will throw the error if any of the array args are not castable to GenericStringArray + let data = args + .iter() + .map(|a| match a { + ColumnarValue::Array(a) => { + Ok(Either::Left(as_generic_string_array::(a.as_ref())?)) + } + ColumnarValue::Scalar(s) => match s { + ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => Ok(Either::Right(a)), + other => internal_err!( + "Unexpected scalar type encountered '{other}' for function '{name}'" + ), + }, + }) + .collect::, &Option>>>>()?; + + let first_arg = &data.first().unwrap().left().unwrap(); + + first_arg + .iter() + .enumerate() + .map(|(pos, x)| { + let mut val = None; + + if let Some(x) = x { + let param_args = data.iter().skip(1); + + // go through the args and find the first successful result. Only the last + // failure will be returned if no successful result was received. + for param_arg in param_args { + // param_arg is an array, use the corresponding index into the array as the arg + // we're currently parsing + let p = *param_arg; + let r = if p.is_left() { + let p = p.left().unwrap(); + op(x, p.value(pos)) + } + // args is a scalar, use it directly + else if let Some(p) = p.right().unwrap() { + op(x, p.as_str()) + } else { + continue; + }; + + if r.is_ok() { + val = Some(Ok(op2(r.unwrap()))); + break; + } else { + val = Some(r); + } + } + }; + + val.transpose() + }) + .collect() +} + +// given an function that maps a `&str` to an arrow native type, // returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue` // depending on the `args`'s variant. fn handle<'a, O, F, S>( @@ -99,24 +276,112 @@ where { match &args[0] { ColumnarValue::Array(a) => match a.data_type() { - DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new( + DataType::Utf8 | DataType::LargeUtf8 => Ok(ColumnarValue::Array(Arc::new( unary_string_to_primitive_function::(&[a.as_ref()], op, name)?, ))), - DataType::LargeUtf8 => Ok(ColumnarValue::Array(Arc::new( - unary_string_to_primitive_function::(&[a.as_ref()], op, name)?, - ))), - other => internal_err!("Unsupported data type {other:?} for function {name}"), + other => exec_err!("Unsupported data type {other:?} for function {name}"), }, ColumnarValue::Scalar(scalar) => match scalar { - ScalarValue::Utf8(a) => { + ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => { let result = a.as_ref().map(|x| (op)(x)).transpose()?; Ok(ColumnarValue::Scalar(S::scalar(result))) } - ScalarValue::LargeUtf8(a) => { - let result = a.as_ref().map(|x| (op)(x)).transpose()?; - Ok(ColumnarValue::Scalar(S::scalar(result))) + other => exec_err!("Unsupported data type {other:?} for function {name}"), + }, + } +} + +// given an function that maps a `&str`, `&str` to an arrow native type, +// returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue` +// depending on the `args`'s variant. 
+fn handle_multiple<'a, O, F, S, M>( + args: &'a [ColumnarValue], + op: F, + op2: M, + name: &str, +) -> Result +where + O: ArrowPrimitiveType, + S: ScalarType, + F: Fn(&'a str, &'a str) -> Result, + M: Fn(O::Native) -> O::Native, +{ + match &args[0] { + ColumnarValue::Array(a) => match a.data_type() { + DataType::Utf8 | DataType::LargeUtf8 => { + // validate the column types + for (pos, arg) in args.iter().enumerate() { + match arg { + ColumnarValue::Array(arg) => match arg.data_type() { + DataType::Utf8 | DataType::LargeUtf8 => { + // all good + }, + other => return exec_err!("Unsupported data type {other:?} for function {name}, arg # {pos}"), + }, + ColumnarValue::Scalar(arg) => { match arg.data_type() { + DataType::Utf8 | DataType::LargeUtf8 => { + // all good + }, + other => return exec_err!("Unsupported data type {other:?} for function {name}, arg # {pos}"), + }} + } + } + + Ok(ColumnarValue::Array(Arc::new( + strings_to_primitive_function::(args, op, op2, name)?, + ))) + } + other => { + exec_err!("Unsupported data type {other:?} for function {name}") + } + }, + // if the first argument is a scalar utf8 all arguments are expected to be scalar utf8 + ColumnarValue::Scalar(scalar) => match scalar { + ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => { + let mut val: Option> = None; + let mut err: Option = None; + + match a { + Some(a) => { + // enumerate all the values finding the first one that returns an Ok result + for (pos, v) in args.iter().enumerate().skip(1) { + if let ColumnarValue::Scalar(s) = v { + if let ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x) = + s + { + if let Some(s) = x { + match op(a.as_str(), s.as_str()) { + Ok(r) => { + val = Some(Ok(ColumnarValue::Scalar( + S::scalar(Some(op2(r))), + ))); + break; + } + Err(e) => { + err = Some(e); + } + } + } + } else { + return exec_err!("Unsupported data type {s:?} for function {name}, arg # {pos}"); + } + } else { + return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + } + } + } + None => (), + } + + if let Some(v) = val { + v + } else { + Err(err.unwrap()) + } + } + other => { + exec_err!("Unsupported data type {other:?} for function {name}") } - other => internal_err!("Unsupported data type {other:?} for function {name}"), }, } } @@ -126,53 +391,61 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result { string_to_timestamp_nanos(s).map_err(|e| e.into()) } +fn to_timestamp_impl>( + args: &[ColumnarValue], + name: &str, +) -> Result { + let factor = match T::UNIT { + TimeUnit::Second => 1_000_000_000, + TimeUnit::Millisecond => 1_000_000, + TimeUnit::Microsecond => 1_000, + TimeUnit::Nanosecond => 1, + }; + + match args.len() { + 1 => handle::( + args, + |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor), + name, + ), + n if n >= 2 => handle_multiple::( + args, + string_to_timestamp_nanos_formatted, + |n| n / factor, + name, + ), + _ => internal_err!("Unsupported 0 argument count for function {name}"), + } +} + /// to_timestamp SQL function /// -/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**. The supported range for integer input is between `-9223372037` and `9223372036`. +/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**. +/// The supported range for integer input is between `-9223372037` and `9223372036`. /// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. 
/// Please use `to_timestamp_seconds` for the input outside of supported bounds. pub fn to_timestamp(args: &[ColumnarValue]) -> Result { - handle::( - args, - string_to_timestamp_nanos_shim, - "to_timestamp", - ) + to_timestamp_impl::(args, "to_timestamp") } /// to_timestamp_millis SQL function pub fn to_timestamp_millis(args: &[ColumnarValue]) -> Result { - handle::( - args, - |s| string_to_timestamp_nanos_shim(s).map(|n| n / 1_000_000), - "to_timestamp_millis", - ) + to_timestamp_impl::(args, "to_timestamp_millis") } /// to_timestamp_micros SQL function pub fn to_timestamp_micros(args: &[ColumnarValue]) -> Result { - handle::( - args, - |s| string_to_timestamp_nanos_shim(s).map(|n| n / 1_000), - "to_timestamp_micros", - ) + to_timestamp_impl::(args, "to_timestamp_micros") } /// to_timestamp_nanos SQL function pub fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result { - handle::( - args, - string_to_timestamp_nanos_shim, - "to_timestamp_nanos", - ) + to_timestamp_impl::(args, "to_timestamp_nanos") } /// to_timestamp_seconds SQL function pub fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result { - handle::( - args, - |s| string_to_timestamp_nanos_shim(s).map(|n| n / 1_000_000_000), - "to_timestamp_seconds", - ) + to_timestamp_impl::(args, "to_timestamp_seconds") } /// Create an implementation of `now()` that always returns the @@ -915,22 +1188,51 @@ where Ok(b) } -/// to_timestammp() SQL function implementation +fn validate_to_timestamp_data_types( + args: &[ColumnarValue], + name: &str, +) -> Option> { + for (idx, a) in args.iter().skip(1).enumerate() { + match a.data_type() { + DataType::Utf8 | DataType::LargeUtf8 => { + // all good + } + _ => { + return Some(internal_err!( + "{name} function unsupported data type at index {}: {}", + idx + 1, + a.data_type() + )); + } + } + } + + None +} + +/// to_timestamp() SQL function implementation pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result { - if args.len() != 1 { + if args.is_empty() { return internal_err!( - "to_timestamp function requires 1 arguments, got {}", + "to_timestamp function requires 1 or more arguments, got {}", args.len() ); } + // validate that any args after the first one are Utf8 + if args.len() > 1 { + if let Some(value) = validate_to_timestamp_data_types(args, "to_timestamp") { + return value; + } + } + match args[0].data_type() { - DataType::Int64 => cast_column( + DataType::Int32 | DataType::Int64 => cast_column( &cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second, None), None)?, &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ), - DataType::Float64 => cast_column( + DataType::Null | DataType::Float64 => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None, @@ -940,7 +1242,7 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result { &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ), - DataType::Utf8 => datetime_expressions::to_timestamp(args), + DataType::Utf8 => to_timestamp(args), other => { internal_err!( "Unsupported data type {:?} for function to_timestamp", @@ -952,20 +1254,31 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result { /// to_timestamp_millis() SQL function implementation pub fn to_timestamp_millis_invoke(args: &[ColumnarValue]) -> Result { - if args.len() != 1 { + if args.is_empty() { return internal_err!( - "to_timestamp_millis function requires 1 argument, got {}", + "to_timestamp_millis function requires 1 or more arguments, got {}", args.len() ); } + // validate that any args after the first one are Utf8 + if 
args.len() > 1 { + if let Some(value) = validate_to_timestamp_data_types(args, "to_timestamp_millis") + { + return value; + } + } + match args[0].data_type() { - DataType::Int64 | DataType::Timestamp(_, None) => cast_column( + DataType::Null + | DataType::Int32 + | DataType::Int64 + | DataType::Timestamp(_, None) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Millisecond, None), None, ), - DataType::Utf8 => datetime_expressions::to_timestamp_millis(args), + DataType::Utf8 => to_timestamp_millis(args), other => { internal_err!( "Unsupported data type {:?} for function to_timestamp_millis", @@ -977,20 +1290,31 @@ pub fn to_timestamp_millis_invoke(args: &[ColumnarValue]) -> Result Result { - if args.len() != 1 { + if args.is_empty() { return internal_err!( - "to_timestamp_micros function requires 1 argument, got {}", + "to_timestamp_micros function requires 1 or more arguments, got {}", args.len() ); } + // validate that any args after the first one are Utf8 + if args.len() > 1 { + if let Some(value) = validate_to_timestamp_data_types(args, "to_timestamp_micros") + { + return value; + } + } + match args[0].data_type() { - DataType::Int64 | DataType::Timestamp(_, None) => cast_column( + DataType::Null + | DataType::Int32 + | DataType::Int64 + | DataType::Timestamp(_, None) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Microsecond, None), None, ), - DataType::Utf8 => datetime_expressions::to_timestamp_micros(args), + DataType::Utf8 => to_timestamp_micros(args), other => { internal_err!( "Unsupported data type {:?} for function to_timestamp_micros", @@ -1002,20 +1326,31 @@ pub fn to_timestamp_micros_invoke(args: &[ColumnarValue]) -> Result Result { - if args.len() != 1 { + if args.is_empty() { return internal_err!( - "to_timestamp_nanos function requires 1 argument, got {}", + "to_timestamp_nanos function requires 1 or more arguments, got {}", args.len() ); } + // validate that any args after the first one are Utf8 + if args.len() > 1 { + if let Some(value) = validate_to_timestamp_data_types(args, "to_timestamp_nanos") + { + return value; + } + } + match args[0].data_type() { - DataType::Int64 | DataType::Timestamp(_, None) => cast_column( + DataType::Null + | DataType::Int32 + | DataType::Int64 + | DataType::Timestamp(_, None) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ), - DataType::Utf8 => datetime_expressions::to_timestamp_nanos(args), + DataType::Utf8 => to_timestamp_nanos(args), other => { internal_err!( "Unsupported data type {:?} for function to_timestamp_nanos", @@ -1027,18 +1362,30 @@ pub fn to_timestamp_nanos_invoke(args: &[ColumnarValue]) -> Result Result { - if args.len() != 1 { + if args.is_empty() { return internal_err!( - "to_timestamp_seconds function requires 1 argument, got {}", + "to_timestamp_seconds function requires 1 or more arguments, got {}", args.len() ); } + // validate that any args after the first one are Utf8 + if args.len() > 1 { + if let Some(value) = + validate_to_timestamp_data_types(args, "to_timestamp_seconds") + { + return value; + } + } + match args[0].data_type() { - DataType::Int64 | DataType::Timestamp(_, None) => { + DataType::Null + | DataType::Int32 + | DataType::Int64 + | DataType::Timestamp(_, None) => { cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second, None), None) } - DataType::Utf8 => datetime_expressions::to_timestamp_seconds(args), + DataType::Utf8 => to_timestamp_seconds(args), other => { internal_err!( "Unsupported data type {:?} for function to_timestamp_seconds", @@ 
-1077,7 +1424,13 @@ mod tests { use arrow::array::{ as_primitive_array, ArrayRef, Int64Array, IntervalDayTimeArray, StringBuilder, }; - use arrow_array::TimestampNanosecondArray; + use arrow_array::types::Int64Type; + use arrow_array::{ + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, + }; + use datafusion_common::assert_contains; + use datafusion_expr::ScalarFunctionImplementation; use super::*; @@ -1108,6 +1461,47 @@ mod tests { Ok(()) } + #[test] + fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> { + // ensure that arrow array implementation is wired up and handles nulls correctly + + let mut date_string_builder = StringBuilder::with_capacity(2, 1024); + let mut format1_builder = StringBuilder::with_capacity(2, 1024); + let mut format2_builder = StringBuilder::with_capacity(2, 1024); + let mut format3_builder = StringBuilder::with_capacity(2, 1024); + let mut ts_builder = TimestampNanosecondArray::builder(2); + + date_string_builder.append_null(); + format1_builder.append_null(); + format2_builder.append_null(); + format3_builder.append_null(); + ts_builder.append_null(); + + date_string_builder.append_value("2020-09-08T13:42:29.19085Z"); + format1_builder.append_value("%s"); + format2_builder.append_value("%c"); + format3_builder.append_value("%+"); + ts_builder.append_value(1599572549190850000); + + let expected_timestamps = &ts_builder.finish() as &dyn Array; + + let string_array = [ + ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef), + ]; + let parsed_timestamps = to_timestamp(&string_array) + .expect("that to_timestamp with format args parsed values without error"); + if let ColumnarValue::Array(parsed_array) = parsed_timestamps { + assert_eq!(parsed_array.len(), 2); + assert_eq!(expected_timestamps, parsed_array.as_ref()); + } else { + panic!("Expected a columnar array") + } + Ok(()) + } + #[test] fn date_trunc_test() { let cases = vec![ @@ -1663,7 +2057,7 @@ mod tests { let int64array = ColumnarValue::Array(Arc::new(builder.finish())); let expected_err = - "Internal error: Unsupported data type Int64 for function to_timestamp"; + "Execution error: Unsupported data type Int64 for function to_timestamp"; match to_timestamp(&[int64array]) { Ok(_) => panic!("Expected error but got success"), Err(e) => { @@ -1675,4 +2069,303 @@ mod tests { } Ok(()) } + + #[test] + fn to_timestamp_with_formats_invalid_input_type() -> Result<()> { + // pass the wrong type of input array to to_timestamp and test + // that we get an error. + + let mut builder = Int64Array::builder(1); + builder.append_value(1); + let int64array = [ + ColumnarValue::Array(Arc::new(builder.finish())), + ColumnarValue::Array(Arc::new(builder.finish())), + ]; + + let expected_err = + "Execution error: Unsupported data type Int64 for function to_timestamp"; + match to_timestamp(&int64array) { + Ok(_) => panic!("Expected error but got success"), + Err(e) => { + assert!( + e.to_string().contains(expected_err), + "Can not find expected error '{expected_err}'. 
Actual error '{e}'" + ); + } + } + Ok(()) + } + + #[test] + fn to_timestamp_with_unparseable_data() -> Result<()> { + let mut date_string_builder = StringBuilder::with_capacity(2, 1024); + + date_string_builder.append_null(); + + date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z"); + + let string_array = + ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef); + + let expected_err = + "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time"; + match to_timestamp(&[string_array]) { + Ok(_) => panic!("Expected error but got success"), + Err(e) => { + assert!( + e.to_string().contains(expected_err), + "Can not find expected error '{expected_err}'. Actual error '{e}'" + ); + } + } + Ok(()) + } + + #[test] + fn to_timestamp_with_no_matching_formats() -> Result<()> { + let mut date_string_builder = StringBuilder::with_capacity(2, 1024); + let mut format1_builder = StringBuilder::with_capacity(2, 1024); + let mut format2_builder = StringBuilder::with_capacity(2, 1024); + let mut format3_builder = StringBuilder::with_capacity(2, 1024); + + date_string_builder.append_null(); + format1_builder.append_null(); + format2_builder.append_null(); + format3_builder.append_null(); + + date_string_builder.append_value("2020-09-08T13:42:29.19085Z"); + format1_builder.append_value("%s"); + format2_builder.append_value("%c"); + format3_builder.append_value("%H:%M:%S"); + + let string_array = [ + ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef), + ]; + + let expected_err = + "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters"; + match to_timestamp(&string_array) { + Ok(_) => panic!("Expected error but got success"), + Err(e) => { + assert!( + e.to_string().contains(expected_err), + "Can not find expected error '{expected_err}'. 
Actual error '{e}'" + ); + } + } + Ok(()) + } + + #[test] + fn string_to_timestamp_formatted() { + // Explicit timezone + assert_eq!( + 1599572549190855000, + parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap() + ); + assert_eq!( + 1599572549190855000, + parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap() + ); + assert_eq!( + 1599572549000000000, + parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap() + ); // no fractional part + assert_eq!( + 1599590549190855000, + parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap() + ); + assert_eq!( + 1599590549000000000, + parse_timestamp_formatted("1599590549", "%s").unwrap() + ); + assert_eq!( + 1599572549000000000, + parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S") + .unwrap() + ); + } + + fn parse_timestamp_formatted(s: &str, format: &str) -> Result { + let result = string_to_timestamp_nanos_formatted(s, format); + if let Err(e) = &result { + eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}"); + } + result + } + + #[test] + fn string_to_timestamp_formatted_invalid() { + // Test parsing invalid formats + let cases = [ + ("", "%Y%m%d %H%M%S", "premature end of input"), + ("SS", "%c", "premature end of input"), + ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"), + ( + "Wed, 18 Feb 2015 23:16:09 GMT", + "%XX", + "input contains invalid characters", + ), + ( + "Wed, 18 Feb 2015 23:16:09 GMT", + "%Y%m%d %H%M%S", + "input contains invalid characters", + ), + ]; + + for (s, f, ctx) in cases { + let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"); + let actual = string_to_datetime_formatted(&Utc, s, f) + .unwrap_err() + .to_string(); + assert_eq!(actual, expected) + } + } + + #[test] + fn string_to_timestamp_invalid_arguments() { + // Test parsing invalid formats + let cases = [ + ("", "%Y%m%d %H%M%S", "premature end of input"), + ("SS", "%c", "premature end of input"), + ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"), + ( + "Wed, 18 Feb 2015 23:16:09 GMT", + "%XX", + "input contains invalid characters", + ), + ( + "Wed, 18 Feb 2015 23:16:09 GMT", + "%Y%m%d %H%M%S", + "input contains invalid characters", + ), + ]; + + for (s, f, ctx) in cases { + let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"); + let actual = string_to_datetime_formatted(&Utc, s, f) + .unwrap_err() + .to_string(); + assert_eq!(actual, expected) + } + } + + #[test] + fn test_to_timestamp_arg_validation() { + let mut date_string_builder = StringBuilder::with_capacity(2, 1024); + date_string_builder.append_value("2020-09-08T13:42:29.19085Z"); + + let data = date_string_builder.finish(); + + let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![ + (Arc::new(to_timestamp), TimeUnit::Nanosecond), + (Arc::new(to_timestamp_micros), TimeUnit::Microsecond), + (Arc::new(to_timestamp_millis), TimeUnit::Millisecond), + (Arc::new(to_timestamp_nanos), TimeUnit::Nanosecond), + (Arc::new(to_timestamp_seconds), TimeUnit::Second), + ]; + + let mut nanos_builder = TimestampNanosecondArray::builder(2); + let mut millis_builder = TimestampMillisecondArray::builder(2); + let mut micros_builder = TimestampMicrosecondArray::builder(2); + let mut sec_builder = TimestampSecondArray::builder(2); + + nanos_builder.append_value(1599572549190850000); + millis_builder.append_value(1599572549190); + micros_builder.append_value(1599572549190850); + 
sec_builder.append_value(1599572549); + + let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array; + let millis_expected_timestamps = &millis_builder.finish() as &dyn Array; + let micros_expected_timestamps = &micros_builder.finish() as &dyn Array; + let sec_expected_timestamps = &sec_builder.finish() as &dyn Array; + + for (func, time_unit) in funcs { + // test UTF8 + let string_array = [ + ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))), + ]; + let parsed_timestamps = func(&string_array) + .expect("that to_timestamp with format args parsed values without error"); + if let ColumnarValue::Array(parsed_array) = parsed_timestamps { + assert_eq!(parsed_array.len(), 1); + match time_unit { + TimeUnit::Nanosecond => { + assert_eq!(nanos_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Millisecond => { + assert_eq!(millis_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Microsecond => { + assert_eq!(micros_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Second => { + assert_eq!(sec_expected_timestamps, parsed_array.as_ref()) + } + }; + } else { + panic!("Expected a columnar array") + } + + // test LargeUTF8 + let string_array = [ + ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))), + ]; + let parsed_timestamps = func(&string_array) + .expect("that to_timestamp with format args parsed values without error"); + if let ColumnarValue::Array(parsed_array) = parsed_timestamps { + assert_eq!(parsed_array.len(), 1); + match time_unit { + TimeUnit::Nanosecond => { + assert_eq!(nanos_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Millisecond => { + assert_eq!(millis_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Microsecond => { + assert_eq!(micros_expected_timestamps, parsed_array.as_ref()) + } + TimeUnit::Second => { + assert_eq!(sec_expected_timestamps, parsed_array.as_ref()) + } + }; + } else { + panic!("Expected a columnar array") + } + + // test other types + let string_array = [ + ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef), + ColumnarValue::Scalar(ScalarValue::Int32(Some(1))), + ColumnarValue::Scalar(ScalarValue::Int32(Some(2))), + ColumnarValue::Scalar(ScalarValue::Int32(Some(3))), + ]; + + let expected = "Unsupported data type Int32 for function".to_string(); + let actual = func(&string_array).unwrap_err().to_string(); + assert_contains!(actual, expected); + + // test other types + let string_array = [ + ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef), + ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new( + vec![1i64].into(), + None, + )) as ArrayRef), + ]; + + let expected = "Unsupported data type".to_string(); + let actual = func(&string_array).unwrap_err().to_string(); + assert_contains!(actual, expected); + } + } } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 8c4078dbce8c..3f13030092c1 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -28,12 +28,12 @@ use crate::sort_properties::SortProperties; use crate::PhysicalExpr;
use arrow::array::*; -use arrow::compute::cast; use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene}; use arrow::compute::kernels::cmp::*; use arrow::compute::kernels::comparison::regexp_is_match_utf8; use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar; use arrow::compute::kernels::concat_elements::concat_elements_utf8; +use arrow::compute::{cast, ilike, like, nilike, nlike}; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; @@ -281,6 +281,10 @@ impl PhysicalExpr for BinaryExpr { Operator::GtEq => return apply_cmp(&lhs, &rhs, gt_eq), Operator::IsDistinctFrom => return apply_cmp(&lhs, &rhs, distinct), Operator::IsNotDistinctFrom => return apply_cmp(&lhs, &rhs, not_distinct), + Operator::LikeMatch => return apply_cmp(&lhs, &rhs, like), + Operator::ILikeMatch => return apply_cmp(&lhs, &rhs, ilike), + Operator::NotLikeMatch => return apply_cmp(&lhs, &rhs, nlike), + Operator::NotILikeMatch => return apply_cmp(&lhs, &rhs, nilike), _ => {} } @@ -554,7 +558,8 @@ impl BinaryExpr { use Operator::*; match &self.op { IsDistinctFrom | IsNotDistinctFrom | Lt | LtEq | Gt | GtEq | Eq | NotEq - | Plus | Minus | Multiply | Divide | Modulo => unreachable!(), + | Plus | Minus | Multiply | Divide | Modulo | LikeMatch | ILikeMatch + | NotLikeMatch | NotILikeMatch => unreachable!(), And => { if left_data_type == &DataType::Boolean { boolean_op!(&left, &right, and_kleene) @@ -970,6 +975,102 @@ mod tests { DataType::Boolean, [false, false, false, false, true], ); + test_coercion!( + StringArray, + DataType::Utf8, + vec!["abc"; 5], + StringArray, + DataType::Utf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::LikeMatch, + BooleanArray, + DataType::Boolean, + [true, false, false, true, false], + ); + test_coercion!( + StringArray, + DataType::Utf8, + vec!["abc"; 5], + StringArray, + DataType::Utf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::ILikeMatch, + BooleanArray, + DataType::Boolean, + [true, true, false, true, true], + ); + test_coercion!( + StringArray, + DataType::Utf8, + vec!["abc"; 5], + StringArray, + DataType::Utf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::NotLikeMatch, + BooleanArray, + DataType::Boolean, + [false, true, true, false, true], + ); + test_coercion!( + StringArray, + DataType::Utf8, + vec!["abc"; 5], + StringArray, + DataType::Utf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::NotILikeMatch, + BooleanArray, + DataType::Boolean, + [false, false, true, false, false], + ); + test_coercion!( + LargeStringArray, + DataType::LargeUtf8, + vec!["abc"; 5], + LargeStringArray, + DataType::LargeUtf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::LikeMatch, + BooleanArray, + DataType::Boolean, + [true, false, false, true, false], + ); + test_coercion!( + LargeStringArray, + DataType::LargeUtf8, + vec!["abc"; 5], + LargeStringArray, + DataType::LargeUtf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::ILikeMatch, + BooleanArray, + DataType::Boolean, + [true, true, false, true, true], + ); + test_coercion!( + LargeStringArray, + DataType::LargeUtf8, + vec!["abc"; 5], + LargeStringArray, + DataType::LargeUtf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::NotLikeMatch, + BooleanArray, + DataType::Boolean, + [false, true, true, false, true], + ); + test_coercion!( + LargeStringArray, + DataType::LargeUtf8, + vec!["abc"; 5], + LargeStringArray, + DataType::LargeUtf8, + vec!["a__", "A%BC", "A_BC", "abc", "a%C"], + Operator::NotILikeMatch, + BooleanArray, + DataType::Boolean, + 
[false, false, true, false, false], + ); test_coercion!( Int16Array, DataType::Int16, diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 414ddd0921a8..6a168e2f1e5f 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -148,6 +148,11 @@ impl CaseExpr { // Make sure we only consider rows that have not been matched yet let when_match = and(&when_match, &remainder)?; + // When no rows available for when clause, skip then clause + if when_match.true_count() == 0 { + continue; + } + let then_value = self.when_then_expr[i] .1 .evaluate_selection(batch, &when_match)?; @@ -214,6 +219,11 @@ impl CaseExpr { // Make sure we only consider rows that have not been matched yet let when_value = and(&when_value, &remainder)?; + // When no rows available for when clause, skip then clause + if when_value.true_count() == 0 { + continue; + } + let then_value = self.when_then_expr[i] .1 .evaluate_selection(batch, &when_value)?; diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index bbfba4ad8310..007a03985f45 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -105,14 +105,13 @@ pub(crate) mod tests { use std::sync::Arc; use crate::expressions::{col, create_aggregate_expr, try_cast}; - use crate::{AggregateExpr, EmitTo}; - + use crate::AggregateExpr; use arrow::record_batch::RecordBatch; use arrow_array::ArrayRef; use arrow_schema::{Field, Schema}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::coerce_types; - use datafusion_expr::AggregateFunction; + use datafusion_expr::{AggregateFunction, EmitTo}; /// macro to perform an aggregation using [`datafusion_expr::Accumulator`] and verify the /// result. 
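The new `LikeMatch` / `ILikeMatch` / `NotLikeMatch` / `NotILikeMatch` arms in `BinaryExpr::evaluate` hand the comparison straight to arrow's `like` / `ilike` / `nlike` / `nilike` kernels via `apply_cmp`, and the `test_coercion!` cases above pin down the expected row-wise results. A minimal standalone sketch of those kernel semantics, assuming arrow 50's `Datum`-based comparison API (the same `like`/`ilike` imports the hunk adds):

```rust
use arrow::array::{BooleanArray, StringArray};
use arrow::compute::{ilike, like};

fn main() {
    // Row-wise LIKE / ILIKE over two string columns: `_` matches exactly one
    // character, `%` matches any run, and the ILIKE variants ignore case.
    let values = StringArray::from(vec!["abc"; 3]);
    let patterns = StringArray::from(vec!["a__", "A%BC", "A_BC"]);

    // Case-sensitive: only the all-lowercase pattern with two `_` wildcards matches.
    assert_eq!(
        like(&values, &patterns).unwrap(),
        BooleanArray::from(vec![true, false, false])
    );
    // Case-insensitive: "A%BC" now matches "abc" as well.
    assert_eq!(
        ilike(&values, &patterns).unwrap(),
        BooleanArray::from(vec![true, true, false])
    );
}
```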
diff --git a/datafusion/physical-expr/src/expressions/not.rs b/datafusion/physical-expr/src/expressions/not.rs index 4ceccc6932fe..f17df73e3070 100644 --- a/datafusion/physical-expr/src/expressions/not.rs +++ b/datafusion/physical-expr/src/expressions/not.rs @@ -26,9 +26,7 @@ use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::{ - cast::as_boolean_array, internal_err, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{cast::as_boolean_array, Result, ScalarValue}; use datafusion_expr::ColumnarValue; /// Not expression @@ -83,13 +81,6 @@ impl PhysicalExpr for NotExpr { if scalar.is_null() { return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); } - let value_type = scalar.data_type(); - if value_type != DataType::Boolean { - return internal_err!( - "NOT '{:?}' can't be evaluated because the expression's type is {:?}, not boolean or NULL", - self.arg, value_type - ); - } let bool_value: bool = scalar.try_into()?; Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some( !bool_value, diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 66e22d2302de..2bfdf499123b 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -42,6 +42,7 @@ use arrow::{ compute::kernels::length::{bit_length, length}, datatypes::{DataType, Int32Type, Int64Type, Schema}, }; +use arrow_array::Array; use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; pub use datafusion_expr::FuncMonotonicity; use datafusion_expr::{ @@ -191,9 +192,68 @@ pub(crate) enum Hint { AcceptsSingular, } -/// decorates a function to handle [`ScalarValue`]s by converting them to arrays before calling the function +/// A helper function used to infer the length of arguments of Scalar functions and convert +/// [`ColumnarValue`]s to [`ArrayRef`]s of the inferred length. Note that this function +/// only works for functions whose arguments are either all scalars or all arrays +/// of the same length. Otherwise, it will return an error. +pub fn columnar_values_to_array(args: &[ColumnarValue]) -> Result<Vec<ArrayRef>> { + if args.is_empty() { + return Ok(vec![]); + } + + let len = args + .iter() + .fold(Option::<usize>::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) if acc.is_none() => Some(1), + ColumnarValue::Scalar(_) => { + if let Some(1) = acc { + acc + } else { + None + } + } + ColumnarValue::Array(a) => { + if let Some(l) = acc { + if l == a.len() { + acc + } else { + None + } + } else { + Some(a.len()) + } + } + }); + + let inferred_length = len.ok_or(DataFusionError::Internal( + "Arguments has mixed length".to_string(), + ))?; + + let args = args + .iter() + .map(|arg| arg.clone().into_array(inferred_length)) + .collect::<Result<Vec<_>>>()?; + + Ok(args) +} + +/// Decorates a function to handle [`ScalarValue`]s by converting them to arrays before calling the function /// and vice-versa after evaluation. +/// Note that this function makes a scalar function with no arguments or all scalar inputs return a scalar. +/// That is to say, its output will be the same for all input rows in a batch.
+#[deprecated( + since = "36.0.0", + note = "Implement your function directly in terms of ColumnarValue or use `ScalarUDF` instead" +)] pub fn make_scalar_function(inner: F) -> ScalarFunctionImplementation +where + F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, +{ + make_scalar_function_inner(inner) +} + +/// Internal implementation, see comments on `make_scalar_function` for caveats +pub(crate) fn make_scalar_function_inner(inner: F) -> ScalarFunctionImplementation where F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, { @@ -260,9 +320,9 @@ pub fn create_physical_fun( ) -> Result { Ok(match fun { // math functions - BuiltinScalarFunction::Abs => { - Arc::new(|args| make_scalar_function(math_expressions::abs_invoke)(args)) - } + BuiltinScalarFunction::Abs => Arc::new(|args| { + make_scalar_function_inner(math_expressions::abs_invoke)(args) + }), BuiltinScalarFunction::Acos => Arc::new(math_expressions::acos), BuiltinScalarFunction::Asin => Arc::new(math_expressions::asin), BuiltinScalarFunction::Atan => Arc::new(math_expressions::atan), @@ -275,31 +335,31 @@ pub fn create_physical_fun( BuiltinScalarFunction::Degrees => Arc::new(math_expressions::to_degrees), BuiltinScalarFunction::Exp => Arc::new(math_expressions::exp), BuiltinScalarFunction::Factorial => { - Arc::new(|args| make_scalar_function(math_expressions::factorial)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::factorial)(args)) } BuiltinScalarFunction::Floor => Arc::new(math_expressions::floor), BuiltinScalarFunction::Gcd => { - Arc::new(|args| make_scalar_function(math_expressions::gcd)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::gcd)(args)) } BuiltinScalarFunction::Isnan => { - Arc::new(|args| make_scalar_function(math_expressions::isnan)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::isnan)(args)) } BuiltinScalarFunction::Iszero => { - Arc::new(|args| make_scalar_function(math_expressions::iszero)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::iszero)(args)) } BuiltinScalarFunction::Lcm => { - Arc::new(|args| make_scalar_function(math_expressions::lcm)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::lcm)(args)) } BuiltinScalarFunction::Ln => Arc::new(math_expressions::ln), BuiltinScalarFunction::Log10 => Arc::new(math_expressions::log10), BuiltinScalarFunction::Log2 => Arc::new(math_expressions::log2), BuiltinScalarFunction::Nanvl => { - Arc::new(|args| make_scalar_function(math_expressions::nanvl)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::nanvl)(args)) } BuiltinScalarFunction::Radians => Arc::new(math_expressions::to_radians), BuiltinScalarFunction::Random => Arc::new(math_expressions::random), BuiltinScalarFunction::Round => { - Arc::new(|args| make_scalar_function(math_expressions::round)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::round)(args)) } BuiltinScalarFunction::Signum => Arc::new(math_expressions::signum), BuiltinScalarFunction::Sin => Arc::new(math_expressions::sin), @@ -309,135 +369,135 @@ pub fn create_physical_fun( BuiltinScalarFunction::Tan => Arc::new(math_expressions::tan), BuiltinScalarFunction::Tanh => Arc::new(math_expressions::tanh), BuiltinScalarFunction::Trunc => { - Arc::new(|args| make_scalar_function(math_expressions::trunc)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::trunc)(args)) } BuiltinScalarFunction::Pi => Arc::new(math_expressions::pi), BuiltinScalarFunction::Power => { - Arc::new(|args| 
make_scalar_function(math_expressions::power)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::power)(args)) } BuiltinScalarFunction::Atan2 => { - Arc::new(|args| make_scalar_function(math_expressions::atan2)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::atan2)(args)) } BuiltinScalarFunction::Log => { - Arc::new(|args| make_scalar_function(math_expressions::log)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::log)(args)) } BuiltinScalarFunction::Cot => { - Arc::new(|args| make_scalar_function(math_expressions::cot)(args)) + Arc::new(|args| make_scalar_function_inner(math_expressions::cot)(args)) } // array functions - BuiltinScalarFunction::ArrayAppend => { - Arc::new(|args| make_scalar_function(array_expressions::array_append)(args)) - } - BuiltinScalarFunction::ArraySort => { - Arc::new(|args| make_scalar_function(array_expressions::array_sort)(args)) - } - BuiltinScalarFunction::ArrayConcat => { - Arc::new(|args| make_scalar_function(array_expressions::array_concat)(args)) - } - BuiltinScalarFunction::ArrayEmpty => { - Arc::new(|args| make_scalar_function(array_expressions::array_empty)(args)) - } - BuiltinScalarFunction::ArrayHasAll => { - Arc::new(|args| make_scalar_function(array_expressions::array_has_all)(args)) - } - BuiltinScalarFunction::ArrayHasAny => { - Arc::new(|args| make_scalar_function(array_expressions::array_has_any)(args)) - } - BuiltinScalarFunction::ArrayHas => { - Arc::new(|args| make_scalar_function(array_expressions::array_has)(args)) - } - BuiltinScalarFunction::ArrayDims => { - Arc::new(|args| make_scalar_function(array_expressions::array_dims)(args)) - } - BuiltinScalarFunction::ArrayDistinct => { - Arc::new(|args| make_scalar_function(array_expressions::array_distinct)(args)) - } - BuiltinScalarFunction::ArrayElement => { - Arc::new(|args| make_scalar_function(array_expressions::array_element)(args)) - } - BuiltinScalarFunction::ArrayExcept => { - Arc::new(|args| make_scalar_function(array_expressions::array_except)(args)) - } - BuiltinScalarFunction::ArrayLength => { - Arc::new(|args| make_scalar_function(array_expressions::array_length)(args)) - } + BuiltinScalarFunction::ArrayAppend => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_append)(args) + }), + BuiltinScalarFunction::ArraySort => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_sort)(args) + }), + BuiltinScalarFunction::ArrayConcat => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_concat)(args) + }), + BuiltinScalarFunction::ArrayEmpty => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_empty)(args) + }), + BuiltinScalarFunction::ArrayHasAll => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_has_all)(args) + }), + BuiltinScalarFunction::ArrayHasAny => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_has_any)(args) + }), + BuiltinScalarFunction::ArrayHas => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_has)(args) + }), + BuiltinScalarFunction::ArrayDims => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_dims)(args) + }), + BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_distinct)(args) + }), + BuiltinScalarFunction::ArrayElement => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_element)(args) + }), + BuiltinScalarFunction::ArrayExcept => Arc::new(|args| { + 
make_scalar_function_inner(array_expressions::array_except)(args) + }), + BuiltinScalarFunction::ArrayLength => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_length)(args) + }), BuiltinScalarFunction::Flatten => { - Arc::new(|args| make_scalar_function(array_expressions::flatten)(args)) - } - BuiltinScalarFunction::ArrayNdims => { - Arc::new(|args| make_scalar_function(array_expressions::array_ndims)(args)) + Arc::new(|args| make_scalar_function_inner(array_expressions::flatten)(args)) } + BuiltinScalarFunction::ArrayNdims => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_ndims)(args) + }), BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| { - make_scalar_function(array_expressions::array_pop_front)(args) + make_scalar_function_inner(array_expressions::array_pop_front)(args) + }), + BuiltinScalarFunction::ArrayPopBack => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_pop_back)(args) + }), + BuiltinScalarFunction::ArrayPosition => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_position)(args) }), - BuiltinScalarFunction::ArrayPopBack => { - Arc::new(|args| make_scalar_function(array_expressions::array_pop_back)(args)) - } - BuiltinScalarFunction::ArrayPosition => { - Arc::new(|args| make_scalar_function(array_expressions::array_position)(args)) - } BuiltinScalarFunction::ArrayPositions => Arc::new(|args| { - make_scalar_function(array_expressions::array_positions)(args) + make_scalar_function_inner(array_expressions::array_positions)(args) + }), + BuiltinScalarFunction::ArrayPrepend => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_prepend)(args) + }), + BuiltinScalarFunction::ArrayRepeat => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_repeat)(args) + }), + BuiltinScalarFunction::ArrayRemove => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_remove)(args) + }), + BuiltinScalarFunction::ArrayRemoveN => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_remove_n)(args) }), - BuiltinScalarFunction::ArrayPrepend => { - Arc::new(|args| make_scalar_function(array_expressions::array_prepend)(args)) - } - BuiltinScalarFunction::ArrayRepeat => { - Arc::new(|args| make_scalar_function(array_expressions::array_repeat)(args)) - } - BuiltinScalarFunction::ArrayRemove => { - Arc::new(|args| make_scalar_function(array_expressions::array_remove)(args)) - } - BuiltinScalarFunction::ArrayRemoveN => { - Arc::new(|args| make_scalar_function(array_expressions::array_remove_n)(args)) - } BuiltinScalarFunction::ArrayRemoveAll => Arc::new(|args| { - make_scalar_function(array_expressions::array_remove_all)(args) + make_scalar_function_inner(array_expressions::array_remove_all)(args) + }), + BuiltinScalarFunction::ArrayReplace => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_replace)(args) }), - BuiltinScalarFunction::ArrayReplace => { - Arc::new(|args| make_scalar_function(array_expressions::array_replace)(args)) - } BuiltinScalarFunction::ArrayReplaceN => Arc::new(|args| { - make_scalar_function(array_expressions::array_replace_n)(args) + make_scalar_function_inner(array_expressions::array_replace_n)(args) }), BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| { - make_scalar_function(array_expressions::array_replace_all)(args) + make_scalar_function_inner(array_expressions::array_replace_all)(args) + }), + BuiltinScalarFunction::ArraySlice => Arc::new(|args| { + 
make_scalar_function_inner(array_expressions::array_slice)(args) }), - BuiltinScalarFunction::ArraySlice => { - Arc::new(|args| make_scalar_function(array_expressions::array_slice)(args)) - } BuiltinScalarFunction::ArrayToString => Arc::new(|args| { - make_scalar_function(array_expressions::array_to_string)(args) + make_scalar_function_inner(array_expressions::array_to_string)(args) }), BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| { - make_scalar_function(array_expressions::array_intersect)(args) + make_scalar_function_inner(array_expressions::array_intersect)(args) + }), + BuiltinScalarFunction::Range => Arc::new(|args| { + make_scalar_function_inner(array_expressions::gen_range)(args) + }), + BuiltinScalarFunction::Cardinality => Arc::new(|args| { + make_scalar_function_inner(array_expressions::cardinality)(args) + }), + BuiltinScalarFunction::ArrayResize => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_resize)(args) + }), + BuiltinScalarFunction::MakeArray => Arc::new(|args| { + make_scalar_function_inner(array_expressions::make_array)(args) + }), + BuiltinScalarFunction::ArrayUnion => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_union)(args) }), - BuiltinScalarFunction::Range => { - Arc::new(|args| make_scalar_function(array_expressions::gen_range)(args)) - } - BuiltinScalarFunction::Cardinality => { - Arc::new(|args| make_scalar_function(array_expressions::cardinality)(args)) - } - BuiltinScalarFunction::ArrayResize => { - Arc::new(|args| make_scalar_function(array_expressions::array_resize)(args)) - } - BuiltinScalarFunction::MakeArray => { - Arc::new(|args| make_scalar_function(array_expressions::make_array)(args)) - } - BuiltinScalarFunction::ArrayUnion => { - Arc::new(|args| make_scalar_function(array_expressions::array_union)(args)) - } // struct functions BuiltinScalarFunction::Struct => Arc::new(struct_expressions::struct_expr), // string functions BuiltinScalarFunction::Ascii => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::ascii::)(args) + make_scalar_function_inner(string_expressions::ascii::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::ascii::)(args) + make_scalar_function_inner(string_expressions::ascii::)(args) } other => internal_err!("Unsupported data type {other:?} for function ascii"), }), @@ -455,10 +515,10 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::Btrim => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::btrim::)(args) + make_scalar_function_inner(string_expressions::btrim::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::btrim::)(args) + make_scalar_function_inner(string_expressions::btrim::)(args) } other => internal_err!("Unsupported data type {other:?} for function btrim"), }), @@ -470,7 +530,7 @@ pub fn create_physical_fun( Int32Type, "character_length" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -478,7 +538,7 @@ pub fn create_physical_fun( Int64Type, "character_length" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!( "Unsupported data type {other:?} for function character_length" @@ -486,13 +546,13 @@ pub fn create_physical_fun( }) } BuiltinScalarFunction::Chr => { - Arc::new(|args| 
make_scalar_function(string_expressions::chr)(args)) + Arc::new(|args| make_scalar_function_inner(string_expressions::chr)(args)) } BuiltinScalarFunction::Coalesce => Arc::new(conditional_expressions::coalesce), BuiltinScalarFunction::Concat => Arc::new(string_expressions::concat), - BuiltinScalarFunction::ConcatWithSeparator => { - Arc::new(|args| make_scalar_function(string_expressions::concat_ws)(args)) - } + BuiltinScalarFunction::ConcatWithSeparator => Arc::new(|args| { + make_scalar_function_inner(string_expressions::concat_ws)(args) + }), BuiltinScalarFunction::DatePart => Arc::new(datetime_expressions::date_part), BuiltinScalarFunction::DateTrunc => Arc::new(datetime_expressions::date_trunc), BuiltinScalarFunction::DateBin => Arc::new(datetime_expressions::date_bin), @@ -534,23 +594,32 @@ pub fn create_physical_fun( } BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::initcap::)(args) + make_scalar_function_inner(string_expressions::initcap::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::initcap::)(args) + make_scalar_function_inner(string_expressions::initcap::)(args) } other => { internal_err!("Unsupported data type {other:?} for function initcap") } }), + BuiltinScalarFunction::InStr => Arc::new(|args| match args[0].data_type() { + DataType::Utf8 => { + make_scalar_function_inner(string_expressions::instr::)(args) + } + DataType::LargeUtf8 => { + make_scalar_function_inner(string_expressions::instr::)(args) + } + other => internal_err!("Unsupported data type {other:?} for function instr"), + }), BuiltinScalarFunction::Left => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(left, i32, "left"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(left, i64, "left"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function left"), }), @@ -558,20 +627,20 @@ pub fn create_physical_fun( BuiltinScalarFunction::Lpad => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(lpad, i32, "lpad"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(lpad, i64, "lpad"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function lpad"), }), BuiltinScalarFunction::Ltrim => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::ltrim::)(args) + make_scalar_function_inner(string_expressions::ltrim::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::ltrim::)(args) + make_scalar_function_inner(string_expressions::ltrim::)(args) } other => internal_err!("Unsupported data type {other:?} for function ltrim"), }), @@ -608,7 +677,7 @@ pub fn create_physical_fun( i32, "regexp_match" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_on_array_if_regex_expressions_feature_flag!( @@ -616,7 +685,7 @@ pub fn create_physical_fun( i64, "regexp_match" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } 
other => internal_err!( "Unsupported data type {other:?} for function regexp_match" @@ -650,19 +719,19 @@ pub fn create_physical_fun( } BuiltinScalarFunction::Repeat => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::repeat::)(args) + make_scalar_function_inner(string_expressions::repeat::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::repeat::)(args) + make_scalar_function_inner(string_expressions::repeat::)(args) } other => internal_err!("Unsupported data type {other:?} for function repeat"), }), BuiltinScalarFunction::Replace => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::replace::)(args) + make_scalar_function_inner(string_expressions::replace::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::replace::)(args) + make_scalar_function_inner(string_expressions::replace::)(args) } other => { internal_err!("Unsupported data type {other:?} for function replace") @@ -672,12 +741,12 @@ pub fn create_physical_fun( DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(reverse, i32, "reverse"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(reverse, i64, "reverse"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => { internal_err!("Unsupported data type {other:?} for function reverse") @@ -687,32 +756,32 @@ pub fn create_physical_fun( DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(right, i32, "right"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(right, i64, "right"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function right"), }), BuiltinScalarFunction::Rpad => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(rpad, i32, "rpad"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(rpad, i64, "rpad"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function rpad"), }), BuiltinScalarFunction::Rtrim => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::rtrim::)(args) + make_scalar_function_inner(string_expressions::rtrim::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::rtrim::)(args) + make_scalar_function_inner(string_expressions::rtrim::)(args) } other => internal_err!("Unsupported data type {other:?} for function rtrim"), }), @@ -730,10 +799,10 @@ pub fn create_physical_fun( } BuiltinScalarFunction::SplitPart => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::split_part::)(args) + make_scalar_function_inner(string_expressions::split_part::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::split_part::)(args) + make_scalar_function_inner(string_expressions::split_part::)(args) } other => { internal_err!("Unsupported data type {other:?} for function split_part") @@ -741,12 +810,12 @@ pub fn 
create_physical_fun( }), BuiltinScalarFunction::StringToArray => { Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(array_expressions::string_to_array::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(array_expressions::string_to_array::)(args) - } + DataType::Utf8 => make_scalar_function_inner( + array_expressions::string_to_array::, + )(args), + DataType::LargeUtf8 => make_scalar_function_inner( + array_expressions::string_to_array::, + )(args), other => { internal_err!( "Unsupported data type {other:?} for function string_to_array" @@ -756,27 +825,38 @@ pub fn create_physical_fun( } BuiltinScalarFunction::StartsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::starts_with::)(args) + make_scalar_function_inner(string_expressions::starts_with::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::starts_with::)(args) + make_scalar_function_inner(string_expressions::starts_with::)(args) } other => { internal_err!("Unsupported data type {other:?} for function starts_with") } }), + BuiltinScalarFunction::EndsWith => Arc::new(|args| match args[0].data_type() { + DataType::Utf8 => { + make_scalar_function_inner(string_expressions::ends_with::)(args) + } + DataType::LargeUtf8 => { + make_scalar_function_inner(string_expressions::ends_with::)(args) + } + other => { + internal_err!("Unsupported data type {other:?} for function ends_with") + } + }), BuiltinScalarFunction::Strpos => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!( strpos, Int32Type, "strpos" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( strpos, Int64Type, "strpos" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function strpos"), }), @@ -784,21 +864,21 @@ pub fn create_physical_fun( DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(substr, i32, "substr"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!(substr, i64, "substr"); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => internal_err!("Unsupported data type {other:?} for function substr"), }), BuiltinScalarFunction::ToHex => Arc::new(|args| match args[0].data_type() { DataType::Int32 => { - make_scalar_function(string_expressions::to_hex::)(args) + make_scalar_function_inner(string_expressions::to_hex::)(args) } DataType::Int64 => { - make_scalar_function(string_expressions::to_hex::)(args) + make_scalar_function_inner(string_expressions::to_hex::)(args) } other => internal_err!("Unsupported data type {other:?} for function to_hex"), }), @@ -809,7 +889,7 @@ pub fn create_physical_fun( i32, "translate" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -817,7 +897,7 @@ pub fn create_physical_fun( i64, "translate" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => { internal_err!("Unsupported data type {other:?} for function translate") @@ -825,10 +905,10 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::Trim => Arc::new(|args| match 
args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::btrim::)(args) + make_scalar_function_inner(string_expressions::btrim::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::btrim::)(args) + make_scalar_function_inner(string_expressions::btrim::)(args) } other => internal_err!("Unsupported data type {other:?} for function trim"), }), @@ -849,10 +929,10 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { - make_scalar_function(string_expressions::overlay::)(args) + make_scalar_function_inner(string_expressions::overlay::)(args) } DataType::LargeUtf8 => { - make_scalar_function(string_expressions::overlay::)(args) + make_scalar_function_inner(string_expressions::overlay::)(args) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {other:?} for function overlay", @@ -860,12 +940,12 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::Levenshtein => { Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::levenshtein::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::levenshtein::)(args) - } + DataType::Utf8 => make_scalar_function_inner( + string_expressions::levenshtein::, + )(args), + DataType::LargeUtf8 => make_scalar_function_inner( + string_expressions::levenshtein::, + )(args), other => Err(DataFusionError::Internal(format!( "Unsupported data type {other:?} for function levenshtein", ))), @@ -879,7 +959,7 @@ pub fn create_physical_fun( i32, "substr_index" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -887,7 +967,7 @@ pub fn create_physical_fun( i64, "substr_index" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {other:?} for function substr_index", @@ -901,7 +981,7 @@ pub fn create_physical_fun( Int32Type, "find_in_set" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -909,7 +989,7 @@ pub fn create_physical_fun( Int64Type, "find_in_set" ); - make_scalar_function(func)(args) + make_scalar_function_inner(func)(args) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {other:?} for function find_in_set", @@ -987,7 +1067,7 @@ mod tests { use arrow::{ array::{ Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float64Array, - Int32Array, StringArray, UInt64Array, + Int32Array, Int64Array, StringArray, UInt64Array, }, datatypes::Field, record_batch::RecordBatch, @@ -1379,6 +1459,95 @@ mod tests { Utf8, StringArray ); + test_function!( + InStr, + &[lit("abc"), lit("b")], + Ok(Some(2)), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[lit("abc"), lit("c")], + Ok(Some(3)), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[lit("abc"), lit("d")], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[lit("abc"), lit("")], + Ok(Some(1)), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[lit("Helloworld"), lit("world")], + Ok(Some(6)), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[lit("Helloworld"), lit(ScalarValue::Utf8(None))], + Ok(None), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + 
&[lit(ScalarValue::Utf8(None)), lit("Hello")], + Ok(None), + i32, + Int32, + Int32Array + ); + test_function!( + InStr, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(Some(6)), + i64, + Int64, + Int64Array + ); + test_function!( + InStr, + &[ + lit(ScalarValue::LargeUtf8(None)), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(None), + i64, + Int64, + Int64Array + ); + test_function!( + InStr, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(None)) + ], + Ok(None), + i64, + Int64, + Int64Array + ); #[cfg(feature = "unicode_expressions")] test_function!( Left, @@ -2497,6 +2666,38 @@ mod tests { Boolean, BooleanArray ); + test_function!( + EndsWith, + &[lit("alphabet"), lit("alph"),], + Ok(Some(false)), + bool, + Boolean, + BooleanArray + ); + test_function!( + EndsWith, + &[lit("alphabet"), lit("bet"),], + Ok(Some(true)), + bool, + Boolean, + BooleanArray + ); + test_function!( + EndsWith, + &[lit(ScalarValue::Utf8(None)), lit("alph"),], + Ok(None), + bool, + Boolean, + BooleanArray + ); + test_function!( + EndsWith, + &[lit("alphabet"), lit(ScalarValue::Utf8(None)),], + Ok(None), + bool, + Boolean, + BooleanArray + ); #[cfg(feature = "unicode_expressions")] test_function!( Strpos, @@ -3108,7 +3309,7 @@ mod tests { #[test] fn test_make_scalar_function() -> Result<()> { - let adapter_func = make_scalar_function(dummy_function); + let adapter_func = make_scalar_function_inner(dummy_function); let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); let array_arg = ColumnarValue::Array( diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index fffa8f602d87..6f55f56916e7 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -48,9 +48,7 @@ pub mod utils; pub mod var_provider; pub mod window; -pub use aggregate::groups_accumulator::{ - EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter, -}; +pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, NullState}; pub use aggregate::AggregateExpr; pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; pub use equivalence::EquivalenceProperties; diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs index b778fd86c24b..bdd272563e75 100644 --- a/datafusion/physical-expr/src/regex_expressions.rs +++ b/datafusion/physical-expr/src/regex_expressions.rs @@ -36,7 +36,9 @@ use hashbrown::HashMap; use regex::Regex; use std::sync::{Arc, OnceLock}; -use crate::functions::{make_scalar_function, make_scalar_function_with_hints, Hint}; +use crate::functions::{ + make_scalar_function_inner, make_scalar_function_with_hints, Hint, +}; /// Get the first argument from the given string array. /// @@ -401,7 +403,7 @@ pub fn specialize_regexp_replace( // If there are no specialized implementations, we'll fall back to the // generic implementation. 
- (_, _, _, _) => Ok(make_scalar_function(regexp_replace::)), + (_, _, _, _) => Ok(make_scalar_function_inner(regexp_replace::)), } } diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index 7d9fecf61407..d5344773cfbc 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -23,8 +23,8 @@ use arrow::{ array::{ - Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, Int64Array, - OffsetSizeTrait, StringArray, + Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, OffsetSizeTrait, + StringArray, }, datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType}, }; @@ -296,6 +296,50 @@ pub fn initcap(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } +/// Returns the position of the first occurrence of substring in string. +/// The position is counted from 1. If the substring is not found, returns 0. +/// For example, instr('Helloworld', 'world') = 6. +pub fn instr(args: &[ArrayRef]) -> Result { + let string_array = as_generic_string_array::(&args[0])?; + let substr_array = as_generic_string_array::(&args[1])?; + + match args[0].data_type() { + DataType::Utf8 => { + let result = string_array + .iter() + .zip(substr_array.iter()) + .map(|(string, substr)| match (string, substr) { + (Some(string), Some(substr)) => string + .find(substr) + .map_or(Some(0), |index| Some((index + 1) as i32)), + _ => None, + }) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) + } + DataType::LargeUtf8 => { + let result = string_array + .iter() + .zip(substr_array.iter()) + .map(|(string, substr)| match (string, substr) { + (Some(string), Some(substr)) => string + .find(substr) + .map_or(Some(0), |index| Some((index + 1) as i64)), + _ => None, + }) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) + } + other => { + internal_err!( + "instr was called with {other} datatype arguments. It requires Utf8 or LargeUtf8." + ) + } + } +} + /// Converts the string to all lower case. /// lower('TOM') = 'tom' pub fn lower(args: &[ColumnarValue]) -> Result { @@ -461,17 +505,21 @@ pub fn split_part(args: &[ArrayRef]) -> Result { /// Returns true if string starts with prefix. /// starts_with('alphabet', 'alph') = 't' pub fn starts_with(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let prefix_array = as_generic_string_array::(&args[1])?; + let left = as_generic_string_array::(&args[0])?; + let right = as_generic_string_array::(&args[1])?; - let result = string_array - .iter() - .zip(prefix_array.iter()) - .map(|(string, prefix)| match (string, prefix) { - (Some(string), Some(prefix)) => Some(string.starts_with(prefix)), - _ => None, - }) - .collect::(); + let result = arrow::compute::kernels::comparison::starts_with(left, right)?; + + Ok(Arc::new(result) as ArrayRef) +} + +/// Returns true if string ends with suffix. 
+/// ends_with('alphabet', 'abet') = 't' +pub fn ends_with(args: &[ArrayRef]) -> Result { + let left = as_generic_string_array::(&args[0])?; + let right = as_generic_string_array::(&args[1])?; + + let result = arrow::compute::kernels::comparison::ends_with(left, right)?; Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/physical-expr/src/window/cume_dist.rs b/datafusion/physical-expr/src/window/cume_dist.rs index edef77c51c31..9720187ea83d 100644 --- a/datafusion/physical-expr/src/window/cume_dist.rs +++ b/datafusion/physical-expr/src/window/cume_dist.rs @@ -34,11 +34,16 @@ use std::sync::Arc; #[derive(Debug)] pub struct CumeDist { name: String, + /// Output data type + data_type: DataType, } /// Create a cume_dist window function -pub fn cume_dist(name: String) -> CumeDist { - CumeDist { name } +pub fn cume_dist(name: String, data_type: &DataType) -> CumeDist { + CumeDist { + name, + data_type: data_type.clone(), + } } impl BuiltInWindowFunctionExpr for CumeDist { @@ -49,8 +54,7 @@ impl BuiltInWindowFunctionExpr for CumeDist { fn field(&self) -> Result { let nullable = false; - let data_type = DataType::Float64; - Ok(Field::new(self.name(), data_type, nullable)) + Ok(Field::new(self.name(), self.data_type.clone(), nullable)) } fn expressions(&self) -> Vec> { @@ -119,7 +123,7 @@ mod tests { #[test] #[allow(clippy::single_range_in_vec_init)] fn test_cume_dist() -> Result<()> { - let r = cume_dist("arr".into()); + let r = cume_dist("arr".into(), &DataType::Float64); let expected = vec![0.0; 0]; test_i32_result(&r, 0, vec![], expected)?; diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/physical-expr/src/window/lead_lag.rs index 7ee736ce9caa..c218b5555afc 100644 --- a/datafusion/physical-expr/src/window/lead_lag.rs +++ b/datafusion/physical-expr/src/window/lead_lag.rs @@ -23,8 +23,7 @@ use crate::PhysicalExpr; use arrow::array::ArrayRef; use arrow::compute::cast; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{arrow_datafusion_err, ScalarValue}; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::PartitionEvaluator; use std::any::Any; use std::cmp::min; @@ -35,6 +34,7 @@ use std::sync::Arc; #[derive(Debug)] pub struct WindowShift { name: String, + /// Output data type data_type: DataType, shift_offset: i64, expr: Arc, @@ -235,14 +235,10 @@ fn get_default_value( default_value: Option<&ScalarValue>, dtype: &DataType, ) -> Result { - if let Some(value) = default_value { - if let ScalarValue::Int64(Some(val)) = value { - ScalarValue::try_from_string(val.to_string(), dtype) - } else { - internal_err!("Expects default value to have Int64 type") - } - } else { - Ok(ScalarValue::try_from(dtype)?) 
+ match default_value { + Some(v) if !v.data_type().is_null() => v.cast_to(dtype), + // If None or Null datatype + _ => ScalarValue::try_from(dtype), } } diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index b3c89122ebad..05909ab25a07 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -39,6 +39,7 @@ use datafusion_expr::PartitionEvaluator; pub struct NthValue { name: String, expr: Arc, + /// Output data type data_type: DataType, kind: NthValueKind, } diff --git a/datafusion/physical-expr/src/window/ntile.rs b/datafusion/physical-expr/src/window/ntile.rs index f5442e1b0fee..fb7a7ad84fb7 100644 --- a/datafusion/physical-expr/src/window/ntile.rs +++ b/datafusion/physical-expr/src/window/ntile.rs @@ -35,11 +35,17 @@ use std::sync::Arc; pub struct Ntile { name: String, n: u64, + /// Output data type + data_type: DataType, } impl Ntile { - pub fn new(name: String, n: u64) -> Self { - Self { name, n } + pub fn new(name: String, n: u64, data_type: &DataType) -> Self { + Self { + name, + n, + data_type: data_type.clone(), + } } pub fn get_n(&self) -> u64 { @@ -54,8 +60,7 @@ impl BuiltInWindowFunctionExpr for Ntile { fn field(&self) -> Result { let nullable = false; - let data_type = DataType::UInt64; - Ok(Field::new(self.name(), data_type, nullable)) + Ok(Field::new(self.name(), self.data_type.clone(), nullable)) } fn expressions(&self) -> Vec> { diff --git a/datafusion/physical-expr/src/window/rank.rs b/datafusion/physical-expr/src/window/rank.rs index 86af5b322133..1f643f0280dc 100644 --- a/datafusion/physical-expr/src/window/rank.rs +++ b/datafusion/physical-expr/src/window/rank.rs @@ -41,6 +41,8 @@ use std::sync::Arc; pub struct Rank { name: String, rank_type: RankType, + /// Output data type + data_type: DataType, } impl Rank { @@ -58,26 +60,29 @@ pub enum RankType { } /// Create a rank window function -pub fn rank(name: String) -> Rank { +pub fn rank(name: String, data_type: &DataType) -> Rank { Rank { name, rank_type: RankType::Basic, + data_type: data_type.clone(), } } /// Create a dense rank window function -pub fn dense_rank(name: String) -> Rank { +pub fn dense_rank(name: String, data_type: &DataType) -> Rank { Rank { name, rank_type: RankType::Dense, + data_type: data_type.clone(), } } /// Create a percent rank window function -pub fn percent_rank(name: String) -> Rank { +pub fn percent_rank(name: String, data_type: &DataType) -> Rank { Rank { name, rank_type: RankType::Percent, + data_type: data_type.clone(), } } @@ -89,11 +94,7 @@ impl BuiltInWindowFunctionExpr for Rank { fn field(&self) -> Result { let nullable = false; - let data_type = match self.rank_type { - RankType::Basic | RankType::Dense => DataType::UInt64, - RankType::Percent => DataType::Float64, - }; - Ok(Field::new(self.name(), data_type, nullable)) + Ok(Field::new(self.name(), self.data_type.clone(), nullable)) } fn expressions(&self) -> Vec> { @@ -268,7 +269,7 @@ mod tests { #[test] fn test_dense_rank() -> Result<()> { - let r = dense_rank("arr".into()); + let r = dense_rank("arr".into(), &DataType::UInt64); test_without_rank(&r, vec![1; 8])?; test_with_rank(&r, vec![1, 1, 2, 3, 3, 3, 4, 5])?; Ok(()) @@ -276,7 +277,7 @@ mod tests { #[test] fn test_rank() -> Result<()> { - let r = rank("arr".into()); + let r = rank("arr".into(), &DataType::UInt64); test_without_rank(&r, vec![1; 8])?; test_with_rank(&r, vec![1, 1, 3, 4, 4, 4, 7, 8])?; Ok(()) @@ -285,7 +286,7 @@ mod tests { #[test] 
#[allow(clippy::single_range_in_vec_init)] fn test_percent_rank() -> Result<()> { - let r = percent_rank("arr".into()); + let r = percent_rank("arr".into(), &DataType::Float64); // empty case let expected = vec![0.0; 0]; diff --git a/datafusion/physical-expr/src/window/row_number.rs b/datafusion/physical-expr/src/window/row_number.rs index f5e2f65a656e..759f447ab0f8 100644 --- a/datafusion/physical-expr/src/window/row_number.rs +++ b/datafusion/physical-expr/src/window/row_number.rs @@ -36,12 +36,17 @@ use std::sync::Arc; #[derive(Debug)] pub struct RowNumber { name: String, + /// Output data type + data_type: DataType, } impl RowNumber { /// Create a new ROW_NUMBER function - pub fn new(name: impl Into) -> Self { - Self { name: name.into() } + pub fn new(name: impl Into, data_type: &DataType) -> Self { + Self { + name: name.into(), + data_type: data_type.clone(), + } } } @@ -53,8 +58,7 @@ impl BuiltInWindowFunctionExpr for RowNumber { fn field(&self) -> Result { let nullable = false; - let data_type = DataType::UInt64; - Ok(Field::new(self.name(), data_type, nullable)) + Ok(Field::new(self.name(), self.data_type.clone(), nullable)) } fn expressions(&self) -> Vec> { @@ -127,7 +131,7 @@ mod tests { ])); let schema = Schema::new(vec![Field::new("arr", DataType::Boolean, true)]); let batch = RecordBatch::try_new(Arc::new(schema), vec![arr])?; - let row_number = RowNumber::new("row_number".to_owned()); + let row_number = RowNumber::new("row_number".to_owned(), &DataType::UInt64); let values = row_number.evaluate_args(&batch)?; let result = row_number .create_evaluator()? @@ -145,7 +149,7 @@ mod tests { ])); let schema = Schema::new(vec![Field::new("arr", DataType::Boolean, false)]); let batch = RecordBatch::try_new(Arc::new(schema), vec![arr])?; - let row_number = RowNumber::new("row_number".to_owned()); + let row_number = RowNumber::new("row_number".to_owned(), &DataType::UInt64); let values = row_number.evaluate_args(&batch)?; let result = row_number .create_evaluator()? 
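These window-function changes all follow one pattern: the output `DataType` is now supplied at construction time and echoed back from `field()` instead of being hard-coded per expression. A minimal sketch of the new constructor shape; the import paths below are assumed re-exports (the types themselves live under `datafusion/physical-expr/src/window/`), so treat this as an illustration rather than a definitive API reference.

```rust
use arrow::datatypes::DataType;
use datafusion_common::Result;
// NOTE: re-export paths are assumptions; adjust to wherever RowNumber and the
// BuiltInWindowFunctionExpr trait are exposed in your build.
use datafusion_physical_expr::expressions::RowNumber;
use datafusion_physical_expr::window::BuiltInWindowFunctionExpr;

fn main() -> Result<()> {
    // ROW_NUMBER now receives its output type explicitly ...
    let row_number = RowNumber::new("row_number".to_owned(), &DataType::UInt64);
    // ... and `field()` reports whatever type was supplied, rather than a
    // hard-coded DataType::UInt64.
    assert_eq!(row_number.field()?.data_type(), &DataType::UInt64);
    Ok(())
}
```

The same constructor shape applies to `cume_dist`, `Ntile::new`, `rank`, `dense_rank` and `percent_rank`, as the updated tests above show.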
diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 357e036b6f39..1c638d9c184e 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -61,6 +61,7 @@ uuid = { version = "^1.2", features = ["v4"] } [dev-dependencies] rstest = { workspace = true } +rstest_reuse = "0.6.0" termtree = "0.4.1" tokio = { version = "1.28", features = [ "macros", diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index cafa385eac39..ef9aac3d3ef0 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -19,9 +19,9 @@ use arrow::record_batch::RecordBatch; use arrow_array::{downcast_primitive, ArrayRef}; use arrow_schema::SchemaRef; use datafusion_common::Result; -use datafusion_physical_expr::EmitTo; pub(crate) mod primitive; +use datafusion_expr::EmitTo; use primitive::GroupValuesPrimitive; mod row; diff --git a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs index e3ba284797d1..18d20f3c47e6 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs @@ -26,7 +26,7 @@ use arrow_array::{ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, PrimitiveArra use arrow_schema::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use datafusion_physical_expr::EmitTo; +use datafusion_expr::EmitTo; use half::f16; use hashbrown::raw::RawTable; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 10ff9edb8912..3b7480cd292a 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -25,7 +25,7 @@ use arrow_schema::{DataType, SchemaRef}; use datafusion_common::hash_utils::create_hashes; use datafusion_common::{DataFusionError, Result}; use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt}; -use datafusion_physical_expr::EmitTo; +use datafusion_expr::EmitTo; use hashbrown::raw::RawTable; /// A [`GroupValues`] making use of [`Rows`] diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index d3ae0d5ce01f..2d7a8cccc481 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1087,14 +1087,14 @@ fn create_accumulators( /// returns a vector of ArrayRefs, where each entry corresponds to either the /// final value (mode = Final, FinalPartitioned and Single) or states (mode = Partial) fn finalize_aggregation( - accumulators: &[AccumulatorItem], + accumulators: &mut [AccumulatorItem], mode: &AggregateMode, ) -> Result> { match mode { AggregateMode::Partial => { // Build the vector of states accumulators - .iter() + .iter_mut() .map(|accumulator| { accumulator.state().and_then(|e| { e.iter() @@ -1111,7 +1111,7 @@ fn finalize_aggregation( | AggregateMode::SinglePartitioned => { // Merge the state to the final value accumulators - .iter() + .iter_mut() .map(|accumulator| accumulator.evaluate().and_then(|v| v.to_array())) .collect() } diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs 
index 90eb488a2ead..5ec95bd79942 100644 --- a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -137,12 +137,13 @@ impl AggregateStream { None => { this.finished = true; let timer = this.baseline_metrics.elapsed_compute().timer(); - let result = finalize_aggregation(&this.accumulators, &this.mode) - .and_then(|columns| { - RecordBatch::try_new(this.schema.clone(), columns) - .map_err(Into::into) - }) - .record_output(&this.baseline_metrics); + let result = + finalize_aggregation(&mut this.accumulators, &this.mode) + .and_then(|columns| { + RecordBatch::try_new(this.schema.clone(), columns) + .map_err(Into::into) + }) + .record_output(&this.baseline_metrics); timer.done(); diff --git a/datafusion/physical-plan/src/aggregates/order/full.rs b/datafusion/physical-plan/src/aggregates/order/full.rs index f46ee687faf1..c15538e8ab8e 100644 --- a/datafusion/physical-plan/src/aggregates/order/full.rs +++ b/datafusion/physical-plan/src/aggregates/order/full.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_physical_expr::EmitTo; +use datafusion_expr::EmitTo; /// Tracks grouping state when the data is ordered entirely by its /// group keys diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index b258b97a9e84..4f1914b12c96 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -18,7 +18,8 @@ use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; -use datafusion_physical_expr::{EmitTo, PhysicalSortExpr}; +use datafusion_expr::EmitTo; +use datafusion_physical_expr::PhysicalSortExpr; mod full; mod partial; diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index ff8a75b9b28b..ecd37c913e98 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -20,7 +20,7 @@ use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use datafusion_physical_expr::EmitTo; +use datafusion_expr::EmitTo; use datafusion_physical_expr::PhysicalSortExpr; /// Tracks grouping state when the data is ordered by some subset of diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 6a0c02f5caf3..f9db0a050cfc 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -44,9 +44,10 @@ use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; +use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ - AggregateExpr, EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter, PhysicalSortExpr, + AggregateExpr, GroupsAccumulatorAdapter, PhysicalSortExpr, }; use futures::ready; diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 374a0ad50700..0c213f425785 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ 
-26,7 +26,7 @@ use std::{any::Any, usize, vec}; use crate::joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_from_indices, calculate_join_output_ordering, get_final_indices_from_bit_map, - need_produce_result_in_final, JoinHashMap, JoinHashMapType, + need_produce_result_in_final, JoinHashMap, JoinHashMapOffset, JoinHashMapType, }; use crate::{ coalesce_partitions::CoalescePartitionsExec, @@ -61,7 +61,8 @@ use arrow::util::bit_util; use arrow_array::cast::downcast_array; use arrow_schema::ArrowError; use datafusion_common::{ - exec_err, internal_err, plan_err, DataFusionError, JoinSide, JoinType, Result, + internal_datafusion_err, internal_err, plan_err, DataFusionError, JoinSide, JoinType, + Result, }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; @@ -644,6 +645,8 @@ impl ExecutionPlan for HashJoinExec { } }; + let batch_size = context.session_config().batch_size(); + let reservation = MemoryConsumer::new(format!("HashJoinStream[{partition}]")) .register(context.memory_pool()); @@ -665,6 +668,8 @@ impl ExecutionPlan for HashJoinExec { reservation, state: HashJoinStreamState::WaitBuildSide, build_side: BuildSide::Initial(BuildSideInitialState { left_fut }), + batch_size, + hashes_buffer: vec![], })) } @@ -908,16 +913,10 @@ enum HashJoinStreamState { Completed, } -/// Container for HashJoinStreamState::ProcessProbeBatch related data -struct ProcessProbeBatchState { - /// Current probe-side batch - batch: RecordBatch, -} - impl HashJoinStreamState { /// Tries to extract ProcessProbeBatchState from HashJoinStreamState enum. /// Returns an error if state is not ProcessProbeBatchState. - fn try_as_process_probe_batch(&self) -> Result<&ProcessProbeBatchState> { + fn try_as_process_probe_batch_mut(&mut self) -> Result<&mut ProcessProbeBatchState> { match self { HashJoinStreamState::ProcessProbeBatch(state) => Ok(state), _ => internal_err!("Expected hash join stream in ProcessProbeBatch state"), @@ -925,6 +924,25 @@ impl HashJoinStreamState { } } +/// Container for HashJoinStreamState::ProcessProbeBatch related data +struct ProcessProbeBatchState { + /// Current probe-side batch + batch: RecordBatch, + /// Starting offset for JoinHashMap lookups + offset: JoinHashMapOffset, + /// Max joined probe-side index from current batch + joined_probe_idx: Option, +} + +impl ProcessProbeBatchState { + fn advance(&mut self, offset: JoinHashMapOffset, joined_probe_idx: Option) { + self.offset = offset; + if joined_probe_idx.is_some() { + self.joined_probe_idx = joined_probe_idx; + } + } +} + /// [`Stream`] for [`HashJoinExec`] that does the actual join. /// /// This stream: @@ -960,6 +978,10 @@ struct HashJoinStream { state: HashJoinStreamState, /// Build side build_side: BuildSide, + /// Maximum output batch size + batch_size: usize, + /// Scratch space for computing hashes + hashes_buffer: Vec, } impl RecordBatchStream for HashJoinStream { @@ -968,7 +990,10 @@ impl RecordBatchStream for HashJoinStream { } } -/// Returns build/probe indices satisfying the equality condition. +/// Executes lookups by hash against JoinHashMap and resolves potential +/// hash collisions. +/// Returns build/probe indices satisfying the equality condition, along with +/// (optional) starting point for next iteration. 
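The `limit` these lookups receive is the session's `batch_size`, fetched once when the stream is built. A minimal sketch of that wiring, mirroring the `prepare_task_ctx` helper added to the tests further below (only calls that appear in this patch are used):

```rust
use std::sync::Arc;

use datafusion_execution::config::SessionConfig;
use datafusion_execution::TaskContext;

fn main() {
    // Each output batch of HashJoinExec is now capped at this size.
    let session_config = SessionConfig::default().with_batch_size(10);
    let task_ctx =
        Arc::new(TaskContext::default().with_session_config(session_config));
    assert_eq!(task_ctx.session_config().batch_size(), 10);
}
```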
/// /// # Example /// @@ -1014,20 +1039,17 @@ impl RecordBatchStream for HashJoinStream { /// Probe indices: 3, 3, 4, 5 /// ``` #[allow(clippy::too_many_arguments)] -pub fn build_equal_condition_join_indices( - build_hashmap: &T, +fn lookup_join_hashmap( + build_hashmap: &JoinHashMap, build_input_buffer: &RecordBatch, probe_batch: &RecordBatch, build_on: &[Column], probe_on: &[Column], - random_state: &RandomState, null_equals_null: bool, - hashes_buffer: &mut Vec, - filter: Option<&JoinFilter>, - build_side: JoinSide, - deleted_offset: Option, - fifo_hashmap: bool, -) -> Result<(UInt64Array, UInt32Array)> { + hashes_buffer: &[u64], + limit: usize, + offset: JoinHashMapOffset, +) -> Result<(UInt64Array, UInt32Array, Option)> { let keys_values = probe_on .iter() .map(|c| c.evaluate(probe_batch)?.into_array(probe_batch.num_rows())) @@ -1039,76 +1061,24 @@ pub fn build_equal_condition_join_indices( .into_array(build_input_buffer.num_rows()) }) .collect::>>()?; - hashes_buffer.clear(); - hashes_buffer.resize(probe_batch.num_rows(), 0); - let hash_values = create_hashes(&keys_values, random_state, hashes_buffer)?; - // In case build-side input has not been inverted while JoinHashMap creation, the chained list algorithm - // will return build indices for each probe row in a reverse order as such: - // Build Indices: [5, 4, 3] - // Probe Indices: [1, 1, 1] - // - // This affects the output sequence. Hypothetically, it's possible to preserve the lexicographic order on the build side. - // Let's consider probe rows [0,1] as an example: - // - // When the probe iteration sequence is reversed, the following pairings can be derived: - // - // For probe row 1: - // (5, 1) - // (4, 1) - // (3, 1) - // - // For probe row 0: - // (5, 0) - // (4, 0) - // (3, 0) - // - // After reversing both sets of indices, we obtain reversed indices: - // - // (3,0) - // (4,0) - // (5,0) - // (3,1) - // (4,1) - // (5,1) - // - // With this approach, the lexicographic order on both the probe side and the build side is preserved. - let (mut probe_indices, mut build_indices) = if fifo_hashmap { - build_hashmap.get_matched_indices(hash_values.iter().enumerate(), deleted_offset) - } else { - let (mut matched_probe, mut matched_build) = build_hashmap - .get_matched_indices(hash_values.iter().enumerate().rev(), deleted_offset); - - matched_probe.as_slice_mut().reverse(); - matched_build.as_slice_mut().reverse(); - - (matched_probe, matched_build) - }; + let (mut probe_builder, mut build_builder, next_offset) = build_hashmap + .get_matched_indices_with_limit_offset(hashes_buffer, None, limit, offset); - let left: UInt64Array = PrimitiveArray::new(build_indices.finish().into(), None); - let right: UInt32Array = PrimitiveArray::new(probe_indices.finish().into(), None); + let build_indices: UInt64Array = + PrimitiveArray::new(build_builder.finish().into(), None); + let probe_indices: UInt32Array = + PrimitiveArray::new(probe_builder.finish().into(), None); - let (left, right) = if let Some(filter) = filter { - // Filter the indices which satisfy the non-equal join condition, like `left.b1 = 10` - apply_join_filter_to_indices( - build_input_buffer, - probe_batch, - left, - right, - filter, - build_side, - )? 
- } else { - (left, right) - }; - - equal_rows_arr( - &left, - &right, + let (build_indices, probe_indices) = equal_rows_arr( + &build_indices, + &probe_indices, &build_join_values, &keys_values, null_equals_null, - ) + )?; + + Ok((build_indices, probe_indices, next_offset)) } // version of eq_dyn supporting equality on null arrays @@ -1253,9 +1223,25 @@ impl HashJoinStream { self.state = HashJoinStreamState::ExhaustedProbeSide; } Some(Ok(batch)) => { + // Precalculate hash values for fetched batch + let keys_values = self + .on_right + .iter() + .map(|c| c.evaluate(&batch)?.into_array(batch.num_rows())) + .collect::>>()?; + + self.hashes_buffer.clear(); + self.hashes_buffer.resize(batch.num_rows(), 0); + create_hashes(&keys_values, &self.random_state, &mut self.hashes_buffer)?; + + self.join_metrics.input_batches.add(1); + self.join_metrics.input_rows.add(batch.num_rows()); + self.state = HashJoinStreamState::ProcessProbeBatch(ProcessProbeBatchState { batch, + offset: (0, None), + joined_probe_idx: None, }); } Some(Err(err)) => return Poll::Ready(Err(err)), @@ -1270,70 +1256,108 @@ impl HashJoinStream { fn process_probe_batch( &mut self, ) -> Result>> { - let state = self.state.try_as_process_probe_batch()?; + let state = self.state.try_as_process_probe_batch_mut()?; let build_side = self.build_side.try_as_ready_mut()?; - self.join_metrics.input_batches.add(1); - self.join_metrics.input_rows.add(state.batch.num_rows()); let timer = self.join_metrics.join_time.timer(); - let mut hashes_buffer = vec![]; - // get the matched two indices for the on condition - let left_right_indices = build_equal_condition_join_indices( + // get the matched by join keys indices + let (left_indices, right_indices, next_offset) = lookup_join_hashmap( build_side.left_data.hash_map(), build_side.left_data.batch(), &state.batch, &self.on_left, &self.on_right, - &self.random_state, self.null_equals_null, - &mut hashes_buffer, - self.filter.as_ref(), - JoinSide::Left, - None, - true, - ); + &self.hashes_buffer, + self.batch_size, + state.offset, + )?; - let result = match left_right_indices { - Ok((left_side, right_side)) => { - // set the left bitmap - // and only left, full, left semi, left anti need the left bitmap - if need_produce_result_in_final(self.join_type) { - left_side.iter().flatten().for_each(|x| { - build_side.visited_left_side.set_bit(x as usize, true); - }); - } + // apply join filter if exists + let (left_indices, right_indices) = if let Some(filter) = &self.filter { + apply_join_filter_to_indices( + build_side.left_data.batch(), + &state.batch, + left_indices, + right_indices, + filter, + JoinSide::Left, + )? 
+ } else { + (left_indices, right_indices) + }; - // adjust the two side indices base on the join type - let (left_side, right_side) = adjust_indices_by_join_type( - left_side, - right_side, - state.batch.num_rows(), - self.join_type, - ); + // mark joined left-side indices as visited, if required by join type + if need_produce_result_in_final(self.join_type) { + left_indices.iter().flatten().for_each(|x| { + build_side.visited_left_side.set_bit(x as usize, true); + }); + } - let result = build_batch_from_indices( - &self.schema, - build_side.left_data.batch(), - &state.batch, - &left_side, - &right_side, - &self.column_indices, - JoinSide::Left, - ); - self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(state.batch.num_rows()); - result - } - Err(err) => { - exec_err!("Fail to build join indices in HashJoinExec, error:{err}") - } + // The goals of index alignment for different join types are: + // + // 1) Right & FullJoin -- to append all missing probe-side indices between + // previous (excluding) and current joined indices. + // 2) SemiJoin -- deduplicate probe indices in range between previous + // (excluding) and current joined indices. + // 3) AntiJoin -- return only missing indices in range between + // previous and current joined indices. + // Inclusion/exclusion of the indices themselves don't matter + // + // As a summary -- alignment range can be produced based only on + // joined (matched with filters applied) probe side indices, excluding starting one + // (left from previous iteration). + + // if any rows have been joined -- get last joined probe-side (right) row + // it's important that index counts as "joined" after hash collisions checks + // and join filters applied. + let last_joined_right_idx = match right_indices.len() { + 0 => None, + n => Some(right_indices.value(n - 1) as usize), }; + + // Calculate range and perform alignment. + // In case probe batch has been processed -- align all remaining rows. 
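To make the range calculation that follows concrete, here is a small standalone model (a hypothetical helper, not part of the patch) that mirrors the arithmetic used in `process_probe_batch`, plus the two boundary cases:

```rust
/// Mirrors the alignment-range arithmetic: start just past the last probe row
/// joined in a previous iteration; end at the whole batch once the probe batch
/// is exhausted, otherwise just past the last probe row joined this iteration.
fn alignment_range(
    joined_probe_idx: Option<usize>,      // carried over from the previous iteration
    last_joined_right_idx: Option<usize>, // produced by the current iteration
    probe_rows: usize,
    probe_batch_exhausted: bool,          // i.e. next_offset.is_none()
) -> std::ops::Range<usize> {
    let start = joined_probe_idx.map_or(0, |v| v + 1);
    let end = if probe_batch_exhausted {
        probe_rows
    } else {
        last_joined_right_idx.map_or(0, |v| v + 1)
    };
    start..end
}

fn main() {
    // Mid-batch iteration: rows up to index 1 were aligned earlier, this pass
    // joined up to probe row 3, and the batch still has matches left to emit.
    assert_eq!(alignment_range(Some(1), Some(3), 5, false), 2..4);
    // Final iteration for the batch: align everything that remains.
    assert_eq!(alignment_range(Some(3), None, 5, true), 4..5);
}
```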
+ let index_alignment_range_start = state.joined_probe_idx.map_or(0, |v| v + 1); + let index_alignment_range_end = if next_offset.is_none() { + state.batch.num_rows() + } else { + last_joined_right_idx.map_or(0, |v| v + 1) + }; + + let (left_indices, right_indices) = adjust_indices_by_join_type( + left_indices, + right_indices, + index_alignment_range_start..index_alignment_range_end, + self.join_type, + ); + + let result = build_batch_from_indices( + &self.schema, + build_side.left_data.batch(), + &state.batch, + &left_indices, + &right_indices, + &self.column_indices, + JoinSide::Left, + )?; + + self.join_metrics.output_batches.add(1); + self.join_metrics.output_rows.add(result.num_rows()); timer.done(); - self.state = HashJoinStreamState::FetchProbeBatch; + if next_offset.is_none() { + self.state = HashJoinStreamState::FetchProbeBatch; + } else { + state.advance( + next_offset + .ok_or_else(|| internal_datafusion_err!("unexpected None offset"))?, + last_joined_right_idx, + ) + }; - Ok(StatefulStreamResult::Ready(Some(result?))) + Ok(StatefulStreamResult::Ready(Some(result))) } /// Processes unmatched build-side rows for certain join types and produces output batch @@ -1399,15 +1423,15 @@ mod tests { use super::*; use crate::{ - common, expressions::Column, hash_utils::create_hashes, - joins::hash_join::build_equal_condition_join_indices, memory::MemoryExec, + common, expressions::Column, hash_utils::create_hashes, memory::MemoryExec, repartition::RepartitionExec, test::build_table_i32, test::exec::MockExec, }; use arrow::array::{ArrayRef, Date32Array, Int32Array, UInt32Builder, UInt64Builder}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::{ - assert_batches_eq, assert_batches_sorted_eq, assert_contains, ScalarValue, + assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, + ScalarValue, }; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; @@ -1415,6 +1439,21 @@ mod tests { use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use hashbrown::raw::RawTable; + use rstest::*; + use rstest_reuse::{self, *}; + + fn div_ceil(a: usize, b: usize) -> usize { + (a + b - 1) / b + } + + #[template] + #[rstest] + fn batch_sizes(#[values(8192, 10, 5, 2, 1)] batch_size: usize) {} + + fn prepare_task_ctx(batch_size: usize) -> Arc { + let session_config = SessionConfig::default().with_batch_size(batch_size); + Arc::new(TaskContext::default().with_session_config(session_config)) + } fn build_table( a: (&str, &Vec), @@ -1533,9 +1572,10 @@ mod tests { Ok((columns, batches)) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_inner_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_inner_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 5]), // this has a repetition @@ -1580,9 +1620,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn partitioned_join_inner_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn partitioned_join_inner_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 5]), // this has a repetition @@ -1703,9 +1744,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_inner_two() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + 
async fn join_inner_two(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 2]), ("b2", &vec![1, 2, 2]), @@ -1732,7 +1774,13 @@ mod tests { assert_eq!(columns, vec!["a1", "b2", "c1", "a1", "b2", "c2"]); - assert_eq!(batches.len(), 1); + // expected joined records = 3 + // in case batch_size is 1 - additional empty batch for remaining 3-2 row + let mut expected_batch_count = div_ceil(3, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + assert_eq!(batches.len(), expected_batch_count); let expected = [ "+----+----+----+----+----+----+", @@ -1751,9 +1799,10 @@ mod tests { } /// Test where the left has 2 parts, the right with 1 part => 1 part + #[apply(batch_sizes)] #[tokio::test] - async fn join_inner_one_two_parts_left() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_inner_one_two_parts_left(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let batch1 = build_table_i32( ("a1", &vec![1, 2]), ("b2", &vec![1, 2]), @@ -1787,7 +1836,13 @@ mod tests { assert_eq!(columns, vec!["a1", "b2", "c1", "a1", "b2", "c2"]); - assert_eq!(batches.len(), 1); + // expected joined records = 3 + // in case batch_size is 1 - additional empty batch for remaining 3-2 row + let mut expected_batch_count = div_ceil(3, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + assert_eq!(batches.len(), expected_batch_count); let expected = [ "+----+----+----+----+----+----+", @@ -1856,9 +1911,10 @@ mod tests { } /// Test where the left has 1 part, the right has 2 parts => 2 parts + #[apply(batch_sizes)] #[tokio::test] - async fn join_inner_one_two_parts_right() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_inner_one_two_parts_right(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 5]), // this has a repetition @@ -1890,7 +1946,14 @@ mod tests { // first part let stream = join.execute(0, task_ctx.clone())?; let batches = common::collect(stream).await?; - assert_eq!(batches.len(), 1); + + // expected joined records = 1 (first right batch) + // and additional empty batch for non-joined 20-6-80 + let mut expected_batch_count = div_ceil(1, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + assert_eq!(batches.len(), expected_batch_count); let expected = [ "+----+----+----+----+----+----+", @@ -1906,7 +1969,11 @@ mod tests { // second part let stream = join.execute(1, task_ctx.clone())?; let batches = common::collect(stream).await?; - assert_eq!(batches.len(), 1); + + // expected joined records = 2 (second right batch) + let expected_batch_count = div_ceil(2, batch_size); + assert_eq!(batches.len(), expected_batch_count); + let expected = [ "+----+----+----+----+----+----+", "| a1 | b1 | c1 | a2 | b1 | c2 |", @@ -1934,9 +2001,10 @@ mod tests { ) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_multi_batch() { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_multi_batch(batch_size: usize) { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right @@ -1975,9 +2043,10 @@ mod tests { assert_batches_sorted_eq!(expected, &batches); } + #[apply(batch_sizes)] #[tokio::test] - async fn join_full_multi_batch() { - let task_ctx = Arc::new(TaskContext::default()); + async fn 
join_full_multi_batch(batch_size: usize) { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right @@ -2019,9 +2088,10 @@ mod tests { assert_batches_sorted_eq!(expected, &batches); } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_empty_right() { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_empty_right(batch_size: usize) { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), @@ -2055,9 +2125,10 @@ mod tests { assert_batches_sorted_eq!(expected, &batches); } + #[apply(batch_sizes)] #[tokio::test] - async fn join_full_empty_right() { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_full_empty_right(batch_size: usize) { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), @@ -2091,9 +2162,10 @@ mod tests { assert_batches_sorted_eq!(expected, &batches); } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right @@ -2134,9 +2206,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn partitioned_join_left_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn partitioned_join_left_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right @@ -2197,9 +2270,10 @@ mod tests { ) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_semi() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_semi(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); // left_table left semi join right_table on left_table.b1 = right_table.b2 @@ -2231,9 +2305,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_semi_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_semi_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); @@ -2317,9 +2392,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_semi() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_semi(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); @@ -2353,9 +2429,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_semi_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_semi_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); @@ -2442,9 +2519,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_anti() -> Result<()> { - let task_ctx = 
Arc::new(TaskContext::default()); + async fn join_left_anti(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); // left_table left anti join right_table on left_table.b1 = right_table.b2 @@ -2475,9 +2553,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_anti_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_anti_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); // left_table left anti join right_table on left_table.b1 = right_table.b2 and right_table.a2!=8 @@ -2568,9 +2647,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_anti() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_anti(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); let on = vec![( @@ -2601,9 +2681,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_anti_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_anti_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_semi_anti_left_table(); let right = build_semi_anti_right_table(); // left_table right anti join right_table on left_table.b1 = right_table.b2 and left_table.a1!=13 @@ -2701,9 +2782,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), @@ -2739,9 +2821,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn partitioned_join_right_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn partitioned_join_right_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), @@ -2778,9 +2861,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_full_one() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_full_one(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right @@ -2845,21 +2929,26 @@ mod tests { ("c", &vec![30, 40]), ); + // Join key column for both join sides + let key_column = Column::new("a", 0); + let join_hash_map = JoinHashMap::new(hashmap_left, next); - let (l, r) = build_equal_condition_join_indices( + let right_keys_values = + key_column.evaluate(&right)?.into_array(right.num_rows())?; + let mut hashes_buffer = vec![0; right.num_rows()]; + create_hashes(&[right_keys_values], &random_state, &mut hashes_buffer)?; + + let (l, r, _) = lookup_join_hashmap( &join_hash_map, &left, &right, - &[Column::new("a", 0)], - &[Column::new("a", 0)], - &random_state, - false, - &mut vec![0; right.num_rows()], - None, - JoinSide::Left, - None, + &[key_column.clone()], + &[key_column], false, + 
&hashes_buffer, + 8192, + (0, None), )?; let mut left_ids = UInt64Builder::with_capacity(0); @@ -2941,9 +3030,10 @@ mod tests { JoinFilter::new(filter_expression, column_indices, intermediate_schema) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_inner_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_inner_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a", &vec![0, 1, 2, 2]), ("b", &vec![4, 5, 7, 8]), @@ -2981,9 +3071,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_left_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a", &vec![0, 1, 2, 2]), ("b", &vec![4, 5, 7, 8]), @@ -3024,9 +3115,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_right_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a", &vec![0, 1, 2, 2]), ("b", &vec![4, 5, 7, 8]), @@ -3066,9 +3158,10 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] #[tokio::test] - async fn join_full_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_full_with_filter(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); let left = build_table( ("a", &vec![0, 1, 2, 2]), ("b", &vec![4, 5, 7, 8]), @@ -3211,6 +3304,140 @@ mod tests { } } + #[tokio::test] + async fn join_splitted_batch() { + let left = build_table( + ("a1", &vec![1, 2, 3, 4]), + ("b1", &vec![1, 1, 1, 1]), + ("c1", &vec![0, 0, 0, 0]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30, 40, 50]), + ("b2", &vec![1, 1, 1, 1, 1]), + ("c2", &vec![0, 0, 0, 0, 0]), + ); + let on = vec![( + Column::new_with_schema("b1", &left.schema()).unwrap(), + Column::new_with_schema("b2", &right.schema()).unwrap(), + )]; + + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::RightSemi, + JoinType::RightAnti, + JoinType::LeftSemi, + JoinType::LeftAnti, + ]; + let expected_resultset_records = 20; + let common_result = [ + "+----+----+----+----+----+----+", + "| a1 | b1 | c1 | a2 | b2 | c2 |", + "+----+----+----+----+----+----+", + "| 1 | 1 | 0 | 10 | 1 | 0 |", + "| 2 | 1 | 0 | 10 | 1 | 0 |", + "| 3 | 1 | 0 | 10 | 1 | 0 |", + "| 4 | 1 | 0 | 10 | 1 | 0 |", + "| 1 | 1 | 0 | 20 | 1 | 0 |", + "| 2 | 1 | 0 | 20 | 1 | 0 |", + "| 3 | 1 | 0 | 20 | 1 | 0 |", + "| 4 | 1 | 0 | 20 | 1 | 0 |", + "| 1 | 1 | 0 | 30 | 1 | 0 |", + "| 2 | 1 | 0 | 30 | 1 | 0 |", + "| 3 | 1 | 0 | 30 | 1 | 0 |", + "| 4 | 1 | 0 | 30 | 1 | 0 |", + "| 1 | 1 | 0 | 40 | 1 | 0 |", + "| 2 | 1 | 0 | 40 | 1 | 0 |", + "| 3 | 1 | 0 | 40 | 1 | 0 |", + "| 4 | 1 | 0 | 40 | 1 | 0 |", + "| 1 | 1 | 0 | 50 | 1 | 0 |", + "| 2 | 1 | 0 | 50 | 1 | 0 |", + "| 3 | 1 | 0 | 50 | 1 | 0 |", + "| 4 | 1 | 0 | 50 | 1 | 0 |", + "+----+----+----+----+----+----+", + ]; + let left_batch = [ + "+----+----+----+", + "| a1 | b1 | c1 |", + "+----+----+----+", + "| 1 | 1 | 0 |", + "| 2 | 1 | 0 |", + "| 3 | 1 | 0 |", + "| 4 | 1 | 0 |", + "+----+----+----+", + ]; + let right_batch = [ + "+----+----+----+", + "| a2 | b2 | c2 |", + "+----+----+----+", + "| 10 | 1 | 0 |", + "| 20 | 1 | 0 |", + "| 30 | 1 | 0 |", + "| 40 | 1 | 
0 |", + "| 50 | 1 | 0 |", + "+----+----+----+", + ]; + let right_empty = [ + "+----+----+----+", + "| a2 | b2 | c2 |", + "+----+----+----+", + "+----+----+----+", + ]; + let left_empty = [ + "+----+----+----+", + "| a1 | b1 | c1 |", + "+----+----+----+", + "+----+----+----+", + ]; + + // validation of partial join results output for different batch_size setting + for join_type in join_types { + for batch_size in (1..21).rev() { + let task_ctx = prepare_task_ctx(batch_size); + + let join = + join(left.clone(), right.clone(), on.clone(), &join_type, false) + .unwrap(); + + let stream = join.execute(0, task_ctx).unwrap(); + let batches = common::collect(stream).await.unwrap(); + + // For inner/right join expected batch count equals dev_ceil result, + // as there is no need to append non-joined build side data. + // For other join types it'll be div_ceil + 1 -- for additional batch + // containing not visited build side rows (empty in this test case). + let expected_batch_count = match join_type { + JoinType::Inner + | JoinType::Right + | JoinType::RightSemi + | JoinType::RightAnti => { + (expected_resultset_records + batch_size - 1) / batch_size + } + _ => (expected_resultset_records + batch_size - 1) / batch_size + 1, + }; + assert_eq!( + batches.len(), + expected_batch_count, + "expected {} output batches for {} join with batch_size = {}", + expected_batch_count, + join_type, + batch_size + ); + + let expected = match join_type { + JoinType::RightSemi => right_batch.to_vec(), + JoinType::RightAnti => right_empty.to_vec(), + JoinType::LeftSemi => left_batch.to_vec(), + JoinType::LeftAnti => left_empty.to_vec(), + _ => common_result.to_vec(), + }; + assert_batches_eq!(expected, &batches); + } + } + } + #[tokio::test] async fn single_partition_join_overallocation() -> Result<()> { let left = build_table( diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 6951642ff801..f89a2445fd07 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -28,9 +28,9 @@ use crate::coalesce_batches::concat_batches; use crate::joins::utils::{ append_right_indices, apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, check_join_is_valid, estimate_join_statistics, get_anti_indices, - get_anti_u64_indices, get_final_indices_from_bit_map, get_semi_indices, - get_semi_u64_indices, partitioned_join_output_partitioning, BuildProbeJoinMetrics, - ColumnIndex, JoinFilter, OnceAsync, OnceFut, + get_final_indices_from_bit_map, get_semi_indices, + partitioned_join_output_partitioning, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, + OnceAsync, OnceFut, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ @@ -649,20 +649,20 @@ fn adjust_indices_by_join_type( // matched // unmatched left row will be produced in this batch let left_unmatched_indices = - get_anti_u64_indices(count_left_batch, &left_indices); + get_anti_indices(0..count_left_batch, &left_indices); // combine the matched and unmatched left result together append_left_indices(left_indices, right_indices, left_unmatched_indices) } JoinType::LeftSemi => { // need to remove the duplicated record in the left side - let left_indices = get_semi_u64_indices(count_left_batch, &left_indices); + let left_indices = get_semi_indices(0..count_left_batch, &left_indices); // the right_indices will not be used later for the `left semi` join (left_indices, right_indices) } 
JoinType::LeftAnti => { // need to remove the duplicated record in the left side // get the anti index for the left side - let left_indices = get_anti_u64_indices(count_left_batch, &left_indices); + let left_indices = get_anti_indices(0..count_left_batch, &left_indices); // the right_indices will not be used later for the `left anti` join (left_indices, right_indices) } @@ -671,20 +671,20 @@ fn adjust_indices_by_join_type( // matched // unmatched right row will be produced in this batch let right_unmatched_indices = - get_anti_indices(count_right_batch, &right_indices); + get_anti_indices(0..count_right_batch, &right_indices); // combine the matched and unmatched right result together append_right_indices(left_indices, right_indices, right_unmatched_indices) } JoinType::RightSemi => { // need to remove the duplicated record in the right side - let right_indices = get_semi_indices(count_right_batch, &right_indices); + let right_indices = get_semi_indices(0..count_right_batch, &right_indices); // the left_indices will not be used later for the `right semi` join (left_indices, right_indices) } JoinType::RightAnti => { // need to remove the duplicated record in the right side // get the anti index for the right side - let right_indices = get_anti_indices(count_right_batch, &right_indices); + let right_indices = get_anti_indices(0..count_right_batch, &right_indices); // the left_indices will not be used later for the `right anti` join (left_indices, right_indices) } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 7719c72774d6..00950f082582 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -32,7 +32,7 @@ use std::task::Poll; use std::{usize, vec}; use crate::common::SharedMemoryReservation; -use crate::joins::hash_join::{build_equal_condition_join_indices, update_hash}; +use crate::joins::hash_join::{equal_rows_arr, update_hash}; use crate::joins::stream_join_utils::{ calculate_filter_expr_intervals, combine_two_batches, convert_sort_expr_with_filter_schema, get_pruning_anti_indices, @@ -41,22 +41,26 @@ use crate::joins::stream_join_utils::{ StreamJoinMetrics, }; use crate::joins::utils::{ - build_batch_from_indices, build_join_schema, check_join_is_valid, - partitioned_join_output_partitioning, ColumnIndex, JoinFilter, JoinOn, - StatefulStreamResult, + apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, + check_join_is_valid, partitioned_join_output_partitioning, ColumnIndex, JoinFilter, + JoinHashMapType, JoinOn, StatefulStreamResult, }; use crate::{ expressions::{Column, PhysicalSortExpr}, joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, - Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, + Partitioning, PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use arrow::array::{ArrowPrimitiveType, NativeAdapter, PrimitiveArray, PrimitiveBuilder}; +use arrow::array::{ + ArrowPrimitiveType, NativeAdapter, PrimitiveArray, PrimitiveBuilder, UInt32Array, + UInt64Array, +}; use arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; +use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; use datafusion_common::{ internal_err, plan_err, DataFusionError, JoinSide, 
JoinType, Result, @@ -785,7 +789,7 @@ pub(crate) fn join_with_probe_batch( if build_hash_joiner.input_buffer.num_rows() == 0 || probe_batch.num_rows() == 0 { return Ok(None); } - let (build_indices, probe_indices) = build_equal_condition_join_indices( + let (build_indices, probe_indices) = lookup_join_hashmap( &build_hash_joiner.hashmap, &build_hash_joiner.input_buffer, probe_batch, @@ -794,11 +798,22 @@ pub(crate) fn join_with_probe_batch( random_state, null_equals_null, &mut build_hash_joiner.hashes_buffer, - filter, - build_hash_joiner.build_side, Some(build_hash_joiner.deleted_offset), - false, )?; + + let (build_indices, probe_indices) = if let Some(filter) = filter { + apply_join_filter_to_indices( + &build_hash_joiner.input_buffer, + probe_batch, + build_indices, + probe_indices, + filter, + build_hash_joiner.build_side, + )? + } else { + (build_indices, probe_indices) + }; + if need_to_produce_result_in_final(build_hash_joiner.build_side, join_type) { record_visited_indices( &mut build_hash_joiner.visited_rows, @@ -835,6 +850,102 @@ pub(crate) fn join_with_probe_batch( } } +/// This method performs lookups against JoinHashMap by hash values of join-key columns, and handles potential +/// hash collisions. +/// +/// # Arguments +/// +/// * `build_hashmap` - hashmap collected from build side data. +/// * `build_batch` - Build side record batch. +/// * `probe_batch` - Probe side record batch. +/// * `build_on` - An array of columns on which the join will be performed. The columns are from the build side of the join. +/// * `probe_on` - An array of columns on which the join will be performed. The columns are from the probe side of the join. +/// * `random_state` - The random state for the join. +/// * `null_equals_null` - A boolean indicating whether NULL values should be treated as equal when joining. +/// * `hashes_buffer` - Buffer used for probe side keys hash calculation. +/// * `deleted_offset` - deleted offset for build side data. +/// +/// # Returns +/// +/// A [Result] containing a tuple with two equal length arrays, representing indices of rows from build and probe side, +/// matched by join key columns. +#[allow(clippy::too_many_arguments)] +fn lookup_join_hashmap( + build_hashmap: &PruningJoinHashMap, + build_batch: &RecordBatch, + probe_batch: &RecordBatch, + build_on: &[Column], + probe_on: &[Column], + random_state: &RandomState, + null_equals_null: bool, + hashes_buffer: &mut Vec, + deleted_offset: Option, +) -> Result<(UInt64Array, UInt32Array)> { + let keys_values = probe_on + .iter() + .map(|c| c.evaluate(probe_batch)?.into_array(probe_batch.num_rows())) + .collect::>>()?; + let build_join_values = build_on + .iter() + .map(|c| c.evaluate(build_batch)?.into_array(build_batch.num_rows())) + .collect::>>()?; + + hashes_buffer.clear(); + hashes_buffer.resize(probe_batch.num_rows(), 0); + let hash_values = create_hashes(&keys_values, random_state, hashes_buffer)?; + + // As SymmetricHashJoin uses LIFO JoinHashMap, the chained list algorithm + // will return build indices for each probe row in a reverse order as such: + // Build Indices: [5, 4, 3] + // Probe Indices: [1, 1, 1] + // + // This affects the output sequence. Hypothetically, it's possible to preserve the lexicographic order on the build side. 
+ // Let's consider probe rows [0,1] as an example: + // + // When the probe iteration sequence is reversed, the following pairings can be derived: + // + // For probe row 1: + // (5, 1) + // (4, 1) + // (3, 1) + // + // For probe row 0: + // (5, 0) + // (4, 0) + // (3, 0) + // + // After reversing both sets of indices, we obtain reversed indices: + // + // (3,0) + // (4,0) + // (5,0) + // (3,1) + // (4,1) + // (5,1) + // + // With this approach, the lexicographic order on both the probe side and the build side is preserved. + let (mut matched_probe, mut matched_build) = build_hashmap + .get_matched_indices(hash_values.iter().enumerate().rev(), deleted_offset); + + matched_probe.as_slice_mut().reverse(); + matched_build.as_slice_mut().reverse(); + + let build_indices: UInt64Array = + PrimitiveArray::new(matched_build.finish().into(), None); + let probe_indices: UInt32Array = + PrimitiveArray::new(matched_probe.finish().into(), None); + + let (build_indices, probe_indices) = equal_rows_arr( + &build_indices, + &probe_indices, + &build_join_values, + &keys_values, + null_equals_null, + )?; + + Ok((build_indices, probe_indices)) +} + pub struct OneSideHashJoiner { /// Build side build_side: JoinSide, diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 1e3cf5abb477..6ab08d3db022 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -20,7 +20,7 @@ use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; -use std::ops::IndexMut; +use std::ops::{IndexMut, Range}; use std::sync::Arc; use std::task::{Context, Poll}; use std::usize; @@ -35,6 +35,8 @@ use arrow::array::{ use arrow::compute; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::{RecordBatch, RecordBatchOptions}; +use arrow_array::{ArrowPrimitiveType, NativeAdapter, PrimitiveArray}; +use arrow_buffer::ArrowNativeType; use datafusion_common::cast::as_boolean_array; use datafusion_common::stats::Precision; use datafusion_common::{ @@ -136,6 +138,53 @@ impl JoinHashMap { } } +// Type of offsets for obtaining indices from JoinHashMap. +pub(crate) type JoinHashMapOffset = (usize, Option); + +// Macro for traversing chained values with limit. +// Early returns in case of reacing output tuples limit. +macro_rules! chain_traverse { + ( + $input_indices:ident, $match_indices:ident, $hash_values:ident, $next_chain:ident, + $input_idx:ident, $chain_idx:ident, $deleted_offset:ident, $remaining_output:ident + ) => { + let mut i = $chain_idx - 1; + loop { + let match_row_idx = if let Some(offset) = $deleted_offset { + // This arguments means that we prune the next index way before here. 
+ if i < offset as u64 { + // End of the list due to pruning + break; + } + i - offset as u64 + } else { + i + }; + $match_indices.append(match_row_idx); + $input_indices.append($input_idx as u32); + $remaining_output -= 1; + // Follow the chain to get the next index value + let next = $next_chain[match_row_idx as usize]; + + if $remaining_output == 0 { + // In case current input index is the last, and no more chain values left + // returning None as whole input has been scanned + let next_offset = if $input_idx == $hash_values.len() - 1 && next == 0 { + None + } else { + Some(($input_idx, Some(next))) + }; + return ($input_indices, $match_indices, next_offset); + } + if next == 0 { + // end of list + break; + } + i = next - 1; + } + }; +} + // Trait defining methods that must be implemented by a hash map type to be used for joins. pub trait JoinHashMapType { /// The type of list used to store the next list @@ -224,6 +273,78 @@ pub trait JoinHashMapType { (input_indices, match_indices) } + + /// Matches hashes with taking limit and offset into account. + /// Returns pairs of matched indices along with the starting point for next + /// matching iteration (`None` if limit has not been reached). + /// + /// This method only compares hashes, so additional further check for actual values + /// equality may be required. + fn get_matched_indices_with_limit_offset( + &self, + hash_values: &[u64], + deleted_offset: Option, + limit: usize, + offset: JoinHashMapOffset, + ) -> ( + UInt32BufferBuilder, + UInt64BufferBuilder, + Option, + ) { + let mut input_indices = UInt32BufferBuilder::new(0); + let mut match_indices = UInt64BufferBuilder::new(0); + + let mut remaining_output = limit; + + let hash_map: &RawTable<(u64, u64)> = self.get_map(); + let next_chain = self.get_list(); + + // Calculate initial `hash_values` index before iterating + let to_skip = match offset { + // None `initial_next_idx` indicates that `initial_idx` processing has'n been started + (initial_idx, None) => initial_idx, + // Zero `initial_next_idx` indicates that `initial_idx` has been processed during + // previous iteration, and it should be skipped + (initial_idx, Some(0)) => initial_idx + 1, + // Otherwise, process remaining `initial_idx` matches by traversing `next_chain`, + // to start with the next index + (initial_idx, Some(initial_next_idx)) => { + chain_traverse!( + input_indices, + match_indices, + hash_values, + next_chain, + initial_idx, + initial_next_idx, + deleted_offset, + remaining_output + ); + + initial_idx + 1 + } + }; + + let mut row_idx = to_skip; + for hash_value in &hash_values[to_skip..] { + if let Some((_, index)) = + hash_map.get(*hash_value, |(hash, _)| *hash_value == *hash) + { + chain_traverse!( + input_indices, + match_indices, + hash_values, + next_chain, + row_idx, + index, + deleted_offset, + remaining_output + ); + } + row_idx += 1; + } + + (input_indices, match_indices, None) + } } /// Implementation of `JoinHashMapType` for `JoinHashMap`. 
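For intuition about the `(probe_index, Option<next_chain_value>)` offset used above, here is a deliberately simplified, self-contained model. It is hypothetical code, not the real `JoinHashMap` (which hashes into a `RawTable`); it only keeps the conventions that matter here: chain links are 1-based, `0` terminates a chain, and hitting the output limit returns the position to resume from.

```rust
type Offset = (usize, Option<u64>);

fn lookup_with_limit(
    heads: &[Option<u64>], // per-probe-row chain head (1-based), None = no match
    next_chain: &[u64],    // next_chain[build_idx] = next link, 0 = end of chain
    limit: usize,
    offset: Offset,
) -> (Vec<(u32, u64)>, Option<Offset>) {
    let mut out = Vec::new();
    let (start_row, mut resume) = offset;
    for row in start_row..heads.len() {
        let mut link = match resume.take() {
            // `Some(0)` means this row was fully drained by the previous call.
            Some(0) => continue,
            // Resume mid-chain where the previous call stopped.
            Some(next) => next,
            // Fresh row: start from its chain head, if any.
            None => match heads[row] {
                Some(head) => head,
                None => continue,
            },
        };
        loop {
            out.push((row as u32, link - 1)); // emit (probe idx, build idx)
            let next = next_chain[(link - 1) as usize];
            if out.len() == limit {
                // Report where to resume, or None once the whole input is done.
                let next_offset = if row == heads.len() - 1 && next == 0 {
                    None
                } else {
                    Some((row, Some(next)))
                };
                return (out, next_offset);
            }
            if next == 0 {
                break;
            }
            link = next;
        }
    }
    (out, None)
}

fn main() {
    // Two probe rows, each matching build rows 2 -> 1 -> 0 through the chain.
    let heads = [Some(3), Some(3)];
    let next_chain = [0, 1, 2];

    let (first, offset) = lookup_with_limit(&heads, &next_chain, 4, (0, None));
    assert_eq!(first, vec![(0, 2), (0, 1), (0, 0), (1, 2)]);
    assert_eq!(offset, Some((1, Some(2))));

    let (rest, offset) = lookup_with_limit(&heads, &next_chain, 4, offset.unwrap());
    assert_eq!(rest, vec![(1, 1), (1, 0)]);
    assert_eq!(offset, None);
}
```

Note how a returned `Some(0)` next-chain value marks a probe row whose chain was fully drained, so the following call skips it, exactly as the `to_skip` match above does.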
@@ -1079,7 +1200,7 @@ pub(crate) fn build_batch_from_indices( pub(crate) fn adjust_indices_by_join_type( left_indices: UInt64Array, right_indices: UInt32Array, - count_right_batch: usize, + adjust_range: Range, join_type: JoinType, ) -> (UInt64Array, UInt32Array) { match join_type { @@ -1095,21 +1216,20 @@ pub(crate) fn adjust_indices_by_join_type( JoinType::Right | JoinType::Full => { // matched // unmatched right row will be produced in this batch - let right_unmatched_indices = - get_anti_indices(count_right_batch, &right_indices); + let right_unmatched_indices = get_anti_indices(adjust_range, &right_indices); // combine the matched and unmatched right result together append_right_indices(left_indices, right_indices, right_unmatched_indices) } JoinType::RightSemi => { // need to remove the duplicated record in the right side - let right_indices = get_semi_indices(count_right_batch, &right_indices); + let right_indices = get_semi_indices(adjust_range, &right_indices); // the left_indices will not be used later for the `right semi` join (left_indices, right_indices) } JoinType::RightAnti => { // need to remove the duplicated record in the right side // get the anti index for the right side - let right_indices = get_anti_indices(count_right_batch, &right_indices); + let right_indices = get_anti_indices(adjust_range, &right_indices); // the left_indices will not be used later for the `right anti` join (left_indices, right_indices) } @@ -1151,72 +1271,62 @@ pub(crate) fn append_right_indices( } } -/// Get unmatched and deduplicated indices -pub(crate) fn get_anti_indices( - row_count: usize, - input_indices: &UInt32Array, -) -> UInt32Array { - let mut bitmap = BooleanBufferBuilder::new(row_count); - bitmap.append_n(row_count, false); - input_indices.iter().flatten().for_each(|v| { - bitmap.set_bit(v as usize, true); - }); - - // get the anti index - (0..row_count) - .filter_map(|idx| (!bitmap.get_bit(idx)).then_some(idx as u32)) - .collect::() -} +/// Returns `range` indices which are not present in `input_indices` +pub(crate) fn get_anti_indices( + range: Range, + input_indices: &PrimitiveArray, +) -> PrimitiveArray +where + NativeAdapter: From<::Native>, +{ + let mut bitmap = BooleanBufferBuilder::new(range.len()); + bitmap.append_n(range.len(), false); + input_indices + .iter() + .flatten() + .map(|v| v.as_usize()) + .filter(|v| range.contains(v)) + .for_each(|v| { + bitmap.set_bit(v - range.start, true); + }); -/// Get unmatched and deduplicated indices -pub(crate) fn get_anti_u64_indices( - row_count: usize, - input_indices: &UInt64Array, -) -> UInt64Array { - let mut bitmap = BooleanBufferBuilder::new(row_count); - bitmap.append_n(row_count, false); - input_indices.iter().flatten().for_each(|v| { - bitmap.set_bit(v as usize, true); - }); + let offset = range.start; // get the anti index - (0..row_count) - .filter_map(|idx| (!bitmap.get_bit(idx)).then_some(idx as u64)) - .collect::() + (range) + .filter_map(|idx| { + (!bitmap.get_bit(idx - offset)).then_some(T::Native::from_usize(idx)) + }) + .collect::>() } -/// Get matched and deduplicated indices -pub(crate) fn get_semi_indices( - row_count: usize, - input_indices: &UInt32Array, -) -> UInt32Array { - let mut bitmap = BooleanBufferBuilder::new(row_count); - bitmap.append_n(row_count, false); - input_indices.iter().flatten().for_each(|v| { - bitmap.set_bit(v as usize, true); - }); - - // get the semi index - (0..row_count) - .filter_map(|idx| (bitmap.get_bit(idx)).then_some(idx as u32)) - .collect::() -} +/// Returns intersection of 
`range` and `input_indices` omitting duplicates +pub(crate) fn get_semi_indices( + range: Range, + input_indices: &PrimitiveArray, +) -> PrimitiveArray +where + NativeAdapter: From<::Native>, +{ + let mut bitmap = BooleanBufferBuilder::new(range.len()); + bitmap.append_n(range.len(), false); + input_indices + .iter() + .flatten() + .map(|v| v.as_usize()) + .filter(|v| range.contains(v)) + .for_each(|v| { + bitmap.set_bit(v - range.start, true); + }); -/// Get matched and deduplicated indices -pub(crate) fn get_semi_u64_indices( - row_count: usize, - input_indices: &UInt64Array, -) -> UInt64Array { - let mut bitmap = BooleanBufferBuilder::new(row_count); - bitmap.append_n(row_count, false); - input_indices.iter().flatten().for_each(|v| { - bitmap.set_bit(v as usize, true); - }); + let offset = range.start; // get the semi index - (0..row_count) - .filter_map(|idx| (bitmap.get_bit(idx)).then_some(idx as u64)) - .collect::() + (range) + .filter_map(|idx| { + (bitmap.get_bit(idx - offset)).then_some(T::Native::from_usize(idx)) + }) + .collect::>() } /// Metrics for build & probe joins diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 1dd1392b9d86..0a9eab5c8633 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -61,6 +61,7 @@ pub mod metrics; mod ordering; pub mod placeholder_row; pub mod projection; +pub mod recursive_query; pub mod repartition; pub mod sorts; pub mod stream; @@ -71,6 +72,7 @@ pub mod union; pub mod unnest; pub mod values; pub mod windows; +pub mod work_table; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; pub use crate::metrics::Metric; @@ -610,4 +612,7 @@ pub fn get_plan_string(plan: &Arc) -> Vec { } #[cfg(test)] +#[allow(clippy::single_component_path_imports)] +use rstest_reuse; + pub mod test; diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs new file mode 100644 index 000000000000..614ab990ac49 --- /dev/null +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -0,0 +1,377 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Defines the recursive query plan + +use std::any::Any; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use super::expressions::PhysicalSortExpr; +use super::metrics::BaselineMetrics; +use super::RecordBatchStream; +use super::{ + metrics::{ExecutionPlanMetricsSet, MetricsSet}, + work_table::{WorkTable, WorkTableExec}, + SendableRecordBatchStream, Statistics, +}; + +use arrow::datatypes::SchemaRef; +use arrow::record_batch::RecordBatch; +use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::Partitioning; +use futures::{ready, Stream, StreamExt}; + +use crate::{DisplayAs, DisplayFormatType, ExecutionPlan}; + +/// Recursive query execution plan. +/// +/// This plan has two components: a base part (the static term) and +/// a dynamic part (the recursive term). The execution will start from +/// the base, and as long as the previous iteration produced at least +/// a single new row (taking care of the distinction) the recursive +/// part will be continuously executed. +/// +/// Before each execution of the dynamic part, the rows from the previous +/// iteration will be available in a "working table" (not a real table, +/// can be only accessed using a continuance operation). +/// +/// Note that there won't be any limit or checks applied to detect +/// an infinite recursion, so it is up to the planner to ensure that +/// it won't happen. +#[derive(Debug)] +pub struct RecursiveQueryExec { + /// Name of the query handler + name: String, + /// The working table of cte + work_table: Arc, + /// The base part (static term) + static_term: Arc, + /// The dynamic part (recursive term) + recursive_term: Arc, + /// Distinction + is_distinct: bool, + /// Execution metrics + metrics: ExecutionPlanMetricsSet, +} + +impl RecursiveQueryExec { + /// Create a new RecursiveQueryExec + pub fn try_new( + name: String, + static_term: Arc, + recursive_term: Arc, + is_distinct: bool, + ) -> Result { + // Each recursive query needs its own work table + let work_table = Arc::new(WorkTable::new()); + // Use the same work table for both the WorkTableExec and the recursive term + let recursive_term = assign_work_table(recursive_term, work_table.clone())?; + Ok(RecursiveQueryExec { + name, + static_term, + recursive_term, + is_distinct, + work_table, + metrics: ExecutionPlanMetricsSet::new(), + }) + } +} + +impl ExecutionPlan for RecursiveQueryExec { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.static_term.schema() + } + + fn children(&self) -> Vec> { + vec![self.static_term.clone(), self.recursive_term.clone()] + } + + // Distribution on a recursive query is really tricky to handle. + // For now, we are going to use a single partition but in the + // future we might find a better way to handle this. + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(1) + } + + // TODO: control these hints and see whether we can + // infer some from the child plans (static/recurisve terms). 
+ fn maintains_input_order(&self) -> Vec { + vec![false, false] + } + + fn benefits_from_input_partitioning(&self) -> Vec { + vec![false, false] + } + + fn required_input_distribution(&self) -> Vec { + vec![ + datafusion_physical_expr::Distribution::SinglePartition, + datafusion_physical_expr::Distribution::SinglePartition, + ] + } + + fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + None + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + Ok(Arc::new(RecursiveQueryExec { + name: self.name.clone(), + static_term: children[0].clone(), + recursive_term: children[1].clone(), + is_distinct: self.is_distinct, + work_table: self.work_table.clone(), + metrics: self.metrics.clone(), + })) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + // TODO: we might be able to handle multiple partitions in the future. + if partition != 0 { + return Err(DataFusionError::Internal(format!( + "RecursiveQueryExec got an invalid partition {} (expected 0)", + partition + ))); + } + + let static_stream = self.static_term.execute(partition, context.clone())?; + let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); + Ok(Box::pin(RecursiveQueryStream::new( + context, + self.work_table.clone(), + self.recursive_term.clone(), + static_stream, + baseline_metrics, + ))) + } + + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema())) + } +} + +impl DisplayAs for RecursiveQueryExec { + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "RecursiveQueryExec: name={}, is_distinct={}", + self.name, self.is_distinct + ) + } + } + } +} + +/// The actual logic of the recursive queries happens during the streaming +/// process. A simplified version of the algorithm is the following: +/// +/// buffer = [] +/// +/// while batch := static_stream.next(): +/// buffer.push(batch) +/// yield buffer +/// +/// while buffer.len() > 0: +/// sender, receiver = Channel() +/// register_continuation(handle_name, receiver) +/// sender.send(buffer.drain()) +/// recursive_stream = recursive_term.execute() +/// while batch := recursive_stream.next(): +/// buffer.append(batch) +/// yield buffer +/// +struct RecursiveQueryStream { + /// The context to be used for managing handlers & executing new tasks + task_context: Arc, + /// The working table state, representing the self referencing cte table + work_table: Arc, + /// The dynamic part (recursive term) as is (without being executed) + recursive_term: Arc, + /// The static part (static term) as a stream. If the processing of this + /// part is completed, then it will be None. + static_stream: Option, + /// The dynamic part (recursive term) as a stream. If the processing of this + /// part has not started yet, or has been completed, then it will be None. + recursive_stream: Option, + /// The schema of the output. + schema: SchemaRef, + /// In-memory buffer for storing a copy of the current results. Will be + /// cleared after each iteration. + buffer: Vec, + // /// Metrics. 
+ _baseline_metrics: BaselineMetrics, +} + +impl RecursiveQueryStream { + /// Create a new recursive query stream + fn new( + task_context: Arc, + work_table: Arc, + recursive_term: Arc, + static_stream: SendableRecordBatchStream, + baseline_metrics: BaselineMetrics, + ) -> Self { + let schema = static_stream.schema(); + Self { + task_context, + work_table, + recursive_term, + static_stream: Some(static_stream), + recursive_stream: None, + schema, + buffer: vec![], + _baseline_metrics: baseline_metrics, + } + } + + /// Push a clone of the given batch to the in memory buffer, and then return + /// a poll with it. + fn push_batch( + mut self: std::pin::Pin<&mut Self>, + batch: RecordBatch, + ) -> Poll>> { + self.buffer.push(batch.clone()); + Poll::Ready(Some(Ok(batch))) + } + + /// Start polling for the next iteration, will be called either after the static term + /// is completed or another term is completed. It will follow the algorithm above on + /// to check whether the recursion has ended. + fn poll_next_iteration( + mut self: std::pin::Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll>> { + let total_length = self + .buffer + .iter() + .fold(0, |acc, batch| acc + batch.num_rows()); + + if total_length == 0 { + return Poll::Ready(None); + } + + // Update the work table with the current buffer + let batches = self.buffer.drain(..).collect(); + self.work_table.write(batches); + + // We always execute (and re-execute iteratively) the first partition. + // Downstream plans should not expect any partitioning. + let partition = 0; + + self.recursive_stream = Some( + self.recursive_term + .execute(partition, self.task_context.clone())?, + ); + self.poll_next(cx) + } +} + +fn assign_work_table( + plan: Arc, + work_table: Arc, +) -> Result> { + let mut work_table_refs = 0; + plan.transform_down_mut(&mut |plan| { + if let Some(exec) = plan.as_any().downcast_ref::() { + if work_table_refs > 0 { + not_impl_err!( + "Multiple recursive references to the same CTE are not supported" + ) + } else { + work_table_refs += 1; + Ok(Transformed::Yes(Arc::new( + exec.with_work_table(work_table.clone()), + ))) + } + } else if plan.as_any().is::() { + not_impl_err!("Recursive queries cannot be nested") + } else { + Ok(Transformed::No(plan)) + } + }) +} + +impl Stream for RecursiveQueryStream { + type Item = Result; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + // TODO: we should use this poll to record some metrics! + if let Some(static_stream) = &mut self.static_stream { + // While the static term's stream is available, we'll be forwarding the batches from it (also + // saving them for the initial iteration of the recursive term). + let batch_result = ready!(static_stream.poll_next_unpin(cx)); + match &batch_result { + None => { + // Once this is done, we can start running the setup for the recursive term. 
+ self.static_stream = None; + self.poll_next_iteration(cx) + } + Some(Ok(batch)) => self.push_batch(batch.clone()), + _ => Poll::Ready(batch_result), + } + } else if let Some(recursive_stream) = &mut self.recursive_stream { + let batch_result = ready!(recursive_stream.poll_next_unpin(cx)); + match batch_result { + None => { + self.recursive_stream = None; + self.poll_next_iteration(cx) + } + Some(Ok(batch)) => self.push_batch(batch.clone()), + _ => Poll::Ready(batch_result), + } + } else { + Poll::Ready(None) + } + } +} + +impl RecordBatchStream for RecursiveQueryStream { + /// Get the schema + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +#[cfg(test)] +mod tests {} diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index 1f6ee1f117aa..5a8ef2db77c2 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -61,7 +61,7 @@ impl BatchIndex { /// Iterator over batches #[derive(Debug, Default)] -pub(crate) struct TestStream { +pub struct TestStream { /// Vector of record batches data: Vec, /// Index into the data that has been returned so far @@ -684,7 +684,7 @@ pub struct PanicExec { } impl PanicExec { - /// Create new [`PanickingExec`] with a give schema and number of + /// Create new [`PanicExec`] with a give schema and number of /// partitions, which will each panic immediately. pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { Self { @@ -708,7 +708,7 @@ impl DisplayAs for PanicExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "PanickingExec",) + write!(f, "PanicExec",) } } } diff --git a/datafusion/physical-plan/src/udaf.rs b/datafusion/physical-plan/src/udaf.rs index 94017efe97aa..a82bbe1d0705 100644 --- a/datafusion/physical-plan/src/udaf.rs +++ b/datafusion/physical-plan/src/udaf.rs @@ -17,6 +17,7 @@ //! This module contains functions and structs supporting user-defined aggregate functions. 
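Since the recursive query stream added in `datafusion/physical-plan/src/recursive_query.rs` above is ultimately driven from SQL, the following is a hedged sketch of how it might be exercised end to end. It assumes a Tokio runtime, assumes the `datafusion.execution.enable_recursive_ctes` option (referenced later in this diff) must be enabled explicitly, and does not verify that the full physical planning path is wired up in this release.

// Hypothetical driver for the recursive CTE support added in this diff.
use datafusion::error::Result;
use datafusion::prelude::{SessionConfig, SessionContext};

#[tokio::main]
async fn main() -> Result<()> {
    let config = SessionConfig::new()
        // Gate checked by the SQL planner before accepting a WITH RECURSIVE block;
        // the key name is an assumption based on `execution.enable_recursive_ctes`.
        .set_bool("datafusion.execution.enable_recursive_ctes", true);
    let ctx = SessionContext::new_with_config(config);

    // The static term yields n = 1; the recursive term reads the previous
    // iteration from the work table until it produces no new rows.
    let df = ctx
        .sql(
            "WITH RECURSIVE nodes(n) AS (
                 SELECT 1 AS n
                 UNION ALL
                 SELECT n + 1 FROM nodes WHERE n < 10
             )
             SELECT n FROM nodes ORDER BY n",
        )
        .await?;
    df.show().await?;
    Ok(())
}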
+use datafusion_expr::GroupsAccumulator; use fmt::Debug; use std::any::Any; use std::fmt; @@ -166,6 +167,14 @@ impl AggregateExpr for AggregateFunctionExpr { fn name(&self) -> &str { &self.name } + + fn groups_accumulator_supported(&self) -> bool { + self.fun.groups_accumulator_supported() + } + + fn create_groups_accumulator(&self) -> Result> { + self.fun.create_groups_accumulator() + } } impl PartialEq for AggregateFunctionExpr { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index a85e5cc31c58..01818405b810 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -160,12 +160,20 @@ fn create_built_in_window_expr( input_schema: &Schema, name: String, ) -> Result> { + // need to get the types into an owned vec for some reason + let input_types: Vec<_> = args + .iter() + .map(|arg| arg.data_type(input_schema)) + .collect::>()?; + + // figure out the output type + let data_type = &fun.return_type(&input_types)?; Ok(match fun { - BuiltInWindowFunction::RowNumber => Arc::new(RowNumber::new(name)), - BuiltInWindowFunction::Rank => Arc::new(rank(name)), - BuiltInWindowFunction::DenseRank => Arc::new(dense_rank(name)), - BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)), - BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)), + BuiltInWindowFunction::RowNumber => Arc::new(RowNumber::new(name, data_type)), + BuiltInWindowFunction::Rank => Arc::new(rank(name, data_type)), + BuiltInWindowFunction::DenseRank => Arc::new(dense_rank(name, data_type)), + BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name, data_type)), + BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name, data_type)), BuiltInWindowFunction::Ntile => { let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| { DataFusionError::Execution( @@ -179,32 +187,42 @@ fn create_built_in_window_expr( if n.is_unsigned() { let n: u64 = n.try_into()?; - Arc::new(Ntile::new(name, n)) + Arc::new(Ntile::new(name, n, data_type)) } else { let n: i64 = n.try_into()?; if n <= 0 { return exec_err!("NTILE requires a positive integer"); } - Arc::new(Ntile::new(name, n as u64)) + Arc::new(Ntile::new(name, n as u64, data_type)) } } BuiltInWindowFunction::Lag => { let arg = args[0].clone(); - let data_type = args[0].data_type(input_schema)?; let shift_offset = get_scalar_value_from_args(args, 1)? .map(|v| v.try_into()) .and_then(|v| v.ok()); let default_value = get_scalar_value_from_args(args, 2)?; - Arc::new(lag(name, data_type, arg, shift_offset, default_value)) + Arc::new(lag( + name, + data_type.clone(), + arg, + shift_offset, + default_value, + )) } BuiltInWindowFunction::Lead => { let arg = args[0].clone(); - let data_type = args[0].data_type(input_schema)?; let shift_offset = get_scalar_value_from_args(args, 1)? .map(|v| v.try_into()) .and_then(|v| v.ok()); let default_value = get_scalar_value_from_args(args, 2)?; - Arc::new(lead(name, data_type, arg, shift_offset, default_value)) + Arc::new(lead( + name, + data_type.clone(), + arg, + shift_offset, + default_value, + )) } BuiltInWindowFunction::NthValue => { let arg = args[0].clone(); @@ -214,18 +232,15 @@ fn create_built_in_window_expr( .try_into() .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; let n: u32 = n as u32; - let data_type = args[0].data_type(input_schema)?; - Arc::new(NthValue::nth(name, arg, data_type, n)?) + Arc::new(NthValue::nth(name, arg, data_type.clone(), n)?) 
} BuiltInWindowFunction::FirstValue => { let arg = args[0].clone(); - let data_type = args[0].data_type(input_schema)?; - Arc::new(NthValue::first(name, arg, data_type)) + Arc::new(NthValue::first(name, arg, data_type.clone())) } BuiltInWindowFunction::LastValue => { let arg = args[0].clone(); - let data_type = args[0].data_type(input_schema)?; - Arc::new(NthValue::last(name, arg, data_type)) + Arc::new(NthValue::last(name, arg, data_type.clone())) } }) } diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs new file mode 100644 index 000000000000..c74a596f3dae --- /dev/null +++ b/datafusion/physical-plan/src/work_table.rs @@ -0,0 +1,192 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines the work table query plan + +use std::any::Any; +use std::sync::{Arc, Mutex}; + +use arrow::datatypes::SchemaRef; +use arrow::record_batch::RecordBatch; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::Partitioning; + +use crate::memory::MemoryStream; +use crate::{DisplayAs, DisplayFormatType, ExecutionPlan}; + +use super::expressions::PhysicalSortExpr; + +use super::{ + metrics::{ExecutionPlanMetricsSet, MetricsSet}, + SendableRecordBatchStream, Statistics, +}; +use datafusion_common::{internal_err, DataFusionError, Result}; + +/// The name is from PostgreSQL's terminology. +/// See +/// This table serves as a mirror or buffer between each iteration of a recursive query. +#[derive(Debug)] +pub(super) struct WorkTable { + batches: Mutex>>, +} + +impl WorkTable { + /// Create a new work table. + pub(super) fn new() -> Self { + Self { + batches: Mutex::new(None), + } + } + + /// Take the previously written batches from the work table. + /// This will be called by the [`WorkTableExec`] when it is executed. + fn take(&self) -> Vec { + let batches = self.batches.lock().unwrap().take().unwrap_or_default(); + batches + } + + /// Write the results of a recursive query iteration to the work table. + pub(super) fn write(&self, input: Vec) { + self.batches.lock().unwrap().replace(input); + } +} + +/// A temporary "working table" operation where the input data will be +/// taken from the named handle during the execution and will be re-published +/// as is (kind of like a mirror). +/// +/// Most notably used in the implementation of recursive queries where the +/// underlying relation does not exist yet but the data will come as the previous +/// term is evaluated. This table will be used such that the recursive plan +/// will register a receiver in the task context and this plan will use that +/// receiver to get the data and stream it back up so that the batches are available +/// in the next iteration. 
+#[derive(Clone, Debug)] +pub struct WorkTableExec { + /// Name of the relation handler + name: String, + /// The schema of the stream + schema: SchemaRef, + /// The work table + work_table: Arc, + /// Execution metrics + metrics: ExecutionPlanMetricsSet, +} + +impl WorkTableExec { + /// Create a new execution plan for a worktable exec. + pub fn new(name: String, schema: SchemaRef) -> Self { + Self { + name, + schema, + metrics: ExecutionPlanMetricsSet::new(), + work_table: Arc::new(WorkTable::new()), + } + } + + pub(super) fn with_work_table(&self, work_table: Arc) -> Self { + Self { + name: self.name.clone(), + schema: self.schema.clone(), + metrics: ExecutionPlanMetricsSet::new(), + work_table, + } + } +} + +impl DisplayAs for WorkTableExec { + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "WorkTableExec: name={}", self.name) + } + } + } +} + +impl ExecutionPlan for WorkTableExec { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn children(&self) -> Vec> { + vec![] + } + + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(1) + } + + fn maintains_input_order(&self) -> Vec { + vec![false] + } + + fn benefits_from_input_partitioning(&self) -> Vec { + vec![false] + } + + fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + None + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + Ok(self.clone()) + } + + /// Stream the batches that were written to the work table. + fn execute( + &self, + partition: usize, + _context: Arc, + ) -> Result { + // WorkTable streams must be the plan base. + if partition != 0 { + return internal_err!( + "WorkTableExec got an invalid partition {partition} (expected 0)" + ); + } + + let batches = self.work_table.take(); + Ok(Box::pin(MemoryStream::try_new( + batches, + self.schema.clone(), + None, + )?)) + } + + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema())) + } +} + +#[cfg(test)] +mod tests {} diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index e42322021630..f9d54dba5756 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -43,7 +43,7 @@ parquet = ["datafusion/parquet", "datafusion-common/parquet"] [dependencies] arrow = { workspace = true } chrono = { workspace = true } -datafusion = { path = "../core", version = "34.0.0" } +datafusion = { path = "../core", version = "35.0.0" } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } object_store = { workspace = true } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 8bde0da133eb..66c1271e65c1 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -669,6 +669,8 @@ enum ScalarFunction { ArraySort = 128; ArrayDistinct = 129; ArrayResize = 130; + EndsWith = 131; + InStr = 132; } message ScalarFunctionNode { @@ -1213,6 +1215,7 @@ message FileTypeWriterOptions { JsonWriterOptions json_options = 1; ParquetWriterOptions parquet_options = 2; CsvWriterOptions csv_options = 3; + ArrowWriterOptions arrow_options = 4; } } @@ -1243,6 +1246,8 @@ message CsvWriterOptions { string null_value = 8; } +message ArrowWriterOptions {} + message WriterProperties { uint64 data_page_size_limit = 1; uint64 
dictionary_page_size_limit = 2; diff --git a/datafusion/proto/src/bytes/mod.rs b/datafusion/proto/src/bytes/mod.rs index 9377501499e2..d9eda5d00d52 100644 --- a/datafusion/proto/src/bytes/mod.rs +++ b/datafusion/proto/src/bytes/mod.rs @@ -23,7 +23,6 @@ use crate::physical_plan::{ AsExecutionPlan, DefaultPhysicalExtensionCodec, PhysicalExtensionCodec, }; use crate::protobuf; -use datafusion::physical_plan::functions::make_scalar_function; use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; use datafusion_expr::{ create_udaf, create_udf, create_udwf, AggregateUDF, Expr, LogicalPlan, Volatility, @@ -117,7 +116,7 @@ impl Serializeable for Expr { vec![], Arc::new(arrow::datatypes::DataType::Null), Volatility::Immutable, - make_scalar_function(|_| unimplemented!()), + Arc::new(|_| unimplemented!()), ))) } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 528761136ca3..39a8678ef250 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -1929,6 +1929,77 @@ impl<'de> serde::Deserialize<'de> for ArrowType { deserializer.deserialize_struct("datafusion.ArrowType", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for ArrowWriterOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let len = 0; + let struct_ser = serializer.serialize_struct("datafusion.ArrowWriterOptions", len)?; + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ArrowWriterOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + Err(serde::de::Error::unknown_field(value, FIELDS)) + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ArrowWriterOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ArrowWriterOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + while map_.next_key::()?.is_some() { + let _ = map_.next_value::()?; + } + Ok(ArrowWriterOptions { + }) + } + } + deserializer.deserialize_struct("datafusion.ArrowWriterOptions", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for AvroFormat { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -8354,6 +8425,9 @@ impl serde::Serialize for FileTypeWriterOptions { file_type_writer_options::FileType::CsvOptions(v) => { struct_ser.serialize_field("csvOptions", v)?; } + file_type_writer_options::FileType::ArrowOptions(v) => { + struct_ser.serialize_field("arrowOptions", v)?; + } } } struct_ser.end() @@ -8372,6 +8446,8 @@ 
impl<'de> serde::Deserialize<'de> for FileTypeWriterOptions { "parquetOptions", "csv_options", "csvOptions", + "arrow_options", + "arrowOptions", ]; #[allow(clippy::enum_variant_names)] @@ -8379,6 +8455,7 @@ impl<'de> serde::Deserialize<'de> for FileTypeWriterOptions { JsonOptions, ParquetOptions, CsvOptions, + ArrowOptions, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -8403,6 +8480,7 @@ impl<'de> serde::Deserialize<'de> for FileTypeWriterOptions { "jsonOptions" | "json_options" => Ok(GeneratedField::JsonOptions), "parquetOptions" | "parquet_options" => Ok(GeneratedField::ParquetOptions), "csvOptions" | "csv_options" => Ok(GeneratedField::CsvOptions), + "arrowOptions" | "arrow_options" => Ok(GeneratedField::ArrowOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -8444,6 +8522,13 @@ impl<'de> serde::Deserialize<'de> for FileTypeWriterOptions { return Err(serde::de::Error::duplicate_field("csvOptions")); } file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::CsvOptions) +; + } + GeneratedField::ArrowOptions => { + if file_type__.is_some() { + return Err(serde::de::Error::duplicate_field("arrowOptions")); + } + file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::ArrowOptions) ; } } @@ -22338,6 +22423,8 @@ impl serde::Serialize for ScalarFunction { Self::ArraySort => "ArraySort", Self::ArrayDistinct => "ArrayDistinct", Self::ArrayResize => "ArrayResize", + Self::EndsWith => "EndsWith", + Self::InStr => "InStr", }; serializer.serialize_str(variant) } @@ -22480,6 +22567,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArraySort", "ArrayDistinct", "ArrayResize", + "EndsWith", + "InStr", ]; struct GeneratedVisitor; @@ -22651,6 +22740,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArraySort" => Ok(ScalarFunction::ArraySort), "ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct), "ArrayResize" => Ok(ScalarFunction::ArrayResize), + "EndsWith" => Ok(ScalarFunction::EndsWith), + "InStr" => Ok(ScalarFunction::InStr), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 9a0b7ab332a6..7bf1d8ed0450 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1646,7 +1646,7 @@ pub struct PartitionColumn { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct FileTypeWriterOptions { - #[prost(oneof = "file_type_writer_options::FileType", tags = "1, 2, 3")] + #[prost(oneof = "file_type_writer_options::FileType", tags = "1, 2, 3, 4")] pub file_type: ::core::option::Option, } /// Nested message and enum types in `FileTypeWriterOptions`. 
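The new `oneof` arm is exercised by the logical-plan round-trip test later in this diff; below is a shorter, hedged sketch of the same idea written directly against the generated prost types. It assumes the generated items are reachable via `datafusion_proto::protobuf` (as in the imports used elsewhere in this diff) and that `prost` is available as a dependency.

// Minimal round-trip of the new ArrowOptions variant (tag 4) through the
// generated prost types; this is the generated protobuf ArrowWriterOptions,
// not the datafusion_common file-options struct of the same name.
use datafusion_proto::protobuf::{
    file_type_writer_options::FileType, ArrowWriterOptions, FileTypeWriterOptions,
};
use prost::Message;

fn main() -> Result<(), prost::DecodeError> {
    let options = FileTypeWriterOptions {
        // The Arrow writer message currently carries no fields.
        file_type: Some(FileType::ArrowOptions(ArrowWriterOptions {})),
    };

    let bytes = options.encode_to_vec();
    let decoded = FileTypeWriterOptions::decode(bytes.as_slice())?;
    assert_eq!(options, decoded);
    println!("arrow writer options round-tripped in {} bytes", bytes.len());
    Ok(())
}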
@@ -1660,6 +1660,8 @@ pub mod file_type_writer_options { ParquetOptions(super::ParquetWriterOptions), #[prost(message, tag = "3")] CsvOptions(super::CsvWriterOptions), + #[prost(message, tag = "4")] + ArrowOptions(super::ArrowWriterOptions), } } #[allow(clippy::derive_partial_eq_without_eq)] @@ -1704,6 +1706,9 @@ pub struct CsvWriterOptions { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct ArrowWriterOptions {} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct WriterProperties { #[prost(uint64, tag = "1")] pub data_page_size_limit: u64, @@ -2759,6 +2764,8 @@ pub enum ScalarFunction { ArraySort = 128, ArrayDistinct = 129, ArrayResize = 130, + EndsWith = 131, + InStr = 132, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2898,6 +2905,8 @@ impl ScalarFunction { ScalarFunction::ArraySort => "ArraySort", ScalarFunction::ArrayDistinct => "ArrayDistinct", ScalarFunction::ArrayResize => "ArrayResize", + ScalarFunction::EndsWith => "EndsWith", + ScalarFunction::InStr => "InStr", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -3034,6 +3043,8 @@ impl ScalarFunction { "ArraySort" => Some(Self::ArraySort), "ArrayDistinct" => Some(Self::ArrayDistinct), "ArrayResize" => Some(Self::ArrayResize), + "EndsWith" => Some(Self::EndsWith), + "InStr" => Some(Self::InStr), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 973e366d0bbd..42d39b5c5139 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -56,19 +56,18 @@ use datafusion_expr::{ ascii, asin, asinh, atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part, date_trunc, decode, degrees, digest, - encode, exp, + encode, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, initcap, - isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2, + instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, power, radians, random, regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt, starts_with, string_to_array, strpos, struct_fun, substr, substr_index, - substring, tan, tanh, to_hex, to_timestamp_micros, to_timestamp_millis, - to_timestamp_nanos, to_timestamp_seconds, translate, trim, trunc, upper, uuid, - AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, - Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, + substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction, + Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, + GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -476,7 +475,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Trim => Self::Trim, ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, - 
ScalarFunction::ToTimestamp => Self::ToTimestamp, ScalarFunction::ArrayAppend => Self::ArrayAppend, ScalarFunction::ArraySort => Self::ArraySort, ScalarFunction::ArrayConcat => Self::ArrayConcat, @@ -523,7 +521,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Digest => Self::Digest, ScalarFunction::Encode => Self::Encode, ScalarFunction::Decode => Self::Decode, - ScalarFunction::ToTimestampMillis => Self::ToTimestampMillis, ScalarFunction::Log2 => Self::Log2, ScalarFunction::Signum => Self::Signum, ScalarFunction::Ascii => Self::Ascii, @@ -532,7 +529,9 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::CharacterLength => Self::CharacterLength, ScalarFunction::Chr => Self::Chr, ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, + ScalarFunction::EndsWith => Self::EndsWith, ScalarFunction::InitCap => Self::InitCap, + ScalarFunction::InStr => Self::InStr, ScalarFunction::Left => Self::Left, ScalarFunction::Lpad => Self::Lpad, ScalarFunction::Random => Self::Random, @@ -548,6 +547,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, ScalarFunction::ToHex => Self::ToHex, + ScalarFunction::ToTimestamp => Self::ToTimestamp, + ScalarFunction::ToTimestampMillis => Self::ToTimestampMillis, ScalarFunction::ToTimestampMicros => Self::ToTimestampMicros, ScalarFunction::ToTimestampNanos => Self::ToTimestampNanos, ScalarFunction::ToTimestampSeconds => Self::ToTimestampSeconds, @@ -1457,6 +1458,7 @@ pub fn parse_expr( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, parse_expr(&args[2], registry)?, + parse_expr(&args[3], registry)?, )), ScalarFunction::ArrayToString => Ok(array_to_string( parse_expr(&args[0], registry)?, @@ -1586,6 +1588,10 @@ pub fn parse_expr( } ScalarFunction::Chr => Ok(chr(parse_expr(&args[0], registry)?)), ScalarFunction::InitCap => Ok(initcap(parse_expr(&args[0], registry)?)), + ScalarFunction::InStr => Ok(instr( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), ScalarFunction::Gcd => Ok(gcd( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, @@ -1665,6 +1671,10 @@ pub fn parse_expr( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, )), + ScalarFunction::EndsWith => Ok(ends_with( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), ScalarFunction::Strpos => Ok(strpos( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, @@ -1689,17 +1699,55 @@ pub fn parse_expr( parse_expr(&args[1], registry)?, )), ScalarFunction::ToHex => Ok(to_hex(parse_expr(&args[0], registry)?)), + ScalarFunction::ToTimestamp => { + let args: Vec<_> = args + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::>()?; + Ok(Expr::ScalarFunction(expr::ScalarFunction::new( + BuiltinScalarFunction::ToTimestamp, + args, + ))) + } ScalarFunction::ToTimestampMillis => { - Ok(to_timestamp_millis(parse_expr(&args[0], registry)?)) + let args: Vec<_> = args + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::>()?; + Ok(Expr::ScalarFunction(expr::ScalarFunction::new( + BuiltinScalarFunction::ToTimestampMillis, + args, + ))) } ScalarFunction::ToTimestampMicros => { - Ok(to_timestamp_micros(parse_expr(&args[0], registry)?)) + let args: Vec<_> = args + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::>()?; + Ok(Expr::ScalarFunction(expr::ScalarFunction::new( + BuiltinScalarFunction::ToTimestampMicros, + args, 
+ ))) } ScalarFunction::ToTimestampNanos => { - Ok(to_timestamp_nanos(parse_expr(&args[0], registry)?)) + let args: Vec<_> = args + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::>()?; + Ok(Expr::ScalarFunction(expr::ScalarFunction::new( + BuiltinScalarFunction::ToTimestampNanos, + args, + ))) } ScalarFunction::ToTimestampSeconds => { - Ok(to_timestamp_seconds(parse_expr(&args[0], registry)?)) + let args: Vec<_> = args + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::>()?; + Ok(Expr::ScalarFunction(expr::ScalarFunction::new( + BuiltinScalarFunction::ToTimestampSeconds, + args, + ))) } ScalarFunction::Now => Ok(now()), ScalarFunction::Translate => Ok(translate( @@ -1741,18 +1789,6 @@ pub fn parse_expr( ScalarFunction::ArrowTypeof => { Ok(arrow_typeof(parse_expr(&args[0], registry)?)) } - ScalarFunction::ToTimestamp => { - let args: Vec<_> = args - .iter() - .map(|expr| parse_expr(expr, registry)) - .collect::>()?; - Ok(Expr::ScalarFunction( - datafusion_expr::expr::ScalarFunction::new( - BuiltinScalarFunction::ToTimestamp, - args, - ), - )) - } ScalarFunction::Flatten => Ok(flatten(parse_expr(&args[0], registry)?)), ScalarFunction::StringToArray => Ok(string_to_array( parse_expr(&args[0], registry)?, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 6ca95519a9b1..d95d69780301 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -16,6 +16,7 @@ // under the License. use arrow::csv::WriterBuilder; +use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; use std::collections::HashMap; use std::fmt::Debug; use std::str::FromStr; @@ -858,6 +859,13 @@ impl AsLogicalPlan for LogicalPlanNode { Some(copy_to_node::CopyOptions::WriterOptions(opt)) => { match &opt.file_type { Some(ft) => match ft { + file_type_writer_options::FileType::ArrowOptions(_) => { + CopyOptions::WriterOptions(Box::new( + FileTypeWriterOptions::Arrow( + ArrowWriterOptions::new(), + ), + )) + } file_type_writer_options::FileType::CsvOptions( writer_options, ) => { @@ -1659,6 +1667,17 @@ impl AsLogicalPlan for LogicalPlanNode { } CopyOptions::WriterOptions(opt) => { match opt.as_ref() { + FileTypeWriterOptions::Arrow(_) => { + let arrow_writer_options = + file_type_writer_options::FileType::ArrowOptions( + protobuf::ArrowWriterOptions {}, + ); + Some(copy_to_node::CopyOptions::WriterOptions( + protobuf::FileTypeWriterOptions { + file_type: Some(arrow_writer_options), + }, + )) + } FileTypeWriterOptions::CSV(csv_opts) => { let csv_options = &csv_opts.writer_options; let csv_writer_options = csv_writer_options_to_proto( @@ -1715,6 +1734,9 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::DescribeTable(_) => Err(proto_error( "LogicalPlan serde is not yet implemented for DescribeTable", )), + LogicalPlan::RecursiveQuery(_) => Err(proto_error( + "LogicalPlan serde is not yet implemented for RecursiveQuery", + )), } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 7eef3da9519f..dbb52eced36c 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1525,7 +1525,9 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::CharacterLength => Self::CharacterLength, BuiltinScalarFunction::Chr => Self::Chr, BuiltinScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, + BuiltinScalarFunction::EndsWith => Self::EndsWith, 
BuiltinScalarFunction::InitCap => Self::InitCap, + BuiltinScalarFunction::InStr => Self::InStr, BuiltinScalarFunction::Left => Self::Left, BuiltinScalarFunction::Lpad => Self::Lpad, BuiltinScalarFunction::Random => Self::Random, diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index ea28eeee8810..dc827d02bf25 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -42,6 +42,7 @@ use datafusion::physical_plan::windows::create_window_expr; use datafusion::physical_plan::{ functions, ColumnStatistics, Partitioning, PhysicalExpr, Statistics, WindowExpr, }; +use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; @@ -834,6 +835,10 @@ impl TryFrom<&protobuf::FileTypeWriterOptions> for FileTypeWriterOptions { .ok_or_else(|| proto_error("Missing required file_type field in protobuf"))?; match file_type { + protobuf::file_type_writer_options::FileType::ArrowOptions(_) => { + Ok(Self::Arrow(ArrowWriterOptions::new())) + } + protobuf::file_type_writer_options::FileType::JsonOptions(opts) => { let compression: CompressionTypeVariant = opts.compression().into(); Ok(Self::JSON(JsonWriterOptions::new(compression))) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index ed21124a9e22..17d47a65d8d1 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -27,6 +27,7 @@ use arrow::datatypes::{ IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode, }; +use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; use prost::Message; use datafusion::datasource::provider::TableProviderFactory; @@ -34,7 +35,6 @@ use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion::parquet::file::properties::{WriterProperties, WriterVersion}; -use datafusion::physical_plan::functions::make_scalar_function; use datafusion::prelude::{create_udf, CsvReadOptions, SessionConfig, SessionContext}; use datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; @@ -53,9 +53,9 @@ use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ col, create_udaf, lit, Accumulator, AggregateFunction, BuiltinScalarFunction::{Sqrt, Substr}, - Expr, LogicalPlan, Operator, PartitionEvaluator, Signature, TryCast, Volatility, - WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF, - WindowUDFImpl, + ColumnarValue, Expr, LogicalPlan, Operator, PartitionEvaluator, Signature, TryCast, + Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, + WindowFunctionDefinition, WindowUDF, WindowUDFImpl, }; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, @@ -394,6 +394,45 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { Ok(()) } +#[tokio::test] +async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { + let ctx = SessionContext::new(); + + let input = create_csv_scan(&ctx).await?; + + let plan = 
LogicalPlan::Copy(CopyTo { + input: Arc::new(input), + output_url: "test.arrow".to_string(), + file_format: FileType::ARROW, + single_file_output: true, + copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow( + ArrowWriterOptions::new(), + ))), + }); + + let bytes = logical_plan_to_bytes(&plan)?; + let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; + assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + + match logical_round_trip { + LogicalPlan::Copy(copy_to) => { + assert_eq!("test.arrow", copy_to.output_url); + assert_eq!(FileType::ARROW, copy_to.file_format); + assert!(copy_to.single_file_output); + match ©_to.copy_options { + CopyOptions::WriterOptions(y) => match y.as_ref() { + FileTypeWriterOptions::Arrow(_) => {} + _ => panic!(), + }, + _ => panic!(), + } + } + _ => panic!(), + } + + Ok(()) +} + #[tokio::test] async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { let ctx = SessionContext::new(); @@ -1538,7 +1577,7 @@ fn roundtrip_aggregate_udf() { struct Dummy {} impl Accumulator for Dummy { - fn state(&self) -> datafusion::error::Result> { + fn state(&mut self) -> datafusion::error::Result> { Ok(vec![]) } @@ -1553,7 +1592,7 @@ fn roundtrip_aggregate_udf() { Ok(()) } - fn evaluate(&self) -> datafusion::error::Result { + fn evaluate(&mut self) -> datafusion::error::Result { Ok(ScalarValue::Float64(None)) } @@ -1592,9 +1631,12 @@ fn roundtrip_aggregate_udf() { #[test] fn roundtrip_scalar_udf() { - let fn_impl = |args: &[ArrayRef]| Ok(Arc::new(args[0].clone()) as ArrayRef); - - let scalar_fn = make_scalar_function(fn_impl); + let scalar_fn = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(array) = &args[0] else { + panic!("should be array") + }; + Ok(ColumnarValue::from(Arc::new(array.clone()) as ArrayRef)) + }); let udf = create_udf( "dummy", @@ -1722,7 +1764,7 @@ fn roundtrip_window() { struct DummyAggr {} impl Accumulator for DummyAggr { - fn state(&self) -> datafusion::error::Result> { + fn state(&mut self) -> datafusion::error::Result> { Ok(vec![]) } @@ -1737,7 +1779,7 @@ fn roundtrip_window() { Ok(()) } - fn evaluate(&self) -> datafusion::error::Result { + fn evaluate(&mut self) -> datafusion::error::Result { Ok(ScalarValue::Float64(None)) } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 3a13dc887f0c..9a95e103c294 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -47,7 +47,6 @@ use datafusion::physical_plan::expressions::{ GetFieldAccessExpr, GetIndexedFieldExpr, NotExpr, NthValue, PhysicalSortExpr, Sum, }; use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::functions::make_scalar_function; use datafusion::physical_plan::insert::FileSinkExec; use datafusion::physical_plan::joins::{ HashJoinExec, NestedLoopJoinExec, PartitionMode, StreamJoinPartitionMode, @@ -73,8 +72,8 @@ use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; use datafusion_common::{FileTypeWriterOptions, Result}; use datafusion_expr::{ - Accumulator, AccumulatorFactoryFunction, AggregateUDF, Signature, SimpleAggregateUDF, - WindowFrame, WindowFrameBound, + Accumulator, AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, Signature, + SimpleAggregateUDF, WindowFrame, WindowFrameBound, }; use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; use 
datafusion_proto::protobuf; @@ -253,7 +252,8 @@ fn roundtrip_nested_loop_join() -> Result<()> { fn roundtrip_window() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Int64, false); - let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let field_c = Field::new("FIRST_VALUE(a) PARTITION BY [b] ORDER BY [a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b, field_c])); let window_frame = WindowFrame::new_bounds( datafusion_expr::WindowFrameUnits::Range, @@ -353,7 +353,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { #[derive(Debug)] struct Example; impl Accumulator for Example { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Ok(vec![ScalarValue::Int64(Some(0))]) } @@ -365,7 +365,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { Ok(()) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Int64(Some(0))) } @@ -568,9 +568,12 @@ fn roundtrip_scalar_udf() -> Result<()> { let input = Arc::new(EmptyExec::new(schema.clone())); - let fn_impl = |args: &[ArrayRef]| Ok(Arc::new(args[0].clone()) as ArrayRef); - - let scalar_fn = make_scalar_function(fn_impl); + let scalar_fn = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(array) = &args[0] else { + panic!("should be array") + }; + Ok(ColumnarValue::from(Arc::new(array.clone()) as ArrayRef)) + }); let udf = create_udf( "dummy", diff --git a/datafusion/proto/tests/cases/serialize.rs b/datafusion/proto/tests/cases/serialize.rs index 222d1a3a629c..7dd0333909ee 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ -21,9 +21,8 @@ use arrow::array::ArrayRef; use arrow::datatypes::DataType; use datafusion::execution::FunctionRegistry; -use datafusion::physical_plan::functions::make_scalar_function; use datafusion::prelude::SessionContext; -use datafusion_expr::{col, create_udf, lit}; +use datafusion_expr::{col, create_udf, lit, ColumnarValue}; use datafusion_expr::{Expr, Volatility}; use datafusion_proto::bytes::Serializeable; @@ -226,9 +225,12 @@ fn roundtrip_deeply_nested() { /// return a `SessionContext` with a `dummy` function registered as a UDF fn context_with_udf() -> SessionContext { - let fn_impl = |args: &[ArrayRef]| Ok(Arc::new(args[0].clone()) as ArrayRef); - - let scalar_fn = make_scalar_function(fn_impl); + let scalar_fn = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(array) = &args[0] else { + panic!("should be array") + }; + Ok(ColumnarValue::from(Arc::new(array.clone()) as ArrayRef)) + }); let udf = create_udf( "dummy", diff --git a/datafusion/sql/src/expr/arrow_cast.rs b/datafusion/sql/src/expr/arrow_cast.rs index ade8b96b5cc2..9a0d61f41c01 100644 --- a/datafusion/sql/src/expr/arrow_cast.rs +++ b/datafusion/sql/src/expr/arrow_cast.rs @@ -150,6 +150,7 @@ impl<'a> Parser<'a> { Token::Dictionary => self.parse_dictionary(), Token::List => self.parse_list(), Token::LargeList => self.parse_large_list(), + Token::FixedSizeList => self.parse_fixed_size_list(), tok => Err(make_error( self.val, &format!("finding next type, got unexpected '{tok}'"), @@ -177,6 +178,19 @@ impl<'a> Parser<'a> { )))) } + /// Parses the FixedSizeList type + fn parse_fixed_size_list(&mut self) -> Result { + self.expect_token(Token::LParen)?; + let length = self.parse_i32("FixedSizeList")?; + self.expect_token(Token::Comma)?; + let data_type = self.parse_next_type()?; + 
self.expect_token(Token::RParen)?; + Ok(DataType::FixedSizeList( + Arc::new(Field::new("item", data_type, true)), + length, + )) + } + /// Parses the next timeunit fn parse_time_unit(&mut self, context: &str) -> Result { match self.next_token()? { @@ -508,6 +522,7 @@ impl<'a> Tokenizer<'a> { "List" => Token::List, "LargeList" => Token::LargeList, + "FixedSizeList" => Token::FixedSizeList, "Second" => Token::TimeUnit(TimeUnit::Second), "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond), @@ -598,6 +613,7 @@ enum Token { DoubleQuotedString(String), List, LargeList, + FixedSizeList, } impl Display for Token { @@ -606,6 +622,7 @@ impl Display for Token { Token::SimpleType(t) => write!(f, "{t}"), Token::List => write!(f, "List"), Token::LargeList => write!(f, "LargeList"), + Token::FixedSizeList => write!(f, "FixedSizeList"), Token::Timestamp => write!(f, "Timestamp"), Token::Time32 => write!(f, "Time32"), Token::Time64 => write!(f, "Time64"), diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index d9c85663e50e..78efaca09938 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -40,6 +40,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch), BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch), BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch), + BinaryOperator::PGLikeMatch => Ok(Operator::LikeMatch), + BinaryOperator::PGILikeMatch => Ok(Operator::ILikeMatch), + BinaryOperator::PGNotLikeMatch => Ok(Operator::NotLikeMatch), + BinaryOperator::PGNotILikeMatch => Ok(Operator::NotILikeMatch), BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd), BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr), BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor), diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index dbd72ec5eb7a..effc1d096cfd 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -366,7 +366,7 @@ impl<'a> DFParser<'a> { CopyToSource::Query(query) } else { // parse as table reference - let table_name = self.parser.parse_object_name()?; + let table_name = self.parser.parse_object_name(true)?; CopyToSource::Relation(table_name) }; @@ -465,7 +465,7 @@ impl<'a> DFParser<'a> { loop { if let Token::Word(_) = self.parser.peek_token().token { - let identifier = self.parser.parse_identifier()?; + let identifier = self.parser.parse_identifier(false)?; partitions.push(identifier.to_string()); } else { return self.expected("partition name", self.parser.peek_token()); @@ -567,17 +567,17 @@ impl<'a> DFParser<'a> { } fn parse_column_def(&mut self) -> Result { - let name = self.parser.parse_identifier()?; + let name = self.parser.parse_identifier(false)?; let data_type = self.parser.parse_data_type()?; let collation = if self.parser.parse_keyword(Keyword::COLLATE) { - Some(self.parser.parse_object_name()?) + Some(self.parser.parse_object_name(false)?) } else { None }; let mut options = vec![]; loop { if self.parser.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(self.parser.parse_identifier()?); + let name = Some(self.parser.parse_identifier(false)?); if let Some(option) = self.parser.parse_optional_column_option()? 
{ options.push(ColumnOptionDef { name, option }); } else { @@ -608,7 +608,7 @@ impl<'a> DFParser<'a> { let if_not_exists = self.parser .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parser.parse_object_name()?; + let table_name = self.parser.parse_object_name(true)?; let (columns, constraints) = self.parse_columns()?; #[derive(Default)] diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index a04df5589b85..012b1c51a5c1 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -61,6 +61,19 @@ pub trait ContextProvider { not_impl_err!("Table Functions are not supported") } + /// This provides a worktable (an intermediate table that is used to store the results of a CTE during execution) + /// We don't directly implement this in the logical plan's ['SqlToRel`] + /// because the sql code needs access to a table that contains execution-related types that can't be a direct dependency + /// of the sql crate (namely, the `CteWorktable`). + /// The [`ContextProvider`] provides a way to "hide" this dependency. + fn create_cte_work_table( + &self, + _name: &str, + _schema: SchemaRef, + ) -> Result> { + not_impl_err!("Recursive CTE is not implemented") + } + /// Getter for a UDF description fn get_function_meta(&self, name: &str) -> Option>; /// Getter for a UDAF description @@ -285,7 +298,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = self.apply_expr_alias(plan, alias.columns)?; LogicalPlanBuilder::from(plan) - .alias(self.normalizer.normalize(alias.name))? + .alias(TableReference::bare(self.normalizer.normalize(alias.name)))? .build() } @@ -452,6 +465,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Int64 | SQLDataType::Float64 | SQLDataType::Struct(_) + | SQLDataType::JSONB + | SQLDataType::Unspecified => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 388377e3ee6b..ea8edd0771c8 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use arrow::datatypes::Schema; use datafusion_common::{ not_impl_err, plan_err, sql_err, Constraints, DataFusionError, Result, ScalarValue, }; @@ -26,7 +27,8 @@ use datafusion_expr::{ CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder, }; use sqlparser::ast::{ - Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, Value, + Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, SetOperator, + SetQuantifier, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -52,21 +54,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let set_expr = query.body; if let Some(with) = query.with { // Process CTEs from top to bottom - // do not allow self-references - if with.recursive { - if self - .context_provider - .options() - .execution - .enable_recursive_ctes - { - return plan_err!( - "Recursive CTEs are enabled but are not yet supported" - ); - } else { - return not_impl_err!("Recursive CTEs are not supported"); - } - } + let is_recursive = with.recursive; for cte in with.cte_tables { // A `WITH` block can't use the same name more than once @@ -76,16 +64,127 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { "WITH query name {cte_name:?} specified more than once" ))); } - // create logical plan & pass backreferencing CTEs - // CTE expr don't need extend outer_query_schema - let logical_plan = - 
self.query_to_plan(*cte.query, &mut planner_context.clone())?; - // Each `WITH` block can change the column names in the last - // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). - let logical_plan = self.apply_table_alias(logical_plan, cte.alias)?; + if is_recursive { + if !self + .context_provider + .options() + .execution + .enable_recursive_ctes + { + return not_impl_err!("Recursive CTEs are not enabled"); + } + + match *cte.query.body { + SetExpr::SetOperation { + op: SetOperator::Union, + left, + right, + set_quantifier, + } => { + let distinct = set_quantifier != SetQuantifier::All; + + // Each recursive CTE consists of two parts in the logical plan: + // 1. A static term (the left hand side of the SQL, where + // referencing the same CTE is not allowed) + // + // 2. A recursive term (the right hand side, i.e. the recursive + // part) + + // Since the static term does not have any specific properties, it can + // be compiled as if it were a regular (non-recursive) set expression. This will + // allow us to infer the schema to be used in the recursive term. + + // ---------- Step 1: Compile the static term ------------------ + let static_plan = self + .set_expr_to_plan(*left, &mut planner_context.clone())?; + + // Since recursive CTEs include a component that references a + // table by its name, like the example below: + // + // WITH RECURSIVE values(n) AS ( + // SELECT 1 as n -- static term + // UNION ALL + // SELECT n + 1 + // FROM values -- self reference + // WHERE n < 100 + // ) + // + // We need a temporary 'relation' that can be referenced and used. PostgreSQL + // calls this a 'working table', but it is entirely an implementation + // detail and a 'real' table with that name might not even exist (as + // is the case in DataFusion). + // + // Since we can't simply register a table during the planning stage (it is + // an execution concern), we'll use a relation object that preserves the + // schema of the input perfectly and also knows which recursive CTE it is + // bound to. + + // ---------- Step 2: Create a temporary relation ------------------ + // Step 2.1: Create a table source for the temporary relation + let work_table_source = + self.context_provider.create_cte_work_table( + &cte_name, + Arc::new(Schema::from(static_plan.schema().as_ref())), + )?; - planner_context.insert_cte(cte_name, logical_plan); + // Step 2.2: Create a temporary relation logical plan that will be used + // as the input to the recursive term + let work_table_plan = LogicalPlanBuilder::scan( + cte_name.to_string(), + work_table_source, + None, + )? + .build()?; + + let name = cte_name.clone(); + + // Step 2.3: Register the temporary relation in the planning context. + // All the self references in the recursive term will be replaced + // with the temporary relation we created above by temporarily registering + // it as a CTE. This temporary relation in the planning context will be + // replaced by the actual CTE plan once we're done with the planning. + planner_context.insert_cte(cte_name.clone(), work_table_plan); + + // ---------- Step 3: Compile the recursive term ------------------ + // This uses the named_relation we inserted above to resolve the + // relation.
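+ // For the `values` example above, the resulting logical plan is expected to look + // roughly like the following sketch (compare the `EXPLAIN` output of the recursive + // CTE tests added to cte.slt later in this change): + // + // RecursiveQuery: is_distinct=false + // --Projection: Int64(1) AS n (static term) + // ----EmptyRelation + // --Projection: values.n + Int64(1) AS n (recursive term) + // ----Filter: values.n < Int64(100) + // ------TableScan: values (resolves to the work table at execution time) + //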
This ensures that the recursive term uses the named relation logical plan + // and thus the 'continuance' physical plan as its input and source + let recursive_plan = self + .set_expr_to_plan(*right, &mut planner_context.clone())?; + + // ---------- Step 4: Create the final plan ------------------ + // Step 4.1: Compile the final plan + let logical_plan = LogicalPlanBuilder::from(static_plan) + .to_recursive_query(name, recursive_plan, distinct)? + .build()?; + + let final_plan = + self.apply_table_alias(logical_plan, cte.alias)?; + + // Step 4.2: Remove the temporary relation from the planning context and replace it + // with the final plan. + planner_context.insert_cte(cte_name.clone(), final_plan); + } + _ => { + return Err(DataFusionError::SQL( + ParserError(format!("Unsupported CTE: {cte}")), + None, + )); + } + }; + } else { + // create logical plan & pass backreferencing CTEs + // CTE expr don't need extend outer_query_schema + let logical_plan = + self.query_to_plan(*cte.query, &mut planner_context.clone())?; + + // Each `WITH` block can change the column names in the last + // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). + let logical_plan = self.apply_table_alias(logical_plan, cte.alias)?; + + planner_context.insert_cte(cte_name, logical_plan); + } } } let plan = self.set_expr_to_plan(*(set_expr.clone()), planner_context)?; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b9fb4c65dc2c..84a394f324cf 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -52,9 +52,10 @@ use datafusion_expr::{ }; use sqlparser::ast; use sqlparser::ast::{ - Assignment, ColumnDef, Expr as SQLExpr, Expr, Ident, ObjectName, ObjectType, Query, - SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, - TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value, + Assignment, ColumnDef, CreateTableOptions, Expr as SQLExpr, Expr, Ident, ObjectName, + ObjectType, Query, SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, + Statement, TableConstraint, TableFactor, TableWithJoins, TransactionMode, + UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -90,18 +91,21 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec { - constraints.push(ast::TableConstraint::Unique { - name: name.clone(), - columns: vec![column.name.clone()], - is_primary: *is_primary, - }) - } + ast::ColumnOption::Unique { + is_primary, + characteristics, + } => constraints.push(ast::TableConstraint::Unique { + name: name.clone(), + columns: vec![column.name.clone()], + is_primary: *is_primary, + characteristics: *characteristics, + }), ast::ColumnOption::ForeignKey { foreign_table, referred_columns, on_delete, on_update, + characteristics, } => constraints.push(ast::TableConstraint::ForeignKey { name: name.clone(), columns: vec![], @@ -109,6 +113,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec { constraints.push(ast::TableConstraint::Check { @@ -124,6 +129,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec {} } } @@ -292,9 +298,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name, columns, query, - with_options, + options: CreateTableOptions::None, .. 
- } if with_options.is_empty() => { + } => { + let columns = columns + .into_iter() + .map(|view_column_def| { + if let Some(options) = view_column_def.options { + plan_err!( + "Options not supported for view columns: {options:?}" + ) + } else { + Ok(view_column_def.name) + } + }) + .collect::>>()?; + let mut plan = self.query_to_plan(*query, &mut PlannerContext::new())?; plan = self.apply_expr_alias(plan, columns)?; @@ -440,6 +459,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { on, returning, ignore, + table_alias, + replace_into, + priority, } => { if or.is_some() { plan_err!("Inserts with or clauses not supported")?; @@ -465,6 +487,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let Some(source) = source else { plan_err!("Inserts without a source not supported")? }; + if let Some(table_alias) = table_alias { + plan_err!( + "Inserts with a table alias not supported: {table_alias:?}" + )? + }; + if replace_into { + plan_err!("Inserts with a `REPLACE INTO` clause not supported")? + }; + if let Some(priority) = priority { + plan_err!( + "Inserts with a `PRIORITY` clause not supported: {priority:?}" + )? + }; let _ = into; // optional keyword doesn't change behavior self.insert_to_plan(table_name, columns, source, overwrite) } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 44da4cd4d836..c88e2d1130ed 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1394,11 +1394,46 @@ fn recursive_ctes() { select * from numbers;"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "This feature is not implemented: Recursive CTEs are not supported", + "This feature is not implemented: Recursive CTEs are not enabled", err.strip_backtrace() ); } +#[test] +fn recursive_ctes_enabled() { + let sql = " + WITH RECURSIVE numbers AS ( + select 1 as n + UNION ALL + select n + 1 FROM numbers WHERE N < 10 + ) + select * from numbers;"; + + // manually setting up test here so that we can enable recursive ctes + let mut context = MockContextProvider::default(); + context.options_mut().execution.enable_recursive_ctes = true; + + let planner = SqlToRel::new_with_options(&context, ParserOptions::default()); + let result = DFParser::parse_sql_with_dialect(sql, &GenericDialect {}); + let mut ast = result.unwrap(); + + let plan = planner + .statement_to_plan(ast.pop_front().unwrap()) + .expect("recursive cte plan creation failed"); + + assert_eq!( + format!("{plan:?}"), + "Projection: numbers.n\ + \n SubqueryAlias: numbers\ + \n RecursiveQuery: is_distinct=false\ + \n Projection: Int64(1) AS n\ + \n EmptyRelation\ + \n Projection: numbers.n + Int64(1)\ + \n Filter: numbers.n < Int64(10)\ + \n TableScan: numbers" + ); +} + #[test] fn select_simple_aggregate_with_groupby_and_column_is_in_aggregate_and_groupby() { quick_test( @@ -2692,6 +2727,12 @@ struct MockContextProvider { udafs: HashMap>, } +impl MockContextProvider { + fn options_mut(&mut self) -> &mut ConfigOptions { + &mut self.options + } +} + impl ContextProvider for MockContextProvider { fn get_table_source(&self, name: TableReference) -> Result> { let schema = match name.table() { @@ -2801,6 +2842,14 @@ impl ContextProvider for MockContextProvider { fn options(&self) -> &ConfigOptions { &self.options } + + fn create_cte_work_table( + &self, + _name: &str, + schema: SchemaRef, + ) -> Result> { + Ok(Arc::new(EmptyTable::new(schema))) + } } #[test] diff --git a/datafusion/sqllogictest/Cargo.toml 
b/datafusion/sqllogictest/Cargo.toml index 7085e1ada09a..911b46c0bcf4 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -37,7 +37,7 @@ bigdecimal = { workspace = true } bytes = { version = "1.4.0", optional = true } chrono = { workspace = true, optional = true } clap = { version = "4.4.8", features = ["derive", "env"] } -datafusion = { path = "../core", version = "34.0.0" } +datafusion = { path = "../core", version = "35.0.0" } datafusion-common = { workspace = true } futures = { version = "0.3.28" } half = { workspace = true } diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index a5ce7ccb9fe0..889ccdcd66d4 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -28,8 +28,7 @@ use arrow::array::{ use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; use datafusion::execution::context::SessionState; -use datafusion::logical_expr::{create_udf, Expr, ScalarUDF, Volatility}; -use datafusion::physical_expr::functions::make_scalar_function; +use datafusion::logical_expr::{create_udf, ColumnarValue, Expr, ScalarUDF, Volatility}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionConfig; use datafusion::{ @@ -356,9 +355,16 @@ pub async fn register_metadata_tables(ctx: &SessionContext) { /// Create a UDF function named "example". See the `sample_udf.rs` example /// file for an explanation of the API. fn create_example_udf() -> ScalarUDF { - let adder = make_scalar_function(|args: &[ArrayRef]| { - let lhs = as_float64_array(&args[0]).expect("cast failed"); - let rhs = as_float64_array(&args[1]).expect("cast failed"); + let adder = Arc::new(|args: &[ColumnarValue]| { + let ColumnarValue::Array(lhs) = &args[0] else { + panic!("should be array") + }; + let ColumnarValue::Array(rhs) = &args[1] else { + panic!("should be array") + }; + + let lhs = as_float64_array(lhs).expect("cast failed"); + let rhs = as_float64_array(rhs).expect("cast failed"); let array = lhs .iter() .zip(rhs.iter()) @@ -367,7 +373,7 @@ fn create_example_udf() -> ScalarUDF { _ => None, }) .collect::(); - Ok(Arc::new(array) as ArrayRef) + Ok(ColumnarValue::from(Arc::new(array) as ArrayRef)) }); create_udf( "example", diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 50cdebd054a7..5cd728c4344b 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -2515,198 +2515,6 @@ false true NULL -# TopK aggregation -statement ok -CREATE TABLE traces(trace_id varchar, timestamp bigint, other bigint) AS VALUES -(NULL, 0, 0), -('a', NULL, NULL), -('a', 1, 1), -('a', -1, -1), -('b', 0, 0), -('c', 1, 1), -('c', 2, 2), -('b', 3, 3); - -statement ok -set datafusion.optimizer.enable_topk_aggregation = false; - -query TT -explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ----- -logical_plan -Limit: skip=0, fetch=4 ---Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 -----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] -------TableScan: traces projection=[trace_id, timestamp] -physical_plan -GlobalLimitExec: skip=0, fetch=4 ---SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 -----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC] -------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 
as trace_id], aggr=[MAX(traces.timestamp)] ---------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] -----------------MemoryExec: partitions=1, partition_sizes=[1] - - -query TI -select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ----- -b 3 -c 2 -a 1 -NULL 0 - -query TI -select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; ----- -a -1 -NULL 0 -b 0 -c 1 - -query TII -select trace_id, other, MIN(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; ----- -a -1 -1 -b 0 0 -NULL 0 0 -c 1 1 - -query TII -select trace_id, MIN(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; ----- -a -1 -1 -NULL 0 0 -b 0 0 -c 1 1 - -statement ok -set datafusion.optimizer.enable_topk_aggregation = true; - -query TT -explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ----- -logical_plan -Limit: skip=0, fetch=4 ---Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 -----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] -------TableScan: traces projection=[trace_id, timestamp] -physical_plan -GlobalLimitExec: skip=0, fetch=4 ---SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 -----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC] -------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] ---------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] -----------------MemoryExec: partitions=1, partition_sizes=[1] - -query TT -explain select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) desc limit 4; ----- -logical_plan -Limit: skip=0, fetch=4 ---Sort: MIN(traces.timestamp) DESC NULLS FIRST, fetch=4 -----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MIN(traces.timestamp)]] -------TableScan: traces projection=[trace_id, timestamp] -physical_plan -GlobalLimitExec: skip=0, fetch=4 ---SortPreservingMergeExec: [MIN(traces.timestamp)@1 DESC], fetch=4 -----SortExec: TopK(fetch=4), expr=[MIN(traces.timestamp)@1 DESC] -------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] ---------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] -----------------MemoryExec: partitions=1, partition_sizes=[1] - -query TT -explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) asc limit 4; ----- -logical_plan -Limit: skip=0, fetch=4 ---Sort: MAX(traces.timestamp) ASC NULLS LAST, fetch=4 -----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] -------TableScan: traces projection=[trace_id, timestamp] 
-physical_plan -GlobalLimitExec: skip=0, fetch=4 ---SortPreservingMergeExec: [MAX(traces.timestamp)@1 ASC NULLS LAST], fetch=4 -----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 ASC NULLS LAST] -------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] ---------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] -----------------MemoryExec: partitions=1, partition_sizes=[1] - -query TT -explain select trace_id, MAX(timestamp) from traces group by trace_id order by trace_id asc limit 4; ----- -logical_plan -Limit: skip=0, fetch=4 ---Sort: traces.trace_id ASC NULLS LAST, fetch=4 -----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] -------TableScan: traces projection=[trace_id, timestamp] -physical_plan -GlobalLimitExec: skip=0, fetch=4 ---SortPreservingMergeExec: [trace_id@0 ASC NULLS LAST], fetch=4 -----SortExec: TopK(fetch=4), expr=[trace_id@0 ASC NULLS LAST] -------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] ---------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] -----------------MemoryExec: partitions=1, partition_sizes=[1] - -query TI -select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ----- -b 3 -c 2 -a 1 -NULL 0 - -query TI -select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; ----- -a -1 -NULL 0 -b 0 -c 1 - -query TI -select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 3; ----- -b 3 -c 2 -a 1 - -query TI -select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 3; ----- -a -1 -NULL 0 -b 0 - -query TII -select trace_id, other, MIN(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; ----- -a -1 -1 -b 0 0 -NULL 0 0 -c 1 1 - -query TII -select trace_id, MIN(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; ----- -a -1 -1 -NULL 0 0 -b 0 0 -c 1 1 - # # Push limit into distinct group-by aggregation tests # @@ -3260,3 +3068,57 @@ query I select count(*) from (select count(*) a, count(*) b from (select 1)); ---- 1 + +# rule `aggregate_statistics` should not optimize MIN/MAX to wrong values on empty relation + +statement ok +CREATE TABLE empty(col0 INTEGER); + +query I +SELECT MIN(col0) FROM empty WHERE col0=1; +---- +NULL + +query I +SELECT MAX(col0) FROM empty WHERE col0=1; +---- +NULL + +query TT +EXPLAIN SELECT MIN(col0) FROM empty; +---- +logical_plan +Aggregate: groupBy=[[]], aggr=[[MIN(empty.col0)]] +--TableScan: empty projection=[col0] +physical_plan +ProjectionExec: expr=[NULL as MIN(empty.col0)] +--PlaceholderRowExec + +query TT +EXPLAIN SELECT MAX(col0) FROM empty; +---- +logical_plan +Aggregate: groupBy=[[]], aggr=[[MAX(empty.col0)]] +--TableScan: empty projection=[col0] +physical_plan +ProjectionExec: expr=[NULL as MAX(empty.col0)] +--PlaceholderRowExec + +statement ok +DROP TABLE 
empty; + +statement ok +CREATE TABLE t(col0 INTEGER) as VALUES(2); + +query I +SELECT MIN(col0) FROM t WHERE col0=1; +---- +NULL + +query I +SELECT MAX(col0) FROM t WHERE col0=1; +---- +NULL + +statement ok +DROP TABLE t; diff --git a/datafusion/sqllogictest/test_files/aggregates_topk.slt b/datafusion/sqllogictest/test_files/aggregates_topk.slt new file mode 100644 index 000000000000..bd8f00e04158 --- /dev/null +++ b/datafusion/sqllogictest/test_files/aggregates_topk.slt @@ -0,0 +1,214 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####### +# Setup test data table +####### + +# TopK aggregation +statement ok +CREATE TABLE traces(trace_id varchar, timestamp bigint, other bigint) AS VALUES +(NULL, 0, 0), +('a', NULL, NULL), +('a', 1, 1), +('a', -1, -1), +('b', -2, 0), +('c', 4, 1), +('c', 4, 2), +('c', 2, 2), +('c', 2, 4), +('b', 3, 3); + +statement ok +set datafusion.optimizer.enable_topk_aggregation = false; + +query TT +explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; +---- +logical_plan +Limit: skip=0, fetch=4 +--Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 +----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +------TableScan: traces projection=[trace_id, timestamp] +physical_plan +GlobalLimitExec: skip=0, fetch=4 +--SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 +----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC] +------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +--------CoalesceBatchesExec: target_batch_size=8192 +----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +----------------MemoryExec: partitions=1, partition_sizes=[1] + + +query TI +select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; +---- +c 4 +b 3 +a 1 +NULL 0 + +query TI +select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; +---- +b -2 +a -1 +NULL 0 +c 2 + +query TII +select trace_id, other, MIN(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; +---- +b 0 -2 +a -1 -1 +NULL 0 0 +a 1 1 + +query TII +select trace_id, MIN(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; +---- +b 0 -2 +a -1 -1 +NULL 0 0 +c 1 2 + +statement ok +set datafusion.optimizer.enable_topk_aggregation = true; + +query TT +explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; +---- +logical_plan +Limit: 
skip=0, fetch=4 +--Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 +----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +------TableScan: traces projection=[trace_id, timestamp] +physical_plan +GlobalLimitExec: skip=0, fetch=4 +--SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 +----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC] +------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] +--------CoalesceBatchesExec: target_batch_size=8192 +----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] +----------------MemoryExec: partitions=1, partition_sizes=[1] + +query TT +explain select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) desc limit 4; +---- +logical_plan +Limit: skip=0, fetch=4 +--Sort: MIN(traces.timestamp) DESC NULLS FIRST, fetch=4 +----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MIN(traces.timestamp)]] +------TableScan: traces projection=[trace_id, timestamp] +physical_plan +GlobalLimitExec: skip=0, fetch=4 +--SortPreservingMergeExec: [MIN(traces.timestamp)@1 DESC], fetch=4 +----SortExec: TopK(fetch=4), expr=[MIN(traces.timestamp)@1 DESC] +------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] +--------CoalesceBatchesExec: target_batch_size=8192 +----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] +----------------MemoryExec: partitions=1, partition_sizes=[1] + +query TT +explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) asc limit 4; +---- +logical_plan +Limit: skip=0, fetch=4 +--Sort: MAX(traces.timestamp) ASC NULLS LAST, fetch=4 +----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +------TableScan: traces projection=[trace_id, timestamp] +physical_plan +GlobalLimitExec: skip=0, fetch=4 +--SortPreservingMergeExec: [MAX(traces.timestamp)@1 ASC NULLS LAST], fetch=4 +----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 ASC NULLS LAST] +------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +--------CoalesceBatchesExec: target_batch_size=8192 +----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +----------------MemoryExec: partitions=1, partition_sizes=[1] + +query TT +explain select trace_id, MAX(timestamp) from traces group by trace_id order by trace_id asc limit 4; +---- +logical_plan +Limit: skip=0, fetch=4 +--Sort: traces.trace_id ASC NULLS LAST, fetch=4 +----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +------TableScan: traces projection=[trace_id, timestamp] +physical_plan +GlobalLimitExec: skip=0, fetch=4 +--SortPreservingMergeExec: [trace_id@0 ASC NULLS LAST], fetch=4 +----SortExec: TopK(fetch=4), expr=[trace_id@0 ASC NULLS LAST] +------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as 
trace_id], aggr=[MAX(traces.timestamp)] +--------CoalesceBatchesExec: target_batch_size=8192 +----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +----------------MemoryExec: partitions=1, partition_sizes=[1] + +query TI +select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; +---- +c 4 +b 3 +a 1 +NULL 0 + +query TI +select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; +---- +b -2 +a -1 +NULL 0 +c 2 + +query TI +select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 3; +---- +c 4 +b 3 +a 1 + +query TI +select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 3; +---- +b -2 +a -1 +NULL 0 + +query TII +select trace_id, other, MIN(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; +---- +b 0 -2 +a -1 -1 +NULL 0 0 +a 1 1 + +query TII +select trace_id, MIN(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; +---- +b 0 -2 +a -1 -1 +NULL 0 0 +c 1 2 diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 55cd17724565..b7d92aec88e6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1234,6 +1234,25 @@ select array_slice(make_array(1, 2, 3, 4, 5), 2, 4), array_slice(make_array('h', ---- [2, 3, 4] [h, e] +query ???? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 2), + array_slice(make_array(1, 2, 3, 4, 5), 0, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5, 2); +---- +[1, 3, 5] [h, l, o] [1, 3, 5] [h, l, o] + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, -1); +---- +[] [] + +query error Execution error: array_slice got invalid stride: 0, it cannot be 0 +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 0); + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 5, 1, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 5, 1, -2); +---- +[5, 3, 1] [o, l, h] + query ?? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); ---- @@ -1342,12 +1361,12 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NU query ?? select array_slice(make_array(1, 2, 3, 4, 5), 0, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, -3); ---- -[1] [h, e] +[1, 2] [h, e, l] query ?? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, -3); ---- -[1] [h, e] +[1, 2] [h, e, l] # array_slice scalar function #13 (with negative number and NULL) query error @@ -1367,34 +1386,34 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NU query ?? select array_slice(make_array(1, 2, 3, 4, 5), -4, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -1); ---- -[2, 3, 4] [l, l] +[2, 3, 4, 5] [l, l, o] query ?? 
select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -1); ---- -[2, 3, 4] [l, l] +[2, 3, 4, 5] [l, l, o] # array_slice scalar function #16 (with negative indexes; almost full array (only with negative indices cannot return full array)) query ?? select array_slice(make_array(1, 2, 3, 4, 5), -5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -5, -1); ---- -[1, 2, 3, 4] [h, e, l, l] +[1, 2, 3, 4, 5] [h, e, l, l, o] query ?? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -5, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -5, -1); ---- -[1, 2, 3, 4] [h, e, l, l] +[1, 2, 3, 4, 5] [h, e, l, l, o] # array_slice scalar function #17 (with negative indexes; first index = second index) query ?? select array_slice(make_array(1, 2, 3, 4, 5), -4, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -3); ---- -[] [] +[2] [l] query ?? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -3); ---- -[] [] +[2] [l] # array_slice scalar function #18 (with negative indexes; first index > second_index) query ?? @@ -1422,24 +1441,24 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7 query ?? select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), -2, -1), array_slice(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), -1, -1); ---- -[[1, 2, 3, 4, 5]] [] +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] query ?? select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), -2, -1), array_slice(arrow_cast(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), 'LargeList(List(Int64))'), -1, -1); ---- -[[1, 2, 3, 4, 5]] [] +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] # array_slice scalar function #21 (with first positive index and last negative index) query ?? select array_slice(make_array(1, 2, 3, 4, 5), 2, -3), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, -2); ---- -[2] [e, l] +[2, 3] [e, l, l] query ?? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, -3), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, -2); ---- -[2] [e, l] +[2, 3] [e, l, l] # array_slice scalar function #22 (with first negative index and last positive index) query ?? @@ -1468,7 +1487,7 @@ query ? select array_slice(column1, column2, column3) from slices; ---- [] -[12, 13, 14, 15, 16] +[12, 13, 14, 15, 16, 17] [] [] [] @@ -1479,7 +1498,7 @@ query ? select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; ---- [] -[12, 13, 14, 15, 16] +[12, 13, 14, 15, 16, 17] [] [] [] @@ -1492,9 +1511,9 @@ query ??? select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; ---- [1] [] [, 2, 3, 4, 5] -[] [13, 14, 15, 16] [12, 13, 14, 15] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] [] [] [21, 22, 23, , 25] -[] [33] [] +[] [33, 34] [] [4, 5] [] [] [1, 2, 3, 4, 5] [43, 44, 45, 46] [41, 42, 43, 44, 45] [5] [, 54, 55, 56, 57, 58, 59, 60] [55] @@ -1503,9 +1522,9 @@ query ??? 
select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; ---- [1] [] [, 2, 3, 4, 5] -[] [13, 14, 15, 16] [12, 13, 14, 15] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] [] [] [21, 22, 23, , 25] -[] [33] [] +[] [33, 34] [] [4, 5] [] [] [1, 2, 3, 4, 5] [43, 44, 45, 46] [41, 42, 43, 44, 45] [5] [, 54, 55, 56, 57, 58, 59, 60] [55] @@ -1599,9 +1618,9 @@ select query ???? select array_append(arrow_cast(make_array(), 'LargeList(Null)'), 4), - array_append(make_array(), null), - array_append(make_array(1, null, 3), 4), - array_append(make_array(null, null), 1) + array_append(arrow_cast(make_array(), 'LargeList(Null)'), null), + array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'LargeList(Null)'), 1) ; ---- [4] [] [1, , 3, 4] [, , 1] diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 5e9e7ff03d8b..8b3bd7eac95d 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -384,4 +384,41 @@ LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, di query T select arrow_typeof(arrow_cast(make_array([1, 2, 3]), 'LargeList(LargeList(Int64))')); ---- -LargeList(Field { name: "item", data_type: LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) \ No newline at end of file +LargeList(Field { name: "item", data_type: LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + +## FixedSizeList + +query ? +select arrow_cast(null, 'FixedSizeList(1, Int64)'); +---- +NULL + +#TODO: arrow-rs doesn't support it yet +#query ? +#select arrow_cast('1', 'FixedSizeList(1, Int64)'); +#---- +#[1] + + +query ? +select arrow_cast([1], 'FixedSizeList(1, Int64)'); +---- +[1] + +query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3 +select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)'); + +query ? +select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'); +---- +[1, 2, 3] + +query T +select arrow_typeof(arrow_cast(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 'FixedSizeList(3, Int64)')); +---- +FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3) + +query ? +select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); +---- +[1, 2, 3] diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 9f5b7af41577..c9b3bdfa338b 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -64,6 +64,24 @@ select * from validate_parquet; 1 Foo 2 Bar +query ? 
+copy (values (struct(timestamp '2021-01-01 01:00:01', 1)), (struct(timestamp '2022-01-01 01:00:01', 2)), +(struct(timestamp '2023-01-03 01:00:01', 3)), (struct(timestamp '2024-01-01 01:00:01', 4))) +to 'test_files/scratch/copy/table_nested2' (format parquet, single_file_output false); +---- +4 + +statement ok +CREATE EXTERNAL TABLE validate_parquet_nested2 STORED AS PARQUET LOCATION 'test_files/scratch/copy/table_nested2/'; + +query ? +select * from validate_parquet_nested2; +---- +{c0: 2021-01-01T01:00:01, c1: 1} +{c0: 2022-01-01T01:00:01, c1: 2} +{c0: 2023-01-03T01:00:01, c1: 3} +{c0: 2024-01-01T01:00:01, c1: 4} + query ?? COPY (values (struct ('foo', (struct ('foo', make_array(struct('a',1), struct('b',2))))), make_array(timestamp '2023-01-01 01:00:01',timestamp '2023-01-01 01:00:01')), @@ -72,9 +90,9 @@ to 'test_files/scratch/copy/table_nested' (format parquet, single_file_output fa ---- 2 -# validate multiple parquet file output statement ok -CREATE EXTERNAL TABLE validate_parquet_nested STORED AS PARQUET LOCATION 'test_files/scratch/copy/table_nested/'; +CREATE EXTERNAL TABLE validate_parquet_nested STORED AS PARQUET +LOCATION 'test_files/scratch/copy/table_nested/'; query ?? select * from validate_parquet_nested; @@ -82,6 +100,38 @@ select * from validate_parquet_nested; {c0: foo, c1: {c0: foo, c1: [{c0: a, c1: 1}, {c0: b, c1: 2}]}} [2023-01-01T01:00:01, 2023-01-01T01:00:01] {c0: bar, c1: {c0: foo, c1: [{c0: aa, c1: 10}, {c0: bb, c1: 20}]}} [2024-01-01T01:00:01, 2024-01-01T01:00:01] +query ? +copy (values ([struct('foo', 1), struct('bar', 2)])) +to 'test_files/scratch/copy/array_of_struct/' +(format parquet, single_file_output false); +---- +1 + +statement ok +CREATE EXTERNAL TABLE validate_array_of_struct +STORED AS PARQUET LOCATION 'test_files/scratch/copy/array_of_struct/'; + +query ? +select * from validate_array_of_struct; +---- +[{c0: foo, c1: 1}, {c0: bar, c1: 2}] + +query ? +copy (values (struct('foo', [1,2,3], struct('bar', [2,3,4])))) +to 'test_files/scratch/copy/struct_with_array/' +(format parquet, single_file_output false); +---- +1 + +statement ok +CREATE EXTERNAL TABLE validate_struct_with_array +STORED AS PARQUET LOCATION 'test_files/scratch/copy/struct_with_array/'; + +query ? 
+select * from validate_struct_with_array; +---- +{c0: foo, c1: [1, 2, 3], c2: {c0: bar, c1: [2, 3, 4]}} + # Copy parquet with all supported statment overrides query IT diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 9facb064bf32..5393083e6c53 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -63,3 +63,76 @@ id6 value"6 id7 value"7 id8 value"8 id9 value"9 + + +# Read partitioned csv +statement ok +CREATE TABLE src_table_1 ( + int_col INT, + string_col TEXT, + bigint_col BIGINT, + partition_col INT +) AS VALUES +(1, 'aaa', 100, 1), +(2, 'bbb', 200, 1), +(3, 'ccc', 300, 1), +(4, 'ddd', 400, 1); + +statement ok +CREATE TABLE src_table_2 ( + int_col INT, + string_col TEXT, + bigint_col BIGINT, + partition_col INT +) AS VALUES +(5, 'eee', 500, 2), +(6, 'fff', 600, 2), +(7, 'ggg', 700, 2), +(8, 'hhh', 800, 2); + +query ITII +COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +4 + + +query ITII +COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +4 + +statement ok +CREATE EXTERNAL TABLE partitioned_table ( + int_col INT, + string_col TEXT, + bigint_col BIGINT, + partition_col INT +) +STORED AS CSV +WITH HEADER ROW +LOCATION 'test_files/scratch/csv_files/csv_partitions'; + +query ITII +SELECT * FROM partitioned_table ORDER BY int_col; +---- +1 aaa 100 1 +2 bbb 200 1 +3 ccc 300 1 +4 ddd 400 1 +5 eee 500 2 +6 fff 600 2 +7 ggg 700 2 +8 hhh 800 2 + +query TT +EXPLAIN SELECT * FROM partitioned_table ORDER BY int_col; +---- +logical_plan +Sort: partitioned_table.int_col ASC NULLS LAST +--TableScan: partitioned_table projection=[int_col, string_col, bigint_col, partition_col] +physical_plan +SortPreservingMergeExec: [int_col@0 ASC NULLS LAST] +--SortExec: expr=[int_col@0 ASC NULLS LAST] +----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=true diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index c62b56584682..6b9db5589391 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -19,3 +19,635 @@ query II select * from (WITH source AS (select 1 as e) SELECT * FROM source) t1, (WITH source AS (select 1 as e) SELECT * FROM source) t2 ---- 1 1 + +# Ensure table aliases can be case sensitive +query I +WITH "T" AS (SELECT 1 a) SELECT "T".* FROM "T" +---- +1 + +# Ensure table aliases can be case sensitive +query TT +EXPLAIN WITH "NUMBERS" AS (SELECT 1 as a, 2 as b, 3 as c) SELECT "NUMBERS".* FROM "NUMBERS" +---- +logical_plan +Projection: NUMBERS.a, NUMBERS.b, NUMBERS.c +--SubqueryAlias: NUMBERS +----Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c +------EmptyRelation +physical_plan +ProjectionExec: expr=[1 as a, 2 as b, 3 as c] +--PlaceholderRowExec + + + +# enable recursive CTEs +statement ok +set datafusion.execution.enable_recursive_ctes = true; + +# trivial recursive CTE works +query I rowsort +WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION ALL + SELECT id + 1 as id + FROM nodes + WHERE id < 10 +) +SELECT * FROM nodes +---- +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 + +# explain trivial recursive 
CTE +query TT +EXPLAIN WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION ALL + SELECT id + 1 as id + FROM nodes + WHERE id < 10 +) +SELECT * FROM nodes +---- +logical_plan +Projection: nodes.id +--SubqueryAlias: nodes +----RecursiveQuery: is_distinct=false +------Projection: Int64(1) AS id +--------EmptyRelation +------Projection: nodes.id + Int64(1) AS id +--------Filter: nodes.id < Int64(10) +----------TableScan: nodes +physical_plan +RecursiveQueryExec: name=nodes, is_distinct=false +--ProjectionExec: expr=[1 as id] +----PlaceholderRowExec +--CoalescePartitionsExec +----ProjectionExec: expr=[id@0 + 1 as id] +------CoalesceBatchesExec: target_batch_size=8192 +--------FilterExec: id@0 < 10 +----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +------------WorkTableExec: name=nodes + +# setup +statement ok +CREATE EXTERNAL TABLE balance STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/balance.csv' + +# setup +statement ok +CREATE EXTERNAL TABLE growth STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/growth.csv' + +# setup +statement ok +set datafusion.execution.batch_size = 2; + +# recursive CTE with static term derived from table works. +# use explain to ensure that batch size is set to 2. This should produce multiple batches per iteration since the input +# table 'balances' has 4 rows +query TT +EXPLAIN WITH RECURSIVE balances AS ( + SELECT * from balance + UNION ALL + SELECT time + 1 as time, name, account_balance + 10 as account_balance + FROM balances + WHERE time < 10 +) +SELECT * FROM balances +ORDER BY time, name, account_balance +---- +logical_plan +Sort: balances.time ASC NULLS LAST, balances.name ASC NULLS LAST, balances.account_balance ASC NULLS LAST +--Projection: balances.time, balances.name, balances.account_balance +----SubqueryAlias: balances +------RecursiveQuery: is_distinct=false +--------Projection: balance.time, balance.name, balance.account_balance +----------TableScan: balance +--------Projection: balances.time + Int64(1) AS time, balances.name, balances.account_balance + Int64(10) AS account_balance +----------Filter: balances.time < Int64(10) +------------TableScan: balances +physical_plan +SortExec: expr=[time@0 ASC NULLS LAST,name@1 ASC NULLS LAST,account_balance@2 ASC NULLS LAST] +--RecursiveQueryExec: name=balances, is_distinct=false +----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/recursive_cte/balance.csv]]}, projection=[time, name, account_balance], has_header=true +----CoalescePartitionsExec +------ProjectionExec: expr=[time@0 + 1 as time, name@1 as name, account_balance@2 + 10 as account_balance] +--------CoalesceBatchesExec: target_batch_size=2 +----------FilterExec: time@0 < 10 +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------WorkTableExec: name=balances + +# recursive CTE with static term derived from table works +# note that this is run with batch size set to 2. 
This should produce multiple batches per iteration since the input +# table 'balances' has 4 rows +query ITI +WITH RECURSIVE balances AS ( + SELECT * from balance + UNION ALL + SELECT time + 1 as time, name, account_balance + 10 as account_balance + FROM balances + WHERE time < 10 +) +SELECT * FROM balances +ORDER BY time, name, account_balance +---- +1 John 100 +1 Tim 200 +2 John 110 +2 John 300 +2 Tim 210 +2 Tim 400 +3 John 120 +3 John 310 +3 Tim 220 +3 Tim 410 +4 John 130 +4 John 320 +4 Tim 230 +4 Tim 420 +5 John 140 +5 John 330 +5 Tim 240 +5 Tim 430 +6 John 150 +6 John 340 +6 Tim 250 +6 Tim 440 +7 John 160 +7 John 350 +7 Tim 260 +7 Tim 450 +8 John 170 +8 John 360 +8 Tim 270 +8 Tim 460 +9 John 180 +9 John 370 +9 Tim 280 +9 Tim 470 +10 John 190 +10 John 380 +10 Tim 290 +10 Tim 480 + +# reset batch size to default +statement ok +set datafusion.execution.batch_size = 8182; + +# recursive CTE with recursive join works +query ITI +WITH RECURSIVE balances AS ( + SELECT time as time, name as name, account_balance as account_balance + FROM balance + UNION ALL + SELECT time + 1 as time, balances.name, account_balance + growth.account_growth as account_balance + FROM balances + JOIN growth + ON balances.name = growth.name + WHERE time < 10 +) +SELECT * FROM balances +ORDER BY time, name, account_balance +---- +1 John 100 +1 Tim 200 +2 John 103 +2 John 300 +2 Tim 220 +2 Tim 400 +3 John 106 +3 John 303 +3 Tim 240 +3 Tim 420 +4 John 109 +4 John 306 +4 Tim 260 +4 Tim 440 +5 John 112 +5 John 309 +5 Tim 280 +5 Tim 460 +6 John 115 +6 John 312 +6 Tim 300 +6 Tim 480 +7 John 118 +7 John 315 +7 Tim 320 +7 Tim 500 +8 John 121 +8 John 318 +8 Tim 340 +8 Tim 520 +9 John 124 +9 John 321 +9 Tim 360 +9 Tim 540 +10 John 127 +10 John 324 +10 Tim 380 +10 Tim 560 + +# recursive CTE with aggregations works +query I rowsort +WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION ALL + SELECT id + 1 as id + FROM nodes + WHERE id < 10 +) +SELECT sum(id) FROM nodes +---- +55 + +# setup +statement ok +CREATE TABLE t(a BIGINT) AS VALUES(1),(2),(3); + +# referencing CTE multiple times does not error +query II rowsort +WITH RECURSIVE my_cte AS ( + SELECT a from t + UNION ALL + SELECT a+2 as a + FROM my_cte + WHERE a<5 +) +SELECT * FROM my_cte t1, my_cte +---- +1 1 +1 2 +1 3 +1 3 +1 4 +1 5 +1 5 +1 6 +2 1 +2 2 +2 3 +2 3 +2 4 +2 5 +2 5 +2 6 +3 1 +3 1 +3 2 +3 2 +3 3 +3 3 +3 3 +3 3 +3 4 +3 4 +3 5 +3 5 +3 5 +3 5 +3 6 +3 6 +4 1 +4 2 +4 3 +4 3 +4 4 +4 5 +4 5 +4 6 +5 1 +5 1 +5 2 +5 2 +5 3 +5 3 +5 3 +5 3 +5 4 +5 4 +5 5 +5 5 +5 5 +5 5 +5 6 +5 6 +6 1 +6 2 +6 3 +6 3 +6 4 +6 5 +6 5 +6 6 + +# CTE within recursive CTE works and does not result in 'index out of bounds: the len is 0 but the index is 0' +query I +WITH RECURSIVE "recursive_cte" AS ( + SELECT 1 as "val" + UNION ALL ( + WITH "sub_cte" AS ( + SELECT + time, + 1 as "val" + FROM + (SELECT DISTINCT "time" FROM "balance") + ) + SELECT + 2 as "val" + FROM + "recursive_cte" + FULL JOIN "sub_cte" ON 1 = 1 + WHERE + "recursive_cte"."val" < 2 + ) +) +SELECT + * +FROM + "recursive_cte"; +---- +1 +2 +2 + +# setup +statement ok +CREATE EXTERNAL TABLE prices STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/prices.csv' + +# CTE within window function inside nested CTE works. This test demonstrates using a nested window function to recursively iterate over a column. 
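+# A rough outline of the query below: the anchor part pins a [0, 50) window on the smallest +# prices_row_num and looks up the next row number, and each recursive step advances the window +# by 50 while using LEAD() over the distinct prices_row_num values to find the following row; +# iteration stops once prices_row_num_advancement becomes NULL, i.e. at the last price row.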
+query RRII +WITH RECURSIVE "recursive_cte" AS ( + ( + WITH "min_prices_row_num_cte" AS ( + SELECT + MIN("prices"."prices_row_num") AS "prices_row_num" + FROM + "prices" + ), + "min_prices_row_num_cte_second" AS ( + SELECT + MIN("prices"."prices_row_num") AS "prices_row_num_advancement" + FROM + "prices" + WHERE + "prices"."prices_row_num" > ( + SELECT + "prices_row_num" + FROM + "min_prices_row_num_cte" + ) + ) + SELECT + 0.0 AS "beg", + (0.0 + 50) AS "end", + ( + SELECT + "prices_row_num" + FROM + "min_prices_row_num_cte" + ) AS "prices_row_num", + ( + SELECT + "prices_row_num_advancement" + FROM + "min_prices_row_num_cte_second" + ) AS "prices_row_num_advancement" + FROM + "prices" + WHERE + "prices"."prices_row_num" = ( + SELECT + DISTINCT "prices_row_num" + FROM + "min_prices_row_num_cte" + ) + ) + UNION ALL ( + WITH "min_prices_row_num_cte" AS ( + SELECT + "prices"."prices_row_num" AS "prices_row_num", + LEAD("prices"."prices_row_num", 1) OVER ( + ORDER BY "prices_row_num" + ) AS "prices_row_num_advancement" + FROM + ( + SELECT + DISTINCT "prices_row_num" + FROM + "prices" + ) AS "prices" + ) + SELECT + "recursive_cte"."end" AS "beg", + ("recursive_cte"."end" + 50) AS "end", + "min_prices_row_num_cte"."prices_row_num" AS "prices_row_num", + "min_prices_row_num_cte"."prices_row_num_advancement" AS "prices_row_num_advancement" + FROM + "recursive_cte" + FULL JOIN "prices" ON "prices"."prices_row_num" = "recursive_cte"."prices_row_num_advancement" + FULL JOIN "min_prices_row_num_cte" ON "min_prices_row_num_cte"."prices_row_num" = COALESCE( + "prices"."prices_row_num", + "recursive_cte"."prices_row_num_advancement" + ) + WHERE + "recursive_cte"."prices_row_num_advancement" IS NOT NULL + ) +) +SELECT + DISTINCT * +FROM + "recursive_cte" +ORDER BY + "prices_row_num" ASC; +---- +0 50 1 2 +50 100 2 3 +100 150 3 4 +150 200 4 5 +200 250 5 6 +250 300 6 7 +300 350 7 8 +350 400 8 9 +400 450 9 10 +450 500 10 11 +500 550 11 12 +550 600 12 13 +600 650 13 14 +650 700 14 15 +700 750 15 16 +750 800 16 17 +800 850 17 18 +850 900 18 19 +900 950 19 20 +950 1000 20 21 +1000 1050 21 22 +1050 1100 22 23 +1100 1150 23 24 +1150 1200 24 25 +1200 1250 25 26 +1250 1300 26 27 +1300 1350 27 28 +1350 1400 28 29 +1400 1450 29 30 +1450 1500 30 31 +1500 1550 31 32 +1550 1600 32 33 +1600 1650 33 34 +1650 1700 34 35 +1700 1750 35 36 +1750 1800 36 37 +1800 1850 37 38 +1850 1900 38 39 +1900 1950 39 40 +1950 2000 40 41 +2000 2050 41 42 +2050 2100 42 43 +2100 2150 43 44 +2150 2200 44 45 +2200 2250 45 46 +2250 2300 46 47 +2300 2350 47 48 +2350 2400 48 49 +2400 2450 49 50 +2450 2500 50 51 +2500 2550 51 52 +2550 2600 52 53 +2600 2650 53 54 +2650 2700 54 55 +2700 2750 55 56 +2750 2800 56 57 +2800 2850 57 58 +2850 2900 58 59 +2900 2950 59 60 +2950 3000 60 61 +3000 3050 61 62 +3050 3100 62 63 +3100 3150 63 64 +3150 3200 64 65 +3200 3250 65 66 +3250 3300 66 67 +3300 3350 67 68 +3350 3400 68 69 +3400 3450 69 70 +3450 3500 70 71 +3500 3550 71 72 +3550 3600 72 73 +3600 3650 73 74 +3650 3700 74 75 +3700 3750 75 76 +3750 3800 76 77 +3800 3850 77 78 +3850 3900 78 79 +3900 3950 79 80 +3950 4000 80 81 +4000 4050 81 82 +4050 4100 82 83 +4100 4150 83 84 +4150 4200 84 85 +4200 4250 85 86 +4250 4300 86 87 +4300 4350 87 88 +4350 4400 88 89 +4400 4450 89 90 +4450 4500 90 91 +4500 4550 91 92 +4550 4600 92 93 +4600 4650 93 94 +4650 4700 94 95 +4700 4750 95 96 +4750 4800 96 97 +4800 4850 97 98 +4850 4900 98 99 +4900 4950 99 100 +4950 5000 100 NULL + +# setup +statement ok +CREATE EXTERNAL TABLE sales STORED as CSV WITH HEADER ROW LOCATION 
'../core/tests/data/recursive_cte/sales.csv' + +# setup +statement ok +CREATE EXTERNAL TABLE salespersons STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/salespersons.csv' + + +# group by works within recursive cte. This test case demonstrates rolling up a hierarchy of salespeople to their managers. +query III +WITH RECURSIVE region_sales AS ( + -- Anchor member + SELECT + s.salesperson_id AS salesperson_id, + SUM(s.sale_amount) AS amount, + 0 as level + FROM + sales s + GROUP BY + s.salesperson_id + UNION ALL + -- Recursive member + SELECT + sp.manager_id AS salesperson_id, + SUM(rs.amount) AS amount, + MIN(rs.level) + 1 as level + FROM + region_sales rs + INNER JOIN salespersons sp ON rs.salesperson_id = sp.salesperson_id + WHERE sp.manager_id IS NOT NULL + GROUP BY + sp.manager_id +) +SELECT + salesperson_id, + MAX(amount) as amount, + MAX(level) as hierarchy_level +FROM + region_sales +GROUP BY + salesperson_id +ORDER BY + hierarchy_level ASC, salesperson_id ASC; +---- +4 700 0 +5 600 0 +6 500 0 +7 900 0 +2 1300 1 +3 1400 1 +1 2700 2 + +#expect error from recursive CTE with nested recursive terms +query error DataFusion error: This feature is not implemented: Recursive queries cannot be nested +WITH RECURSIVE outer_cte AS ( + SELECT 1 as a + UNION ALL ( + WITH RECURSIVE nested_cte AS ( + SELECT 1 as a + UNION ALL + SELECT a+2 as a + FROM nested_cte where a < 3 + ) + SELECT outer_cte.a +2 + FROM outer_cte JOIN nested_cte USING(a) + WHERE nested_cte.a < 4 + ) +) +SELECT a FROM outer_cte; + +# expect error when recursive CTE is referenced multiple times in the recursive term +query error DataFusion error: This feature is not implemented: Multiple recursive references to the same CTE are not supported +WITH RECURSIVE my_cte AS ( + SELECT 1 as a + UNION ALL + SELECT my_cte.a+2 as a + FROM my_cte join my_cte c2 using(a) + WHERE my_cte.a<5 +) +SELECT a FROM my_cte; diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 7bd60a3a154b..d3f81cc61e95 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -647,6 +647,21 @@ SELECT initcap(arrow_cast('foo', 'Dictionary(Int32, Utf8)')) ---- Foo +query I +SELECT instr('foobarbar', 'bar') +---- +4 + +query I +SELECT instr('foobarbar', 'aa') +---- +0 + +query I +SELECT instr('foobarbar', '') +---- +1 + query T SELECT lower('FOObar') ---- @@ -727,6 +742,26 @@ SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2) ---- bar +query B +SELECT starts_with('foobar', 'foo') +---- +true + +query B +SELECT starts_with('foobar', 'bar') +---- +false + +query B +SELECT ends_with('foobar', 'bar') +---- +true + +query B +SELECT ends_with('foobar', 'foo') +---- +false + query T SELECT trim(' foo ') ---- diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b37b78ab6d79..43899f756735 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -155,7 +155,7 @@ datafusion.execution.listing_table_ignore_subdirectory true datafusion.execution.max_buffered_batches_per_output_file 2 datafusion.execution.meta_fetch_concurrency 32 datafusion.execution.minimum_parallel_output_files 4 -datafusion.execution.parquet.allow_single_file_parallelism false +datafusion.execution.parquet.allow_single_file_parallelism true 
datafusion.execution.parquet.bloom_filter_enabled false datafusion.execution.parquet.bloom_filter_fpp NULL datafusion.execution.parquet.bloom_filter_ndv NULL @@ -196,6 +196,7 @@ datafusion.optimizer.enable_round_robin_repartition true datafusion.optimizer.enable_topk_aggregation true datafusion.optimizer.filter_null_join_keys false datafusion.optimizer.hash_join_single_partition_threshold 1048576 +datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 datafusion.optimizer.max_passes 3 datafusion.optimizer.prefer_existing_sort false datafusion.optimizer.prefer_hash_join true @@ -231,7 +232,7 @@ datafusion.execution.listing_table_ignore_subdirectory true Should sub directori datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics datafusion.execution.minimum_parallel_output_files 4 Guarantees a minimum level of output files running in parallel. RecordBatches will be distributed in round robin fashion to each parallel writer. Each writer is closed and a new file opened once soft_max_rows_per_output_file is reached. -datafusion.execution.parquet.allow_single_file_parallelism false Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. +datafusion.execution.parquet.allow_single_file_parallelism true Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. datafusion.execution.parquet.bloom_filter_enabled false Sets if bloom filter is enabled for any column datafusion.execution.parquet.bloom_filter_fpp NULL Sets bloom filter false positive probability. If NULL, uses default parquet writer setting datafusion.execution.parquet.bloom_filter_ndv NULL Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting @@ -272,6 +273,7 @@ datafusion.optimizer.enable_round_robin_repartition true When set to true, the p datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. 
datafusion.optimizer.hash_join_single_partition_threshold 1048576 The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition +datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition datafusion.optimizer.max_passes 3 Number of times that the optimizer will attempt to optimize the plan datafusion.optimizer.prefer_existing_sort false When true, DataFusion will opportunistically remove sorts when the data is already sorted, (i.e. setting `preserve_order` to true on `RepartitionExec` and using `SortPreservingMergeExec`) When false, DataFusion will maximize plan parallelism using `RepartitionExec` even if this requires subsequently resorting data using a `SortExec`. datafusion.optimizer.prefer_hash_join true When set to true, the physical plan optimizer will prefer HashJoin over SortMergeJoin. HashJoin can work more efficiently than SortMergeJoin but consumes more memory diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index a7146a5a91c4..9619696679d2 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -1109,7 +1109,7 @@ RIGHT JOIN join_t2 on join_t1.t1_id = join_t2.t2_id WHERE NOT (join_t1.t1_int = join_t2.t2_int) ---- logical_plan -Inner Join: join_t1.t1_id = join_t2.t2_id Filter: join_t1.t1_int != join_t2.t2_int +Inner Join: join_t1.t1_id = join_t2.t2_id Filter: join_t2.t2_int != join_t1.t1_int --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --TableScan: join_t2 projection=[t2_id, t2_name, t2_int] @@ -3472,13 +3472,13 @@ FROM annotated_data as l, annotated_data as r WHERE l.a > r.a ---- logical_plan -Inner Join: Filter: l.a > r.a +Inner Join: Filter: r.a < l.a --SubqueryAlias: l ----TableScan: annotated_data projection=[a0, a, b, c, d] --SubqueryAlias: r ----TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan -NestedLoopJoinExec: join_type=Inner, filter=a@0 > a@1 +NestedLoopJoinExec: join_type=Inner, filter=a@1 < a@0 --RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true --CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true @@ -3523,3 +3523,73 @@ set datafusion.optimizer.prefer_existing_sort = false; statement ok drop table annotated_data; + +#### +# nestedjoin_with_alias_test +#### + +query IIII +select * from ((select 1 as a, 2 as b) c INNER JOIN (select 1 as c, 3 as d) e on c.a = e.c) f; +---- +1 2 1 3 + +#### +# create_left_semi_anti_join_context_with_null_ids_table_test +#### + +statement ok +CREATE TABLE join_test_left(t1_id INT UNSIGNED, t1_name VARCHAR, t1_int INT UNSIGNED) +AS VALUES +(11, 'a', 1), +(11, 'a', 1), +(22, 'b', 2), +(33, 'c', 3), +(44, 'd', 4), +(NULL, 'e', 0); + +statement ok +CREATE TABLE join_test_right(t2_id INT UNSIGNED, t2_name VARCHAR, t2_int INT UNSIGNED) +AS VALUES +(11, 'z', 3), +(11, 'z', 3), +(22, 'y', 1), +(33, 'x', 3), +(44, 'w', 3), +(NULL, 'v', 0); + +query IT +SELECT t1_id, t1_name FROM join_test_left WHERE t1_id NOT IN (SELECT t2_id FROM join_test_right) ORDER BY t1_id; 
+---- +NULL e + +#### +# join_partitioned_test +#### + +statement ok +CREATE TABLE join_partitioned_table(c1 INT UNSIGNED, c2 INT UNSIGNED, c3 BOOLEAN) +AS VALUES +(4, 1, true), +(4, 2, false), +(4, 3, true), +(4, 4, false); + +query I +SELECT 1 FROM join_partitioned_table JOIN (SELECT c1 AS id1 FROM join_partitioned_table) AS a ON c1=id1; +---- +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 0fa7ff9c2051..5f3e1dd9ee11 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -121,7 +121,7 @@ statement error DataFusion error: Error during planning: No function matches the SELECT abs(1, 2); # abs: unsupported argument type -statement error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nThis feature is not implemented: Unsupported data type Utf8 for function abs +query error DataFusion error: This feature is not implemented: Unsupported data type Utf8 for function abs SELECT abs('foo'); @@ -293,52 +293,52 @@ select c1*0, c2*0, c3*0, c4*0, c5*0, c6*0, c7*0, c8*0 from test_non_nullable_int ---- 0 0 0 0 0 0 0 0 -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c1/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c2/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c3/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c4/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c5/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c6/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c7/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c8/0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c1%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c2%0 FROM test_non_nullable_integer 
-query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c3%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c4%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c5%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c6%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c7%0 FROM test_non_nullable_integer -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c8%0 FROM test_non_nullable_integer statement ok @@ -556,10 +556,10 @@ SELECT c1*0 FROM test_non_nullable_decimal ---- 0 -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c1/0 FROM test_non_nullable_decimal -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Divide by zero +query error DataFusion error: Arrow error: Divide by zero error SELECT c1%0 FROM test_non_nullable_decimal statement ok diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt index e32e415338a7..ba407f6d2852 100644 --- a/datafusion/sqllogictest/test_files/predicates.slt +++ b/datafusion/sqllogictest/test_files/predicates.slt @@ -220,6 +220,30 @@ SELECT * FROM test WHERE column1 !~* 'z' foo Barrr +query T +SELECT * FROM test WHERE column1 ~~ '__z%' +---- +Bazzz + +query T +SELECT * FROM test WHERE column1 ~~* '__z%' +---- +Bazzz +ZZZZZ + +query T +SELECT * FROM test WHERE column1 !~~ '__z%' +---- +foo +Barrr +ZZZZZ + +query T +SELECT * FROM test WHERE column1 !~~* '__z%' +---- +foo +Barrr + statement ok DROP TABLE test; @@ -725,3 +749,40 @@ AggregateExec: mode=SinglePartitioned, gby=[p_partkey@2 as p_partkey], aggr=[SUM --------CoalesceBatchesExec: target_batch_size=8192 ----------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=1 ------------MemoryExec: partitions=1, partition_sizes=[1] + +# Inlist simplification + +statement ok +create table t(x int) as values (1), (2), (3); + +query TT +explain select x from t where x IN (1,2,3) AND x IN (4,5); +---- +logical_plan EmptyRelation +physical_plan EmptyExec + +query TT +explain select x from t where x NOT IN (1,2,3,4) OR x NOT IN (5,6,7,8); +---- +logical_plan TableScan: t projection=[x] +physical_plan MemoryExec: partitions=1, partition_sizes=[1] + +query TT +explain select x from t where x IN (1,2,3,4,5) AND x NOT IN (1,2,3,4); +---- +logical_plan +Filter: t.x = Int32(5) +--TableScan: t projection=[x] +physical_plan +CoalesceBatchesExec: 
target_batch_size=8192 +--FilterExec: x@0 = 5 +----MemoryExec: partitions=1, partition_sizes=[1] + +query TT +explain select x from t where x NOT IN (1,2,3,4,5) AND x IN (1,2,3); +---- +logical_plan EmptyRelation +physical_plan EmptyExec + +statement ok +drop table t; diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt index 9829299f43e5..7c141adf82b1 100644 --- a/datafusion/sqllogictest/test_files/repartition.slt +++ b/datafusion/sqllogictest/test_files/repartition.slt @@ -71,3 +71,59 @@ AggregateExec: mode=FinalPartitioned, gby=[column1@0 as column1], aggr=[SUM(parq # Cleanup statement ok DROP TABLE parquet_table; + + + +# Unbounded repartition +# See https://github.com/apache/arrow-datafusion/issues/5278 +# Set up unbounded table and run a query - the query plan should display a `RepartitionExec` +# and a `CoalescePartitionsExec` +statement ok +CREATE UNBOUNDED EXTERNAL TABLE sink_table ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT NOT NULL, + c5 INTEGER NOT NULL, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 INT UNSIGNED NOT NULL, + c10 BIGINT UNSIGNED NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL + ) +STORED AS CSV +WITH HEADER ROW +LOCATION '../../testing/data/csv/aggregate_test_100.csv'; + +query TII +SELECT c1, c2, c3 FROM sink_table WHERE c3 > 0 LIMIT 5; +---- +c 2 1 +b 1 29 +e 3 104 +a 3 13 +d 1 38 + +statement ok +set datafusion.execution.target_partitions = 3; + +statement ok +set datafusion.optimizer.enable_round_robin_repartition = true; + +query TT +EXPLAIN SELECT c1, c2, c3 FROM sink_table WHERE c3 > 0 LIMIT 5; +---- +logical_plan +Limit: skip=0, fetch=5 +--Filter: sink_table.c3 > Int16(0) +----TableScan: sink_table projection=[c1, c2, c3] +physical_plan +GlobalLimitExec: skip=0, fetch=5 +--CoalescePartitionsExec +----CoalesceBatchesExec: target_batch_size=8192 +------FilterExec: c3@2 > 0 +--------RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1 +----------StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 5ee0da2d33e8..4b8c8f2f084e 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -61,7 +61,7 @@ Filter: parquet_table.column1 != Int32(42) physical_plan CoalesceBatchesExec: target_batch_size=8192 --FilterExec: column1@0 != 42 -----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..153], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:153..306], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:306..459], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:459..610]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] +----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..104], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:104..208], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:208..312], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:312..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] # disable round robin repartitioning statement ok @@ -77,7 +77,7 @@ Filter: parquet_table.column1 != Int32(42) physical_plan CoalesceBatchesExec: target_batch_size=8192 --FilterExec: column1@0 != 42 -----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..153], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:153..306], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:306..459], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:459..610]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] +----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..104], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:104..208], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:208..312], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:312..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] # enable round robin repartitioning again statement ok @@ -102,7 +102,7 @@ SortPreservingMergeExec: [column1@0 ASC NULLS LAST] --SortExec: expr=[column1@0 ASC NULLS LAST] ----CoalesceBatchesExec: target_batch_size=8192 ------FilterExec: column1@0 != 42 ---------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..303], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:303..601, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..5], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:5..308], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:308..610]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] +--------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..205], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:205..405, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..5], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:5..210], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:210..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != 
column1_max@1, required_guarantees=[column1 not in (42)] ## Read the files as though they are ordered @@ -138,7 +138,7 @@ physical_plan SortPreservingMergeExec: [column1@0 ASC NULLS LAST] --CoalesceBatchesExec: target_batch_size=8192 ----FilterExec: column1@0 != 42 -------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..300], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..305], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:305..610], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:300..601]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] +------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..202], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..207], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:207..414], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:202..405]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=column1_min@0 != 42 OR 42 != column1_max@1, required_guarantees=[column1 not in (42)] # Cleanup statement ok diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 9b30699e3fa3..5b3ecab5fd76 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1527,7 +1527,7 @@ SELECT not(true), not(false) ---- false true -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nInternal error: NOT 'Literal \{ value: Int64\(1\) \}' can't be evaluated because the expression's type is Int64, not boolean or NULL +query error type_coercion\ncaused by\nError during planning: Cannot infer common argument type for comparison operation Int64 IS DISTINCT FROM Boolean SELECT not(1), not(0) query ?B @@ -1535,7 +1535,7 @@ SELECT null, not(null) ---- NULL NULL -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nInternal error: NOT 'Literal \{ value: Utf8\("hi"\) \}' can't be evaluated because the expression's type is Utf8, not boolean or NULL +query error type_coercion\ncaused by\nError during planning: Cannot infer common argument type for comparison operation Utf8 IS DISTINCT FROM Boolean SELECT NOT('hi') # test_negative_expressions() diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index ca48c07b0914..b7bbc0706576 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -46,11 +46,294 @@ STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' - ########## ## SELECT Tests ########## +########## +## window_null_string_table_test +########## + +statement ok +CREATE TABLE window_null_string_value_prepare(x string, y string, z string) +AS VALUES +('one', 'ONE', 'One'), +(NULL, NULL, NULL), +('three', 'THREE', 'Three'); + +statement ok +CREATE TABLE window_null_string_table +AS SELECT 
arrow_cast(x, 'Dictionary(Int32, Utf8)') as d1,
+y as d2,
+arrow_cast(z, 'LargeUtf8') as d3 FROM window_null_string_value_prepare;
+
+query ?I
+SELECT d1, row_number() OVER (partition by d1) as rn1 FROM window_null_string_table order by d1 asc;
+----
+one 1
+three 1
+NULL 1
+
+query TI
+SELECT d2, row_number() OVER (partition by d2) as rn1 FROM window_null_string_table ORDER BY d2 asc;
+----
+ONE 1
+THREE 1
+NULL 1
+
+query TI
+SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as rn1 FROM window_null_string_table ORDER BY d2 desc
+----
+NULL 1
+THREE 1
+ONE 1
+
+# Test large string as well
+query TI rowsort
+SELECT d3, row_number() OVER (partition by d3) as rn1 FROM window_null_string_table;
+----
+NULL 1
+One 1
+Three 1
+
+
+statement ok
+CREATE TABLE test (
+  c1 BIGINT NOT NULL,
+  c2 BIGINT NOT NULL,
+  c3 BOOLEAN NOT NULL,
+) AS VALUES (0, 1, false),
+(0, 10, true),
+(0, 2, true),
+(0, 3, false),
+(0, 4, true),
+(0, 5, false),
+(0, 6, true),
+(0, 7, false),
+(0, 8, true),
+(0, 9, false),
+(1, 1, false),
+(1, 10, true),
+(1, 2, true),
+(1, 3, false),
+(1, 4, true),
+(1, 5, false),
+(1, 6, true),
+(1, 7, false),
+(1, 8, true),
+(1, 9, false),
+(2, 1, false),
+(2, 10, true),
+(2, 2, true),
+(2, 3, false),
+(2, 4, true),
+(2, 5, false),
+(2, 6, true),
+(2, 7, false),
+(2, 8, true),
+(2, 9, false),
+(3, 1, false),
+(3, 10, true),
+(3, 2, true),
+(3, 3, false),
+(3, 4, true),
+(3, 5, false),
+(3, 6, true),
+(3, 7, false),
+(3, 8, true),
+(3, 9, false);
+
+
+# parallel_query_with_filter
+query II
+SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3;
+----
+1 1
+1 10
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+2 1
+2 10
+2 2
+2 3
+2 4
+2 5
+2 6
+2 7
+2 8
+2 9
+
+######
+# Boolean literal
+######
+query IB
+SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true;
+----
+3 true
+3 true
+3 true
+3 true
+3 true
+
+statement ok
+drop table test;
+
+######
+# struct test
+######
+# Prepare the table with struct values for testing
+statement ok
+CREATE TABLE struct_value
+AS VALUES
+(make_array(0, 1, 2, 3)),
+(make_array(4, 5, 6, 7)),
+(make_array(8, 9, 10, 11));
+
+statement ok
+CREATE TABLE nested_get_indexed_field_on_struct_table
+AS SELECT struct(column1) as some_struct from struct_value;
+
+# Access to field of struct by name
+query ?
+SELECT some_struct['c0'] FROM nested_get_indexed_field_on_struct_table LIMIT 3;
+----
+[0, 1, 2, 3]
+[4, 5, 6, 7]
+[8, 9, 10, 11]
+
+# Access to field of struct by CompoundIdentifier
+query ?
+SELECT some_struct.c0 as l0 FROM nested_get_indexed_field_on_struct_table LIMIT 3;
+----
+[0, 1, 2, 3]
+[4, 5, 6, 7]
+[8, 9, 10, 11]
+
+query I
+SELECT some_struct['c0'][1] as i0 FROM nested_get_indexed_field_on_struct_table LIMIT 3;
+----
+0
+4
+8
+
+# Basic SELECT
+####
+# dictionary_test
+####
+
+# Prepare the table with dictionary values for testing
+statement ok
+CREATE TABLE value(x string, y string, z string)
+AS VALUES
+('one', 'blarg', 'XYZ'),
+(NULL, NULL, NULL),
+('three', 'three', 'three');
+
+statement ok
+CREATE TABLE string_dictionary_table
+AS SELECT arrow_cast(x, 'Dictionary(Int32, Utf8)') as d1,
+arrow_cast(y, 'Dictionary(Int32, Utf8)') as d2,
+z as d3 FROM value;
+
+query ?
+SELECT d1 FROM string_dictionary_table;
+----
+one
+NULL
+three
+
+# basic filtering
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 IS NOT NULL;
+----
+one
+three
+
+# comparison with constant
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 = 'three';
+----
+three
+
+# comparison with another dictionary column
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 = d2;
+----
+three
+
+# order comparison with another dictionary column
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 <= d2;
+----
+three
+
+# comparison with a non dictionary column
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 = d3;
+----
+three
+
+# filtering with constant
+query ?
+SELECT d1 FROM string_dictionary_table WHERE d1 = 'three';
+----
+three
+
+# Expression evaluation
+query T
+SELECT concat(d1, '-foo') FROM string_dictionary_table;
+----
+one-foo
+-foo
+three-foo
+
+# Expression evaluation with two dictionaries
+query T
+SELECT concat(d1, d2) FROM string_dictionary_table;
+----
+oneblarg
+(empty)
+threethree
+
+# aggregation
+query I
+SELECT COUNT(d1) FROM string_dictionary_table;
+----
+2
+
+# aggregation min
+query T
+SELECT MIN(d1) FROM string_dictionary_table;
+----
+one
+
+# aggregation max
+query T
+SELECT MAX(d1) FROM string_dictionary_table;
+----
+three
+
+# grouping
+query ?I
+SELECT d1, COUNT(*) FROM string_dictionary_table group by d1 order by d1;
+----
+one 1
+three 1
+NULL 1
+
+# window functions
+query ?I
+SELECT d1, row_number() OVER (partition by d1) as rn1 FROM string_dictionary_table order by d1;
+----
+one 1
+three 1
+NULL 1

 # select_values_list
 statement error DataFusion error: SQL error: ParserError\("Expected \(, found: EOF"\)
@@ -232,6 +515,28 @@ select
 ----
 false true false true true false false true false true true false true true false false true

+# select uppercase alias table
+query I
+SELECT "T".* from (SELECT 1 a) AS "T"
+----
+1
+
+# explain select uppercase alias table
+query TT
+EXPLAIN SELECT * FROM ((SELECT column1 FROM foo) "T1" CROSS JOIN (SELECT column2 FROM foo) "T2") AS "F"
+----
+logical_plan
+SubqueryAlias: F
+--CrossJoin:
+----SubqueryAlias: T1
+------TableScan: foo projection=[column1]
+----SubqueryAlias: T2
+------TableScan: foo projection=[column2]
+physical_plan
+CrossJoinExec
+--MemoryExec: partitions=1, partition_sizes=[1]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
 # select NaNs
 query BBBB
 select (isnan('NaN'::double) AND 'NaN'::double > 0) a, (isnan('-NaN'::double) AND '-NaN'::double < 0) b, (isnan('NaN'::float) AND 'NaN'::float > 0) c, (isnan('-NaN'::float) AND '-NaN'::float < 0) d
@@ -1129,5 +1434,152 @@ FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B;
 0 0 0 0

+# Expressions that short circuit should not be refactored out as that may cause side effects (divide by zero)
+# at plan time that would not actually happen during execution, so the following three queries should not have
+# their common sub-expressions extracted
+query TT
+explain select coalesce(1, y/x), coalesce(2, y/x) from t;
+----
+logical_plan
+Projection: coalesce(Int64(1), CAST(t.y / t.x AS Int64)), coalesce(Int64(2), CAST(t.y / t.x AS Int64))
+--TableScan: t projection=[x, y]
+physical_plan
+ProjectionExec: expr=[coalesce(1, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(1),t.y / t.x), coalesce(2, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(2),t.y / t.x)]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
+query TT
+EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;
+----
+logical_plan
+Projection: t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x
+--TableScan: t projection=[x, y]
+physical_plan
+ProjectionExec: expr=[y@1 > 0 AND 1 / 
CAST(y@1 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@0 > 0 AND y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
+query TT
+EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t;
+----
+logical_plan
+Projection: t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x
+--TableScan: t projection=[x, y]
+physical_plan
+ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
+# due to the reason described in https://github.com/apache/arrow-datafusion/issues/8927,
+# the following queries will fail
+query error
+select coalesce(1, y/x), coalesce(2, y/x) from t;
+
+query error
+SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;
+
+query error
+SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t;
+
 statement ok
 DROP TABLE t;
+
+##########
+## indexed_field_test
+##########
+statement ok
+CREATE TABLE indexed_field
+AS VALUES (make_array(0, 1, 2)),
+(make_array(4, 5, 6)),
+(make_array(7, 8, 9))
+
+# query_get_indexed_field
+query I
+SELECT column1[1] AS i0
+FROM indexed_field LIMIT 3;
+----
+0
+4
+7
+
+##########
+## nested_indexed_field_test
+##########
+statement ok
+CREATE TABLE nested_indexed_field
+AS VALUES (make_array([0, 1], [2, 3], [3, 4])),
+(make_array([5, 6], [7, 8], [9, 10])),
+(make_array([11, 12], [13, 14], [15, 16]))
+
+# query nested_indexed_field
+query ?
+SELECT column1[1] AS i0 +FROM nested_indexed_field LIMIT 3; +---- +[0, 1] +[5, 6] +[11, 12] + +query I +SELECT column1[1][1] AS i0 +FROM nested_indexed_field LIMIT 3; +---- +0 +5 +11 + +query I +SELECT CASE 1 WHEN 2 THEN 4 / 0 END; +---- +NULL + + +###### +# Unprojected filter +###### + +statement ok +CREATE TABLE test(i INT) AS +VALUES (1), (2), (3); + +query I +SELECT i + i FROM test WHERE i > 2; +---- +6 + +query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from 'I AM NOT A TIMESTAMP': error parsing date +SELECT to_timestamp('I AM NOT A TIMESTAMP'); + +query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type +SELECT CAST('' AS int); + +# See issue: https://github.com/apache/arrow-datafusion/issues/8978 +statement ok +create table users (id int, name varchar); + +statement ok +insert into users values (1, 'Tom'); + +statement ok +create view v as select count(id) from users; + +query I +select * from v; +---- +1 + +query I +select count(1) from v; +---- +1 + +# run below query without logical optimizations +statement ok +set datafusion.optimizer.max_passes=0; + +statement ok +CREATE TABLE t(a int, b int); + +query I +select a from t; +---- + +statement ok +set datafusion.optimizer.max_passes=3; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 3e0fcb7aa96e..1ca9045f1bd0 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -396,7 +396,7 @@ LeftSemi Join: t1.t1_id = __correlated_sq_1.t1_int --TableScan: t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_1 ----Projection: t1.t1_int -------Filter: t1.t1_id > t1.t1_int +------Filter: t1.t1_int < t1.t1_id --------TableScan: t1 projection=[t1_id, t1_int] #in_subquery_nested_exist_subquery diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 2ab3dbdac61b..5c7687aa27b2 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -18,7 +18,7 @@ ########## ## Common timestamp data # -# ts_data: Int64 nanosecods +# ts_data: Int64 nanoseconds # ts_data_nanos: Timestamp(Nanosecond, None) # ts_data_micros: Timestamp(Microsecond, None) # ts_data_millis: Timestamp(Millisecond, None) @@ -331,6 +331,35 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08T12 ---- 2 +# to_timestamp with formatting +query I +SELECT COUNT(*) FROM ts_data_nanos where ts > to_timestamp('2020-09-08T12:00:00+00:00', '2020-09-08 12/00/00+00:00', '%c', '%+', '%Y-%m-%d %H/%M/%s%#z') +---- +2 + +# to_timestamp_nanos with formatting +query I +SELECT COUNT(*) FROM ts_data_nanos where ts > to_timestamp_nanos('2020-09-08 12/00/00+00:00', '%c', '%+', '%Y-%m-%d %H/%M/%S%#z') +---- +2 + +# to_timestamp_millis with formatting +query I +SELECT COUNT(*) FROM ts_data_millis where ts > to_timestamp_millis('2020-09-08 12/00/00+00:00', '%c', '%+', '%Y-%m-%d %H/%M/%S%#z') +---- +2 + +# to_timestamp_micros with formatting +query I +SELECT COUNT(*) FROM ts_data_micros where ts > to_timestamp_micros('2020-09-08 12/00/00+00:00', '%c', '%+', '%Y-%m-%d %H/%M/%S%#z') +---- +2 + +# to_timestamp_seconds with formatting +query I +SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08 12/00/00+00:00', '%c', '%+', '%Y-%m-%d %H/%M/%S%#z') +---- +2 # to_timestamp float inputs @@ -1880,7 
+1909,7 @@ SELECT to_timestamp(null), to_timestamp(0), to_timestamp(1926632005), to_timesta ---- NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 1969-12-31T23:59:59 1969-12-31T23:59:59 -# verify timestamp syntax stlyes are consistent +# verify timestamp syntax styles are consistent query BBBBBBBBBBBBB SELECT to_timestamp(null) is null as c1, null::timestamp is null as c2, @@ -1922,6 +1951,116 @@ true true true true true true #---- #0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 +# verify timestamp data with formatting options +query PPPPPP +SELECT to_timestamp(null, '%+'), to_timestamp(0, '%s'), to_timestamp(1926632005, '%s'), to_timestamp(1, '%+', '%s'), to_timestamp(-1, '%c', '%+', '%s'), to_timestamp(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 1969-12-31T23:59:59 1969-12-31T23:59:59 + +# verify timestamp data with formatting options +query PPPPPP +SELECT to_timestamp(null, '%+'), to_timestamp(0, '%s'), to_timestamp(1926632005, '%s'), to_timestamp(1, '%+', '%s'), to_timestamp(-1, '%c', '%+', '%s'), to_timestamp(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 1969-12-31T23:59:59 1969-12-31T23:59:59 + +# verify timestamp output types with formatting options +query TTT +SELECT arrow_typeof(to_timestamp(1, '%c', '%s')), arrow_typeof(to_timestamp(null, '%+', '%s')), arrow_typeof(to_timestamp('2023-01-10 12:34:56.000', '%Y-%m-%d %H:%M:%S%.f')) +---- +Timestamp(Nanosecond, None) Timestamp(Nanosecond, None) Timestamp(Nanosecond, None) + +# to_timestamp with invalid formatting +query error input contains invalid characters +SELECT to_timestamp('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_timestamp_nanos with invalid formatting +query error input contains invalid characters +SELECT to_timestamp_nanos('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_timestamp_millis with invalid formatting +query error input contains invalid characters +SELECT to_timestamp_millis('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_timestamp_micros with invalid formatting +query error input contains invalid characters +SELECT to_timestamp_micros('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_timestamp_seconds with invalid formatting +query error input contains invalid characters +SELECT to_timestamp_seconds('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_timestamp with broken formatting +query error bad or unsupported format string +SELECT to_timestamp('2020-09-08 12/00/00+00:00', '%q') + +# to_timestamp_nanos with broken formatting +query error bad or unsupported format string +SELECT to_timestamp_nanos('2020-09-08 12/00/00+00:00', '%q') + +# to_timestamp_millis with broken formatting +query error bad or unsupported format string +SELECT to_timestamp_millis('2020-09-08 12/00/00+00:00', '%q') + +# to_timestamp_micros with broken formatting +query error bad or unsupported format string +SELECT to_timestamp_micros('2020-09-08 12/00/00+00:00', '%q') + +# to_timestamp_seconds with broken formatting +query error bad or unsupported format string +SELECT to_timestamp_seconds('2020-09-08 12/00/00+00:00', '%q') + +# Create string timestamp table with different formats +# including a few very non-standard formats + +statement ok +create table ts_utf8_data(ts varchar(100), format varchar(100)) as values + ('2020-09-08 12/00/00+00:00', '%Y-%m-%d %H/%M/%S%#z'), + ('2031-01-19T23:33:25+05:00', '%+'), + ('08-09-2020 12:00:00+00:00', 
'%d-%m-%Y %H:%M:%S%#z'), + ('1926632005', '%s'), + ('2000-01-01T01:01:01+07:00', '%+'); + +# verify timestamp data using tables with formatting options +query P +SELECT to_timestamp(t.ts, t.format) from ts_utf8_data as t +---- +2020-09-08T12:00:00 +2031-01-19T18:33:25 +2020-09-08T12:00:00 +2031-01-19T23:33:25 +1999-12-31T18:01:01 + +# verify timestamp data using tables with formatting options +query P +SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y %H:%M:%S%#z') from ts_utf8_data as t +---- +2020-09-08T12:00:00 +2031-01-19T18:33:25 +2020-09-08T12:00:00 +2031-01-19T23:33:25 +1999-12-31T18:01:01 + +# verify timestamp data using tables with formatting options where at least one column cannot be parsed +query error Error parsing timestamp from '1926632005' using format '%d-%m-%Y %H:%M:%S%#z': input contains invalid characters +SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%d-%m-%Y %H:%M:%S%#z') from ts_utf8_data as t + +# verify timestamp data using tables with formatting options where one of the formats is invalid +query P +SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+') from ts_utf8_data as t +---- +2020-09-08T12:00:00 +2031-01-19T18:33:25 +2020-09-08T12:00:00 +2031-01-19T23:33:25 +1999-12-31T18:01:01 + +# timestamp data using tables with formatting options in an array is not supported at this time +query error function unsupported data type at index 1: +SELECT to_timestamp(t.ts, make_array('%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+')) from ts_utf8_data as t + +statement ok +drop table ts_utf8_data + ########## ## Test binary temporal coercion for Date and Timestamp ########## diff --git a/datafusion/sqllogictest/test_files/tpch/q12.slt.part b/datafusion/sqllogictest/test_files/tpch/q12.slt.part index 09939359ce12..68ef41b382e3 100644 --- a/datafusion/sqllogictest/test_files/tpch/q12.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q12.slt.part @@ -55,8 +55,8 @@ Sort: lineitem.l_shipmode ASC NULLS LAST ------Projection: lineitem.l_shipmode, orders.o_orderpriority --------Inner Join: lineitem.l_orderkey = orders.o_orderkey ----------Projection: lineitem.l_orderkey, lineitem.l_shipmode -------------Filter: (lineitem.l_shipmode = Utf8("MAIL") OR lineitem.l_shipmode = Utf8("SHIP")) AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("8766") AND lineitem.l_receiptdate < Date32("9131") ---------------TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("MAIL") OR lineitem.l_shipmode = Utf8("SHIP"), lineitem.l_commitdate < lineitem.l_receiptdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("8766"), lineitem.l_receiptdate < Date32("9131")] +------------Filter: (lineitem.l_shipmode = Utf8("MAIL") OR lineitem.l_shipmode = Utf8("SHIP")) AND lineitem.l_receiptdate > lineitem.l_commitdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("8766") AND lineitem.l_receiptdate < Date32("9131") +--------------TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("MAIL") OR lineitem.l_shipmode = Utf8("SHIP"), lineitem.l_receiptdate > lineitem.l_commitdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("8766"), lineitem.l_receiptdate < Date32("9131")] 
----------TableScan: orders projection=[o_orderkey, o_orderpriority] physical_plan SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] @@ -73,7 +73,7 @@ SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] ----------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ------------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] --------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------FilterExec: (l_shipmode@4 = MAIL OR l_shipmode@4 = SHIP) AND l_commitdate@2 < l_receiptdate@3 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 8766 AND l_receiptdate@3 < 9131 +----------------------------FilterExec: (l_shipmode@4 = MAIL OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 8766 AND l_receiptdate@3 < 9131 ------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], has_header=false --------------------CoalesceBatchesExec: target_batch_size=8192 ----------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 diff --git a/datafusion/sqllogictest/test_files/tpch/q4.slt.part b/datafusion/sqllogictest/test_files/tpch/q4.slt.part index 690ef64bc28d..1709ae04aa3b 100644 --- a/datafusion/sqllogictest/test_files/tpch/q4.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q4.slt.part @@ -50,8 +50,8 @@ Sort: orders.o_orderpriority ASC NULLS LAST --------------TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("8582"), orders.o_orderdate < Date32("8674")] ----------SubqueryAlias: __correlated_sq_1 ------------Projection: lineitem.l_orderkey ---------------Filter: lineitem.l_commitdate < lineitem.l_receiptdate -----------------TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_commitdate < lineitem.l_receiptdate] +--------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate +----------------TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] physical_plan SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] --SortExec: expr=[o_orderpriority@0 ASC NULLS LAST] @@ -73,7 +73,7 @@ SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] ----------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ------------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey] --------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------FilterExec: l_commitdate@1 < l_receiptdate@2 +----------------------------FilterExec: l_receiptdate@2 > l_commitdate@1 ------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], has_header=false diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index f8337e21d703..aec2fed73847 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2947,25 +2947,34 @@ logical_plan Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum1, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING AS sum2, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum3, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING AS sum4, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum5, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum6, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum7, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum8, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum9, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW AS sum10, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum11, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING AS sum12 --Limit: skip=0, fetch=5 ----WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS 
annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING]] -------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] ---------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -----------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] -------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] ---------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY 
[annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -----------------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d -------------------TableScan: annotated_data_infinite2 projection=[a, b, c, d] +------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING +--------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] +----------Projection: CAST(annotated_data_infinite2.c AS Int64) AS 
CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING +------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +--------------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING 
AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING +----------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] +------------------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW +--------------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] +----------------------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING 
+------------------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c AS annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +--------------------------Projection: CAST(annotated_data_infinite2.c AS Int64) AS CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d +----------------------------TableScan: annotated_data_infinite2 projection=[a, b, c, d] physical_plan ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 
FOLLOWING@13 as sum11, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] --GlobalLimitExec: skip=0, fetch=5 ----BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Linear] -------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[PartiallySorted([1, 0])] ---------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC 
NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] -----------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[PartiallySorted([0])] -------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[PartiallySorted([0, 1])] ---------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] -----------------ProjectionExec: expr=[CAST(c@2 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@0 as a, b@1 as b, c@2 as c, d@3 as d] -------------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] - +------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY 
[annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING] +--------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, 
annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[PartiallySorted([1, 0])] +----------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER 
BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING] +------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] 
ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING] +----------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[PartiallySorted([0])] +------------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, 
annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW] +--------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[PartiallySorted([0, 1])] +----------------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING] +------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER 
BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as CAST(annotated_data_infinite2.c AS Int64)annotated_data_infinite2.c, a@0 as a, b@1 as b, c@2 as c, d@3 as d] +----------------------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] query IIIIIIIIIIIIIII SELECT a, b, c, @@ -3017,31 +3026,40 @@ Limit: skip=0, fetch=5 --Sort: annotated_data_finite2.c ASC NULLS LAST, fetch=5 ----Projection: annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum1, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING AS sum2, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum3, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING AS sum4, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum5, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum6, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum7, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum8, SUM(annotated_data_finite2.c) PARTITION BY 
[annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum9, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW AS sum10, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum11, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING AS sum12 ------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING]] ---------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] -----------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] ---------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c 
ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] -----------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d ---------------------TableScan: annotated_data_finite2 projection=[a, b, c, d] +--------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING +----------WindowAggr: 
windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] +------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING +--------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +----------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY 
[annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING +------------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] +--------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW +----------------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] +------------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d, 
SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING +--------------------------WindowAggr: windowExpr=[[SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +----------------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d +------------------------------TableScan: annotated_data_finite2 projection=[a, b, c, d] physical_plan GlobalLimitExec: skip=0, fetch=5 --SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST] ----ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, 
annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] ------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Sorted] --------SortExec: expr=[d@4 ASC NULLS LAST,a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] -----------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[Sorted] -------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC 
NULLS LAST] ---------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] -----------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST] -------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted] ---------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] -----------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, 
SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[Sorted] -------------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST] ---------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] -----------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@0 as a, b@1 as b, c@2 as c, d@3 as d] -------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true - +----------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING 
AND 1 FOLLOWING@7 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING] +------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY 
[annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[Sorted] +--------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST] +----------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC 
NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING] +------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST] +----------------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as 
SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING] +------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +----------------------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, 
annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW] +------------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[Sorted] +--------------------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST] +----------------------------------ProjectionExec: expr=[CAST(c@3 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@1 as a, b@2 as b, c@3 as c, d@4 as d, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] 
ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING] +------------------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as CAST(annotated_data_finite2.c AS Int64)annotated_data_finite2.c, a@0 as a, b@1 as b, c@2 as c, d@3 as d] +----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIII SELECT a, b, c, @@ -3906,3 +3924,97 @@ ProjectionExec: expr=[sn@0 as sn, ts@1 as ts, currency@2 as currency, amount@3 a --BoundedWindowAggExec: wdw=[SUM(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] ----SortExec: expr=[sn@0 ASC NULLS LAST] ------MemoryExec: partitions=1, partition_sizes=[1] + +# test ROW_NUMBER window function returns correct data_type +query T +select arrow_typeof(row_number() over ()) from (select 1 a) +---- +UInt64 + +# test RANK window function returns correct data_type +query T +select arrow_typeof(rank() over ()) from (select 1 a) +---- +UInt64 + +# test DENSE_RANK window function returns correct data_type +query T +select arrow_typeof(dense_rank() over ()) from (select 1 a) +---- +UInt64 + +# test PERCENT_RANK window function returns correct data_type +query T +select arrow_typeof(percent_rank() over ()) from (select 1 a) +---- +Float64 + +# test CUME_DIST window function returns correct data_type +query T +select arrow_typeof(cume_dist() over ()) from (select 1 a) +---- +Float64 + +# test NTILE window function returns correct data_type +query T +select arrow_typeof(ntile(1) over ()) from (select 1 a) +---- +UInt64 + +# test LAG window function returns 
correct data_type +query T +select arrow_typeof(lag(a) over ()) from (select 1 a) +---- +Int64 + +# test LEAD window function returns correct data_type +query T +select arrow_typeof(lead(a) over ()) from (select 1 a) +---- +Int64 + +# test FIRST_VALUE window function returns correct data_type +query T +select arrow_typeof(first_value(a) over ()) from (select 1 a) +---- +Int64 + +# test LAST_VALUE window function returns correct data_type +query T +select arrow_typeof(last_value(a) over ()) from (select 1 a) +---- +Int64 + +# test NTH_VALUE window function returns correct data_type +query T +select arrow_typeof(nth_value(a, 1) over ()) from (select 1 a) +---- +Int64 + +# test LEAD window function works with NULL as default value +query I +select lead(a, 1, null) over (order by a) from (select 1 a union all select 2 a) +---- +2 +NULL + +# test LAG window function works with NULL as default value +query I +select lag(a, 1, null) over (order by a) from (select 1 a union all select 2 a) +---- +NULL +1 + +# test LEAD window function with string default value +query T +select lead(a, 1, 'default') over (order by a) from (select '1' a union all select '2' a) +---- +2 +default + +# test LAG window function with string default value +query T +select lag(a, 1, 'default') over (order by a) from (select '1' a union all select '2' a) +---- +default +1 diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 160af37ef961..772ba11ed4f6 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -35,7 +35,7 @@ itertools = { workspace = true } object_store = { workspace = true } prost = "0.12" prost-types = "0.12" -substrait = "0.22.1" +substrait = "0.23.0" [dev-dependencies] tokio = "1.17" diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index ab0e8c860858..fc9517c90a45 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -577,6 +577,10 @@ pub fn operator_to_name(op: Operator) -> &'static str { Operator::RegexIMatch => "regex_imatch", Operator::RegexNotMatch => "regex_not_match", Operator::RegexNotIMatch => "regex_not_imatch", + Operator::LikeMatch => "like_match", + Operator::ILikeMatch => "like_imatch", + Operator::NotLikeMatch => "like_not_match", + Operator::NotILikeMatch => "like_not_imatch", Operator::BitwiseAnd => "bitwise_and", Operator::BitwiseOr => "bitwise_or", Operator::StringConcat => "str_concat", diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index d7327caee43d..79cf76de5985 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -712,7 +712,7 @@ async fn roundtrip_aggregate_udf() -> Result<()> { struct Dummy {} impl Accumulator for Dummy { - fn state(&self) -> datafusion::error::Result<Vec<ScalarValue>> { + fn state(&mut self) -> datafusion::error::Result<Vec<ScalarValue>> { Ok(vec![]) } @@ -727,7 +727,7 @@ async fn roundtrip_aggregate_udf() -> Result<()> { Ok(()) } - fn evaluate(&self) -> datafusion::error::Result<ScalarValue> { + fn evaluate(&mut self) -> datafusion::error::Result<ScalarValue> { Ok(ScalarValue::Float64(None)) } diff --git a/dev/changelog/35.0.0.md b/dev/changelog/35.0.0.md new file mode 100644 index 000000000000..b48b2b7aaa12 --- /dev/null +++ b/dev/changelog/35.0.0.md @@ -0,0 +1,295 @@ + + +## [35.0.0](https://github.com/apache/arrow-datafusion/tree/35.0.0) (2024-01-20) + +[Full
Changelog](https://github.com/apache/arrow-datafusion/compare/34.0.0...35.0.0) + +**Breaking changes:** + +- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/arrow-datafusion/pull/8542) (sadboy) +- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/arrow-datafusion/pull/8573) (tustvold) +- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/arrow-datafusion/pull/8611) (kawadakk) +- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/arrow-datafusion/pull/8382) (edmondop) +- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/arrow-datafusion/pull/8713) (alamb) +- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/arrow-datafusion/pull/8562) (rspears74) +- Remove unused array_expression.rs and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/arrow-datafusion/pull/8807) (alamb) +- Simplify physical expression creation API (not require schema) [#8823](https://github.com/apache/arrow-datafusion/pull/8823) (comphead) +- Determine causal window frames to produce early results. [#8842](https://github.com/apache/arrow-datafusion/pull/8842) (mustafasrepo) + +**Implemented enhancements:** + +- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/arrow-datafusion/pull/8534) (waynexia) +- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/arrow-datafusion/pull/8526) (waynexia) +- feat: support largelist in array_slice [#8561](https://github.com/apache/arrow-datafusion/pull/8561) (Weijun-H) +- feat: support `LargeList` in `array_positions` [#8571](https://github.com/apache/arrow-datafusion/pull/8571) (Weijun-H) +- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/arrow-datafusion/pull/8570) (Weijun-H) +- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/arrow-datafusion/pull/8592) (Weijun-H) +- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/arrow-datafusion/pull/8595) (Weijun-H) +- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/arrow-datafusion/pull/8654) (my-vegetable-has-exploded) +- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/arrow-datafusion/pull/8569) (Weijun-H) +- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/arrow-datafusion/pull/8714) (Weijun-H) +- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/arrow-datafusion/pull/8716) (Weijun-H) +- feat: remove filters with null constants [#8700](https://github.com/apache/arrow-datafusion/pull/8700) (asimsedhain) +- feat: support LargeList in array_repeat [#8725](https://github.com/apache/arrow-datafusion/pull/8725) (Weijun-H) +- feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/arrow-datafusion/pull/8721) (korowa) +- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/arrow-datafusion/pull/8726) (Weijun-H) +- feat: support `largelist` in `array_to_string` [#8729](https://github.com/apache/arrow-datafusion/pull/8729) (Weijun-H) +- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/arrow-datafusion/pull/8772) 
(my-vegetable-has-exploded) +- feat: support `array_resize` [#8744](https://github.com/apache/arrow-datafusion/pull/8744) (Weijun-H) +- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/arrow-datafusion/pull/8843) (waynexia) + +**Fixed bugs:** + +- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/arrow-datafusion/pull/8477) (haohuaijin) +- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/arrow-datafusion/pull/8677) (korowa) +- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/arrow-datafusion/pull/8774) (haohuaijin) +- fix: failed to create ValuesExec with non-nullable schema [#8776](https://github.com/apache/arrow-datafusion/pull/8776) (jonahgao) +- fix: fix markdown table in docs [#8812](https://github.com/apache/arrow-datafusion/pull/8812) (tshauck) +- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/arrow-datafusion/pull/8833) (haohuaijin) + +**Documentation updates:** + +- docs: update udf docs for udtf [#8546](https://github.com/apache/arrow-datafusion/pull/8546) (tshauck) +- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/arrow-datafusion/pull/8686) (devinjdangelo) +- Minor: Improve `PruningPredicate` docstrings [#8748](https://github.com/apache/arrow-datafusion/pull/8748) (alamb) +- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/arrow-datafusion/pull/8747) (alamb) +- docs: add sudo for install commands [#8804](https://github.com/apache/arrow-datafusion/pull/8804) (caicancai) +- docs: document SessionConfig [#8771](https://github.com/apache/arrow-datafusion/pull/8771) (wjones127) +- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/arrow-datafusion/pull/8758) (tustvold) +- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/arrow-datafusion/pull/8875) (SteveLauC) +- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/arrow-datafusion/pull/8876) (caicancai) +- docs: fix wrong name in sub-crates' README [#8889](https://github.com/apache/arrow-datafusion/pull/8889) (SteveLauC) +- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/arrow-datafusion/pull/8888) (SteveLauC) + +**Merged pull requests:** + +- Remove order_bys from AggregateExec state [#8537](https://github.com/apache/arrow-datafusion/pull/8537) (mustafasrepo) +- Fix count(null) and count(distinct null) [#8511](https://github.com/apache/arrow-datafusion/pull/8511) (joroKr21) +- Minor: reduce code duplication in `date_bin_impl` [#8528](https://github.com/apache/arrow-datafusion/pull/8528) (Weijun-H) +- Add metrics for UnnestExec [#8482](https://github.com/apache/arrow-datafusion/pull/8482) (simonvandel) +- Prepare 34.0.0-rc3 [#8549](https://github.com/apache/arrow-datafusion/pull/8549) (andygrove) +- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/arrow-datafusion/pull/8477) (haohuaijin) +- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/arrow-datafusion/pull/8542) (sadboy) +- Fallback on null empty value in ExprBoundaries::try_from_column [#8501](https://github.com/apache/arrow-datafusion/pull/8501) (razeghi71) +- Add test for DataFrame::write_table 
[#8531](https://github.com/apache/arrow-datafusion/pull/8531) (devinjdangelo) +- [MINOR]: Generate empty column at placeholder exec [#8553](https://github.com/apache/arrow-datafusion/pull/8553) (mustafasrepo) +- Minor: Remove now dead `SUPPORTED_STRUCT_TYPES` [#8480](https://github.com/apache/arrow-datafusion/pull/8480) (alamb) +- [MINOR]: Add getter methods to first and last value [#8555](https://github.com/apache/arrow-datafusion/pull/8555) (mustafasrepo) +- [MINOR]: Some code changes and a new empty batch guard for SHJ [#8557](https://github.com/apache/arrow-datafusion/pull/8557) (metesynnada) +- docs: update udf docs for udtf [#8546](https://github.com/apache/arrow-datafusion/pull/8546) (tshauck) +- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/arrow-datafusion/pull/8534) (waynexia) +- Fix `compute_record_batch_statistics` wrong with `projection` [#8489](https://github.com/apache/arrow-datafusion/pull/8489) (Asura7969) +- Minor: Cleanup warning in scalar.rs test [#8563](https://github.com/apache/arrow-datafusion/pull/8563) (jayzhan211) +- Minor: move some invariants out of the loop [#8564](https://github.com/apache/arrow-datafusion/pull/8564) (haohuaijin) +- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/arrow-datafusion/pull/8526) (waynexia) +- Fix sort order aware file group parallelization [#8517](https://github.com/apache/arrow-datafusion/pull/8517) (alamb) +- feat: support largelist in array_slice [#8561](https://github.com/apache/arrow-datafusion/pull/8561) (Weijun-H) +- minor: fix to support scalars [#8559](https://github.com/apache/arrow-datafusion/pull/8559) (comphead) +- refactor: `HashJoinStream` state machine [#8538](https://github.com/apache/arrow-datafusion/pull/8538) (korowa) +- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/arrow-datafusion/pull/8573) (tustvold) +- Update substrait requirement from 0.20.0 to 0.21.0 [#8574](https://github.com/apache/arrow-datafusion/pull/8574) (dependabot[bot]) +- [minor]: Fix rank calculation bug when empty order by is seen [#8567](https://github.com/apache/arrow-datafusion/pull/8567) (mustafasrepo) +- Add `LiteralGuarantee` on columns to extract conditions required for `PhysicalExpr` expressions to evaluate to true [#8437](https://github.com/apache/arrow-datafusion/pull/8437) (alamb) +- [MINOR]: Parametrize sort-preservation tests to exercise all situations (unbounded/bounded sources and flag behavior) [#8575](https://github.com/apache/arrow-datafusion/pull/8575) (mustafasrepo) +- Minor: Add some comments to scalar_udf example [#8576](https://github.com/apache/arrow-datafusion/pull/8576) (alamb) +- Move Coercion for MakeArray to `coerce_arguments_for_signature` and introduce another one for ArrayAppend [#8317](https://github.com/apache/arrow-datafusion/pull/8317) (jayzhan211) +- feat: support `LargeList` in `array_positions` [#8571](https://github.com/apache/arrow-datafusion/pull/8571) (Weijun-H) +- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/arrow-datafusion/pull/8570) (Weijun-H) +- Increase test coverage for unbounded and bounded cases [#8581](https://github.com/apache/arrow-datafusion/pull/8581) (mustafasrepo) +- Port tests in `parquet.rs` to sqllogictest [#8560](https://github.com/apache/arrow-datafusion/pull/8560) (hiltontj) +- Minor: avoid a copy in Expr::unalias [#8588](https://github.com/apache/arrow-datafusion/pull/8588) (alamb) +- Minor: support complex expr as the arg in the 
ApproxPercentileCont function [#8580](https://github.com/apache/arrow-datafusion/pull/8580) (liukun4515) +- Bugfix: Add functional dependency check and aggregate try_new schema [#8584](https://github.com/apache/arrow-datafusion/pull/8584) (mustafasrepo) +- Remove GroupByOrderMode [#8593](https://github.com/apache/arrow-datafusion/pull/8593) (ozankabak) +- Minor: replace` not-impl-err` in `array_expression` [#8589](https://github.com/apache/arrow-datafusion/pull/8589) (Weijun-H) +- Substrait insubquery [#8363](https://github.com/apache/arrow-datafusion/pull/8363) (tgujar) +- Minor: port last test from parquet.rs [#8587](https://github.com/apache/arrow-datafusion/pull/8587) (alamb) +- Minor: consolidate map sqllogictest tests [#8550](https://github.com/apache/arrow-datafusion/pull/8550) (alamb) +- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/arrow-datafusion/pull/8592) (Weijun-H) +- Fix regression in regenerating protobuf source [#8603](https://github.com/apache/arrow-datafusion/pull/8603) (andygrove) +- Remove unbounded_input from FileSinkOptions [#8605](https://github.com/apache/arrow-datafusion/pull/8605) (devinjdangelo) +- Add `arrow_err!` macros, optional backtrace to ArrowError [#8586](https://github.com/apache/arrow-datafusion/pull/8586) (comphead) +- Add examples of DataFrame::write\* methods without S3 dependency [#8606](https://github.com/apache/arrow-datafusion/pull/8606) (devinjdangelo) +- Implement logical plan serde for CopyTo [#8618](https://github.com/apache/arrow-datafusion/pull/8618) (andygrove) +- Fix InListExpr to return the correct number of rows [#8601](https://github.com/apache/arrow-datafusion/pull/8601) (alamb) +- Remove ListingTable single_file option [#8604](https://github.com/apache/arrow-datafusion/pull/8604) (devinjdangelo) +- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/arrow-datafusion/pull/8595) (Weijun-H) +- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/arrow-datafusion/pull/8611) (kawadakk) +- Support binary temporal coercion for Date64 and Timestamp types [#8616](https://github.com/apache/arrow-datafusion/pull/8616) (Asura7969) +- Add new configuration item `listing_table_ignore_subdirectory` [#8565](https://github.com/apache/arrow-datafusion/pull/8565) (Asura7969) +- Optimize the parameter types of `ParamValues`'s methods [#8613](https://github.com/apache/arrow-datafusion/pull/8613) (kawadakk) +- Do not panic on zero placeholders in `ParamValues::get_placeholders_with_values` [#8615](https://github.com/apache/arrow-datafusion/pull/8615) (kawadakk) +- Fix #8507: Non-null sub-field on nullable struct-field has wrong nullity [#8623](https://github.com/apache/arrow-datafusion/pull/8623) (marvinlanhenke) +- Implement `contained` API in PruningPredicate [#8440](https://github.com/apache/arrow-datafusion/pull/8440) (alamb) +- Add partial serde support for ParquetWriterOptions [#8627](https://github.com/apache/arrow-datafusion/pull/8627) (andygrove) +- Minor: add arguments length check in `array_expressions` [#8622](https://github.com/apache/arrow-datafusion/pull/8622) (Weijun-H) +- Minor: improve dataframe functional dependency tests [#8630](https://github.com/apache/arrow-datafusion/pull/8630) (alamb) +- Improve regexp_match performance by avoiding cloning Regex [#8631](https://github.com/apache/arrow-datafusion/pull/8631) (viirya) +- Minor: improve `listing_table_ignore_subdirectory` config documentation 
[#8634](https://github.com/apache/arrow-datafusion/pull/8634) (alamb) +- Support Writing Arrow files [#8608](https://github.com/apache/arrow-datafusion/pull/8608) (devinjdangelo) +- Filter pushdown into cross join [#8626](https://github.com/apache/arrow-datafusion/pull/8626) (mustafasrepo) +- [MINOR] Remove duplicate test utility and move one utility function for better organization [#8652](https://github.com/apache/arrow-datafusion/pull/8652) (metesynnada) +- [MINOR]: Add new test for filter pushdown into cross join [#8648](https://github.com/apache/arrow-datafusion/pull/8648) (mustafasrepo) +- Rewrite bloom filters to use contains API [#8442](https://github.com/apache/arrow-datafusion/pull/8442) (alamb) +- Split equivalence code into smaller modules. [#8649](https://github.com/apache/arrow-datafusion/pull/8649) (tushushu) +- Move parquet_schema.rs from sql to parquet tests [#8644](https://github.com/apache/arrow-datafusion/pull/8644) (alamb) +- Fix group by aliased expression in LogicalPLanBuilder::aggregate [#8629](https://github.com/apache/arrow-datafusion/pull/8629) (alamb) +- Refactor `array_union` and `array_intersect` functions to one general function [#8516](https://github.com/apache/arrow-datafusion/pull/8516) (Weijun-H) +- Minor: avoid extra clone in datafusion-proto::physical_plan [#8650](https://github.com/apache/arrow-datafusion/pull/8650) (ongchi) +- Minor: name some constant values in arrow writer, parquet writer [#8642](https://github.com/apache/arrow-datafusion/pull/8642) (alamb) +- TreeNode Refactor Part 2 [#8653](https://github.com/apache/arrow-datafusion/pull/8653) (berkaysynnada) +- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/arrow-datafusion/pull/8654) (my-vegetable-has-exploded) +- Streaming CLI support [#8651](https://github.com/apache/arrow-datafusion/pull/8651) (berkaysynnada) +- Add serde support for CSV FileTypeWriterOptions [#8641](https://github.com/apache/arrow-datafusion/pull/8641) (andygrove) +- Add trait based ScalarUDF API [#8578](https://github.com/apache/arrow-datafusion/pull/8578) (alamb) +- Handle ordering of first last aggregation inside aggregator [#8662](https://github.com/apache/arrow-datafusion/pull/8662) (mustafasrepo) +- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/arrow-datafusion/pull/8569) (Weijun-H) +- chore: rename ceresdb to apache horaedb [#8674](https://github.com/apache/arrow-datafusion/pull/8674) (tanruixiang) +- Minor: clean up code [#8671](https://github.com/apache/arrow-datafusion/pull/8671) (Weijun-H) +- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/arrow-datafusion/pull/8677) (korowa) +- Make the BatchSerializer behind Arc to avoid unnecessary struct creation [#8666](https://github.com/apache/arrow-datafusion/pull/8666) (metesynnada) +- Implement serde for CSV and Parquet FileSinkExec [#8646](https://github.com/apache/arrow-datafusion/pull/8646) (andygrove) +- [pruning] Add shortcut when all units have been pruned [#8675](https://github.com/apache/arrow-datafusion/pull/8675) (Ted-Jiang) +- Change first/last implementation to prevent redundant comparisons when data is already sorted [#8678](https://github.com/apache/arrow-datafusion/pull/8678) (mustafasrepo) +- minor: remove useless conversion [#8684](https://github.com/apache/arrow-datafusion/pull/8684) (comphead) +- refactor: modified `JoinHashMap` build order for `HashJoinStream` 
[#8658](https://github.com/apache/arrow-datafusion/pull/8658) (korowa) +- Start setting up tpch planning benchmarks [#8665](https://github.com/apache/arrow-datafusion/pull/8665) (matthewmturner) +- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/arrow-datafusion/pull/8686) (devinjdangelo) +- Closes #8502: Parallel NDJSON file reading [#8659](https://github.com/apache/arrow-datafusion/pull/8659) (marvinlanhenke) +- Improve `array_prepend` signature for null and empty array [#8625](https://github.com/apache/arrow-datafusion/pull/8625) (jayzhan211) +- Cleanup TreeNode implementations [#8672](https://github.com/apache/arrow-datafusion/pull/8672) (viirya) +- Update sqlparser requirement from 0.40.0 to 0.41.0 [#8647](https://github.com/apache/arrow-datafusion/pull/8647) (dependabot[bot]) +- Update scalar functions doc for extract/datepart [#8682](https://github.com/apache/arrow-datafusion/pull/8682) (Jefffrey) +- Remove DescribeTableStmt in parser in favour of existing functionality from sqlparser-rs [#8703](https://github.com/apache/arrow-datafusion/pull/8703) (Jefffrey) +- Simplify `NULL [NOT] IN (..)` expressions [#8691](https://github.com/apache/arrow-datafusion/pull/8691) (asimsedhain) +- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/arrow-datafusion/pull/8382) (edmondop) +- Deprecate duplicate function `LogicalPlan::with_new_inputs` [#8707](https://github.com/apache/arrow-datafusion/pull/8707) (viirya) +- Minor: refactor bloom filter tests to reduce duplication [#8435](https://github.com/apache/arrow-datafusion/pull/8435) (alamb) +- Minor: clean up code based on `Clippy` [#8715](https://github.com/apache/arrow-datafusion/pull/8715) (Weijun-H) +- Minor: Unbounded Output of AnalyzeExec [#8717](https://github.com/apache/arrow-datafusion/pull/8717) (berkaysynnada) +- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/arrow-datafusion/pull/8714) (Weijun-H) +- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/arrow-datafusion/pull/8716) (Weijun-H) +- feat: remove filters with null constants [#8700](https://github.com/apache/arrow-datafusion/pull/8700) (asimsedhain) +- support `LargeList` in `array_prepend` and `array_append` [#8679](https://github.com/apache/arrow-datafusion/pull/8679) (Weijun-H) +- Support for `extract(epoch from date)` for Date32 and Date64 [#8695](https://github.com/apache/arrow-datafusion/pull/8695) (Jefffrey) +- Implement trait based API for defining WindowUDF [#8719](https://github.com/apache/arrow-datafusion/pull/8719) (guojidan) +- Minor: Introduce utils::hash for StructArray [#8552](https://github.com/apache/arrow-datafusion/pull/8552) (jayzhan211) +- [CI] Improve windows machine CI test time [#8730](https://github.com/apache/arrow-datafusion/pull/8730) (comphead) +- fix guarantees in allways_true of PruningPredicate [#8732](https://github.com/apache/arrow-datafusion/pull/8732) (my-vegetable-has-exploded) +- Minor: Avoid memory copy in construct window exprs [#8718](https://github.com/apache/arrow-datafusion/pull/8718) (Ted-Jiang) +- feat: support LargeList in array_repeat [#8725](https://github.com/apache/arrow-datafusion/pull/8725) (Weijun-H) +- Minor: Ctrl+C Termination in CLI [#8739](https://github.com/apache/arrow-datafusion/pull/8739) (berkaysynnada) +- Add support for functional dependency for ROW_NUMBER window function. 
[#8737](https://github.com/apache/arrow-datafusion/pull/8737) (mustafasrepo) +- Minor: reduce code duplication in PruningPredicate test [#8441](https://github.com/apache/arrow-datafusion/pull/8441) (alamb) +- feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/arrow-datafusion/pull/8721) (korowa) +- [MINOR]: Add a test case for when target partition is 1, no hash repartition is added to the plan. [#8757](https://github.com/apache/arrow-datafusion/pull/8757) (mustafasrepo) +- Minor: Improve `PruningPredicate` docstrings [#8748](https://github.com/apache/arrow-datafusion/pull/8748) (alamb) +- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/arrow-datafusion/pull/8726) (Weijun-H) +- Add reproducer for #8738 [#8750](https://github.com/apache/arrow-datafusion/pull/8750) (alamb) +- Minor: Use faster check for column name in schema merge [#8765](https://github.com/apache/arrow-datafusion/pull/8765) (matthewmturner) +- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/arrow-datafusion/pull/8747) (alamb) +- Move `repartition_file_scans` out of `enable_round_robin` check in `EnforceDistribution` rule [#8731](https://github.com/apache/arrow-datafusion/pull/8731) (viirya) +- Clean internal implementation of WindowUDF [#8746](https://github.com/apache/arrow-datafusion/pull/8746) (guojidan) +- feat: support `largelist` in `array_to_string` [#8729](https://github.com/apache/arrow-datafusion/pull/8729) (Weijun-H) +- [MINOR] CLI error handling on streaming use cases [#8761](https://github.com/apache/arrow-datafusion/pull/8761) (metesynnada) +- Convert Binary Operator `StringConcat` to Function for `array_concat`, `array_append` and `array_prepend` [#8636](https://github.com/apache/arrow-datafusion/pull/8636) (jayzhan211) +- Minor: Fix incorrect indices for hashing struct [#8775](https://github.com/apache/arrow-datafusion/pull/8775) (jayzhan211) +- Minor: Improve library docs to mention TreeNode, ExprSimplifier, PruningPredicate and cp_solver [#8749](https://github.com/apache/arrow-datafusion/pull/8749) (alamb) +- [MINOR] Add logo source files [#8762](https://github.com/apache/arrow-datafusion/pull/8762) (andygrove) +- Add Apache attribution to site footer [#8760](https://github.com/apache/arrow-datafusion/pull/8760) (alamb) +- ci: speed up win64 test [#8728](https://github.com/apache/arrow-datafusion/pull/8728) (Jefffrey) +- Add `schema_err!` error macros with optional backtrace [#8620](https://github.com/apache/arrow-datafusion/pull/8620) (comphead) +- Fix regression by reverting Materialize dictionaries in group keys [#8740](https://github.com/apache/arrow-datafusion/pull/8740) (alamb) +- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/arrow-datafusion/pull/8774) (haohuaijin) +- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/arrow-datafusion/pull/8713) (alamb) +- Minor: Fix error messages in array expressions [#8781](https://github.com/apache/arrow-datafusion/pull/8781) (Weijun-H) +- Move tests from `expr.rs` to sqllogictests. 
Part1 [#8773](https://github.com/apache/arrow-datafusion/pull/8773) (comphead) +- Permit running `sqllogictest` as a rust test in IDEs (+ use clap for sqllogicttest parsing, accept (and ignore) rust test harness arguments) [#8288](https://github.com/apache/arrow-datafusion/pull/8288) (alamb) +- Minor: Use standard tree walk in Projection Pushdown [#8787](https://github.com/apache/arrow-datafusion/pull/8787) (alamb) +- Implement trait based API for define AggregateUDF [#8733](https://github.com/apache/arrow-datafusion/pull/8733) (guojidan) +- Minor: Improve `DataFusionError` documentation [#8792](https://github.com/apache/arrow-datafusion/pull/8792) (alamb) +- fix: failed to create ValuesExec with non-nullable schema [#8776](https://github.com/apache/arrow-datafusion/pull/8776) (jonahgao) +- Update substrait requirement from 0.21.0 to 0.22.1 [#8796](https://github.com/apache/arrow-datafusion/pull/8796) (dependabot[bot]) +- Bump follow-redirects from 1.15.3 to 1.15.4 in /datafusion/wasmtest/datafusion-wasm-app [#8798](https://github.com/apache/arrow-datafusion/pull/8798) (dependabot[bot]) +- Minor: array_pop_first should be array_pop_front in documentation [#8797](https://github.com/apache/arrow-datafusion/pull/8797) (ongchi) +- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/arrow-datafusion/pull/8772) (my-vegetable-has-exploded) +- Add note on using larger row group size [#8745](https://github.com/apache/arrow-datafusion/pull/8745) (twitu) +- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/arrow-datafusion/pull/8562) (rspears74) +- fix: fix markdown table in docs [#8812](https://github.com/apache/arrow-datafusion/pull/8812) (tshauck) +- docs: add sudo for install commands [#8804](https://github.com/apache/arrow-datafusion/pull/8804) (caicancai) +- Standardize `CompressionTypeVariant` encoding in protobuf [#8785](https://github.com/apache/arrow-datafusion/pull/8785) (tushushu) +- Make benefits_from_input_partitioning Default in SHJ [#8801](https://github.com/apache/arrow-datafusion/pull/8801) (metesynnada) +- refactor: standardize exec_from funcs arg order [#8809](https://github.com/apache/arrow-datafusion/pull/8809) (tshauck) +- [Minor] extract const and add doc and more tests for in_list pruning [#8815](https://github.com/apache/arrow-datafusion/pull/8815) (Ted-Jiang) +- [MINOR]: Add size check for aggregate [#8813](https://github.com/apache/arrow-datafusion/pull/8813) (mustafasrepo) +- Minor: chores: Update clippy in pre-commit.sh [#8810](https://github.com/apache/arrow-datafusion/pull/8810) (my-vegetable-has-exploded) +- Cleanup the usage of round-robin repartitioning [#8794](https://github.com/apache/arrow-datafusion/pull/8794) (viirya) +- Implement monotonicity for ScalarUDF [#8799](https://github.com/apache/arrow-datafusion/pull/8799) (guojidan) +- Remove unused array_expression.rs and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/arrow-datafusion/pull/8807) (alamb) +- feat: support `array_resize` [#8744](https://github.com/apache/arrow-datafusion/pull/8744) (Weijun-H) +- Minor: typo in `arrays.slt` [#8831](https://github.com/apache/arrow-datafusion/pull/8831) (Weijun-H) +- docs: document SessionConfig [#8771](https://github.com/apache/arrow-datafusion/pull/8771) (wjones127) +- Minor: Improve `datafusion-proto` documentation [#8822](https://github.com/apache/arrow-datafusion/pull/8822) (alamb) +- [CI] Refactor CI builders 
[#8826](https://github.com/apache/arrow-datafusion/pull/8826) (comphead) +- Serialize function signature simplifications [#8802](https://github.com/apache/arrow-datafusion/pull/8802) (metesynnada) +- Port tests in `group_by.rs` to sqllogictest [#8834](https://github.com/apache/arrow-datafusion/pull/8834) (hiltontj) +- Simplify physical expression creation API (not require schema) [#8823](https://github.com/apache/arrow-datafusion/pull/8823) (comphead) +- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/arrow-datafusion/pull/8843) (waynexia) +- Port tests in timestamp.rs to sqllogictest. Part 1 [#8818](https://github.com/apache/arrow-datafusion/pull/8818) (caicancai) +- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/arrow-datafusion/pull/8758) (tustvold) +- Fix ApproxPercentileCont signature [#8825](https://github.com/apache/arrow-datafusion/pull/8825) (joroKr21) +- Minor: Update `with_column_rename` method doc [#8858](https://github.com/apache/arrow-datafusion/pull/8858) (comphead) +- Minor: Document `parquet_metadata` function [#8852](https://github.com/apache/arrow-datafusion/pull/8852) (alamb) +- Speedup new_with_metadata by removing sort [#8855](https://github.com/apache/arrow-datafusion/pull/8855) (simonvandel) +- Minor: fix wrong function call [#8847](https://github.com/apache/arrow-datafusion/pull/8847) (Weijun-H) +- Add options of parquet bloom filter and page index in Session config [#8869](https://github.com/apache/arrow-datafusion/pull/8869) (Ted-Jiang) +- Port tests in timestamp.rs to sqllogictest [#8859](https://github.com/apache/arrow-datafusion/pull/8859) (caicancai) +- test: Port `order.rs` tests to sqllogictest [#8857](https://github.com/apache/arrow-datafusion/pull/8857) (simicd) +- Determine causal window frames to produce early results. 
[#8842](https://github.com/apache/arrow-datafusion/pull/8842) (mustafasrepo) +- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/arrow-datafusion/pull/8875) (SteveLauC) +- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/arrow-datafusion/pull/8833) (haohuaijin) +- Add "Extended" clickbench queries [#8861](https://github.com/apache/arrow-datafusion/pull/8861) (alamb) +- Change cli to propagate error to exit code [#8856](https://github.com/apache/arrow-datafusion/pull/8856) (tshauck) +- test: Port tests in `predicates.rs` to sqllogictest [#8879](https://github.com/apache/arrow-datafusion/pull/8879) (simicd) +- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/arrow-datafusion/pull/8876) (caicancai) +- Minor: add tests for casts between nested `List` and `LargeList` [#8882](https://github.com/apache/arrow-datafusion/pull/8882) (Weijun-H) +- Disable Parallel Parquet Writer by Default, Improve Writing Test Coverage [#8854](https://github.com/apache/arrow-datafusion/pull/8854) (devinjdangelo) +- Support for order sensitive `NTH_VALUE` aggregation, make reverse `ARRAY_AGG` more efficient [#8841](https://github.com/apache/arrow-datafusion/pull/8841) (mustafasrepo) +- test: Port tests in `csv_files.rs` to sqllogictest [#8885](https://github.com/apache/arrow-datafusion/pull/8885) (simicd) +- test: Port tests in `references.rs` to sqllogictest [#8877](https://github.com/apache/arrow-datafusion/pull/8877) (simicd) +- fix bug with `to_timestamp` and `InitCap` logical serialization, add roundtrip test between expression and proto, [#8868](https://github.com/apache/arrow-datafusion/pull/8868) (Weijun-H) +- Support `LargeListArray` scalar values and `align_array_dimensions` [#8881](https://github.com/apache/arrow-datafusion/pull/8881) (Weijun-H) +- refactor: rename FileStream.file_reader to file_opener & update doc [#8883](https://github.com/apache/arrow-datafusion/pull/8883) (SteveLauC) +- docs: fix wrong name in sub-crates' README [#8889](https://github.com/apache/arrow-datafusion/pull/8889) (SteveLauC) +- Recursive CTEs: Stage 1 - add config flag [#8828](https://github.com/apache/arrow-datafusion/pull/8828) (matthewgapp) +- Support array literal with scalar function [#8884](https://github.com/apache/arrow-datafusion/pull/8884) (jayzhan211) +- Bump actions/cache from 3 to 4 [#8903](https://github.com/apache/arrow-datafusion/pull/8903) (dependabot[bot]) +- Fix `datafusion-cli` print output [#8895](https://github.com/apache/arrow-datafusion/pull/8895) (alamb) +- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/arrow-datafusion/pull/8888) (SteveLauC) +- Fix "Projection references non-aggregate values" by updating `rebase_expr` to use `transform_down` [#8890](https://github.com/apache/arrow-datafusion/pull/8890) (wizardxz) +- Add serde support for Arrow FileTypeWriterOptions [#8850](https://github.com/apache/arrow-datafusion/pull/8850) (tushushu) +- Improve `datafusion-cli` print format tests [#8896](https://github.com/apache/arrow-datafusion/pull/8896) (alamb) +- Recursive CTEs: Stage 2 - add support for sql -> logical plan generation [#8839](https://github.com/apache/arrow-datafusion/pull/8839) (matthewgapp) +- Minor: remove null in `array-append` and `array-prepend` [#8901](https://github.com/apache/arrow-datafusion/pull/8901) (Weijun-H) +- Add support for FixedSizeList type in `arrow_cast`, hashing 
[#8344](https://github.com/apache/arrow-datafusion/pull/8344) (Weijun-H) +- aggregate_statistics should only optimize MIN/MAX when relation is not empty [#8914](https://github.com/apache/arrow-datafusion/pull/8914) (viirya) +- support to_timestamp with optional chrono formats [#8886](https://github.com/apache/arrow-datafusion/pull/8886) (Omega359) +- Minor: Document third argument of `date_bin` as optional and default value [#8912](https://github.com/apache/arrow-datafusion/pull/8912) (alamb) +- Minor: distinguish parquet row group pruning type in unit test [#8921](https://github.com/apache/arrow-datafusion/pull/8921) (Ted-Jiang) diff --git a/dev/release/README.md b/dev/release/README.md index 53487678aa69..9cf241355e4d 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -19,7 +19,7 @@ # Release Process -DataFusion typically has major releases every two weeks, including breaking API changes. +DataFusion typically has major releases around once per month, including breaking API changes. Patch releases are made on an adhoc basis, but we try and avoid them given the frequent major releases. diff --git a/docs/Cargo.toml b/docs/Cargo.toml index 813335e30f77..3a8c90cae085 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -29,4 +29,4 @@ authors = { workspace = true } rust-version = "1.70" [dependencies] -datafusion = { path = "../datafusion/core", version = "34.0.0", default-features = false } +datafusion = { path = "../datafusion/core", version = "35.0.0", default-features = false } diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 64dc25411deb..1824b23f9f9b 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -41,12 +41,12 @@ use std::sync::Arc; use datafusion::arrow::array::{ArrayRef, Int64Array}; use datafusion::common::Result; - use datafusion::common::cast::as_int64_array; +use datafusion::physical_plan::functions::columnar_values_to_array; -pub fn add_one(args: &[ArrayRef]) -> Result<ArrayRef> { +pub fn add_one(args: &[ColumnarValue]) -> Result<ArrayRef> { // Error handling omitted for brevity - + let args = columnar_values_to_array(args)?; let i64s = as_int64_array(&args[0])?; let new_array = i64s @@ -82,7 +82,6 @@ There is a lower level API with more functionality but is more complex, that is ```rust use datafusion::logical_expr::{Volatility, create_udf}; -use datafusion::physical_plan::functions::make_scalar_function; use datafusion::arrow::datatypes::DataType; use std::sync::Arc; @@ -91,13 +90,13 @@ let udf = create_udf( vec![DataType::Int64], Arc::new(DataType::Int64), Volatility::Immutable, - make_scalar_function(add_one), + Arc::new(add_one), ); ``` [`scalarudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.ScalarUDF.html [`create_udf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udf.html -[`make_scalar_function`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.make_scalar_function.html +[`process_scalar_func_inputs`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.process_scalar_func_inputs.html [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs A few things to note: diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index a812b74284cf..8b039102d4d7 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -64,14 +64,14 @@ Environment
variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | NULL | Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 34.0.0 | Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 35.0.0 | Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | NULL | Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.bloom_filter_enabled | false | Sets if bloom filter is enabled for any column | | datafusion.execution.parquet.bloom_filter_fpp | NULL | Sets bloom filter false positive probability. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.bloom_filter_ndv | NULL | Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.allow_single_file_parallelism | false | Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. | +| datafusion.execution.parquet.allow_single_file_parallelism | true | Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. | | datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. 
Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.aggregate.scalar_update_factor | 10 | Specifies the threshold for using `ScalarValue`s to update accumulators during high-cardinality aggregations for each input batch. The aggregation is considered high-cardinality if the number of affected groups is greater than or equal to `batch_size / scalar_update_factor`. In such cases, `ScalarValue`s are utilized for updating accumulators, rather than the default batch-slice approach. This can lead to performance improvements. By adjusting the `scalar_update_factor`, you can balance the trade-off between more efficient accumulator updates and the number of groups affected. | @@ -101,6 +101,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.optimizer.top_down_join_key_reordering | true | When set to true, the physical plan optimizer will run a top down process to reorder the join keys | | datafusion.optimizer.prefer_hash_join | true | When set to true, the physical plan optimizer will prefer HashJoin over SortMergeJoin. HashJoin can work more efficiently than SortMergeJoin but consumes more memory | | datafusion.optimizer.hash_join_single_partition_threshold | 1048576 | The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition | +| datafusion.optimizer.hash_join_single_partition_threshold_rows | 131072 | The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition | | datafusion.optimizer.default_filter_selectivity | 20 | The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). | | datafusion.explain.logical_plan_only | false | When set to true, the explain statement will only print logical plans | | datafusion.explain.physical_plan_only | false | When set to true, the explain statement will only print physical plans | diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 85322d9fa766..f01750e56ae0 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -207,41 +207,42 @@ Unlike to some databases the math functions in Datafusion works the same way as ## Array Expressions -| Syntax | Description | -| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | -| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | -| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | -| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | -| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | -| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | -| array_distinct(array) | Returns distinct values from the array after removing duplicates. 
`array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | -| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | -| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | -| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | -| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | -| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | -| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | -| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | -| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | -| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | -| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | -| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | -| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | -| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | -| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | -| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | -| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | -| array_slice(array, index) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | -| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | -| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | -| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 4, 5, 6]` | -| cardinality(array) | Returns the total number of elements in the array. 
`cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | -| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | -| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | -| trim_array(array, n) | Deprecated | +| Syntax | Description | +| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | +| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | +| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | +| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | +| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | +| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | +| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | +| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | +| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | +| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | +| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | +| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | +| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | +| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | +| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | +| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | +| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | +| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. 
`array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | +| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | +| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | +| array_slice(array, begin, end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | +| array_slice(array, begin, end, stride) | Returns a slice of the array, stepping by `stride` elements. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5]` | +| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | +| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | +| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2]` | +| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 0, 0]` | +| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | +| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. 
`SELECT range(2, 10, 3) -> [2, 5, 8]` | +| trim_array(array, n) | Deprecated | ## Regular Expressions diff --git a/docs/source/user-guide/sql/operators.md b/docs/source/user-guide/sql/operators.md index 265e56bb2c34..872ef55dd39d 100644 --- a/docs/source/user-guide/sql/operators.md +++ b/docs/source/user-guide/sql/operators.md @@ -263,6 +263,58 @@ Not Regex Case-Insensitive Match +---------------------------------------------------+ ``` +### `~~` + +Like Match + +```sql +❯ SELECT 'datafusion' ~~ 'dat_f%n'; ++---------------------------------------+ +| Utf8("datafusion") ~~ Utf8("dat_f%n") | ++---------------------------------------+ +| true | ++---------------------------------------+ +``` + +### `~~*` + +Case-Insensitive Like Match + +```sql +❯ SELECT 'datafusion' ~~* 'Dat_F%n'; ++----------------------------------------+ +| Utf8("datafusion") ~~* Utf8("Dat_F%n") | ++----------------------------------------+ +| true | ++----------------------------------------+ +``` + +### `!~~` + +Not Like Match + +```sql +❯ SELECT 'datafusion' !~~ 'Dat_F%n'; ++----------------------------------------+ +| Utf8("datafusion") !~~ Utf8("Dat_F%n") | ++----------------------------------------+ +| true | ++----------------------------------------+ +``` + +### `!~~*` + +Not Case-Insensitive Like Match + +```sql +❯ SELECT 'datafusion' !~~* 'Dat%F_n'; ++-----------------------------------------+ +| Utf8("datafusion") !~~* Utf8("Dat%F_n") | ++-----------------------------------------+ +| true | ++-----------------------------------------+ +``` + ## Logical Operators - [AND](#and) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 9dd008f8fc44..6c526e3ada75 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -613,7 +613,9 @@ nullif(expression1, expression2) - [concat](#concat) - [concat_ws](#concat_ws) - [chr](#chr) +- [ends_with](#ends_with) - [initcap](#initcap) +- [instr](#instr) - [left](#left) - [length](#length) - [lower](#lower) @@ -756,6 +758,20 @@ chr(expression) **Related functions**: [ascii](#ascii) +### `ends_with` + +Tests if a string ends with a substring. + +``` +ends_with(str, substr) +``` + +#### Arguments + +- **str**: String expression to test. + Can be a constant, column, or function, and any combination of string operators. +- **substr**: Substring to test for. + ### `initcap` Capitalizes the first character in each word in the input string. @@ -774,6 +790,22 @@ initcap(str) [lower](#lower), [upper](#upper) +### `instr` + +Returns the location where substr first appeared in str (counting from 1). +If substr does not appear in str, return 0. + +``` +instr(str, substr) +``` + +#### Arguments + +- **str**: String expression to operate on. + Can be a constant, column, or function, and any combination of string operators. +- **substr**: Substring expression to search for. + Can be a constant, column, or function, and any combination of string operators. + ### `left` Returns a specified number of characters from the left side of a string. 
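To make the two newly documented string functions concrete, here is a brief illustrative sketch. The queries and results below are inferred from the descriptions of `ends_with` and `instr` in the hunk above rather than copied from the upstream docs, and the exact `datafusion-cli` output tables are omitted:

```sql
-- ends_with tests whether the first string ends with the second.
SELECT ends_with('datafusion', 'fusion');  -- true
SELECT ends_with('datafusion', 'data');    -- false

-- instr returns the 1-based position of the first occurrence of substr,
-- or 0 when substr does not appear in str.
SELECT instr('datafusion', 'fusion');      -- 5
SELECT instr('datafusion', 'spark');       -- 0
```
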
@@ -1279,8 +1311,8 @@ regexp_replace(str, regexp, replacement, flags) - [date_part](#date_part) - [datepart](#datepart) - [extract](#extract) -- [to_timestamp](#to_timestamp) - [today](#today) +- [to_timestamp](#to_timestamp) - [to_timestamp_millis](#to_timestamp_millis) - [to_timestamp_micros](#to_timestamp_micros) - [to_timestamp_seconds](#to_timestamp_seconds) @@ -1347,7 +1379,8 @@ date_bin(interval, expression, origin-timestamp) - **interval**: Bin interval. - **expression**: Time expression to operate on. Can be a constant, column, or function. -- **timestamp**: Starting point used to determine bin boundaries. +- **origin-timestamp**: Optional. Starting point used to determine bin boundaries. If not specified, + defaults to `1970-01-01T00:00:00Z` (the UNIX epoch in UTC). The following intervals are supported: @@ -1471,84 +1504,212 @@ extract(field FROM source) Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. +Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding timestamp. Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. -Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. +Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` +for the input outside of supported bounds. ``` -to_timestamp(expression) +to_timestamp(expression[, ..., format_n]) ``` #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. 
+ +[chrono format]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html + +#### Example + +``` +❯ select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +❯ select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs ### `to_timestamp_millis` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. +Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding timestamp. ``` -to_timestamp_millis(expression) +to_timestamp_millis(expression[, ..., format_n]) ``` #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. + +#### Example + +``` +❯ select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +❯ select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs ### `to_timestamp_micros` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). 
Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. +Returns the corresponding timestamp. ``` -to_timestamp_nanos(expression) +to_timestamp_micros(expression[, ..., format_n]) ``` +#### Arguments + +- **expression**: Expression to operate on. + Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. + +#### Example + +``` +❯ select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +❯ select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_nanos` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. +Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding timestamp. ``` -to_timestamp_nanos(expression) +to_timestamp_nanos(expression[, ..., format_n]) ``` #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. 
+ +#### Example + +``` +❯ select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +❯ select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs ### `to_timestamp_seconds` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. +Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding timestamp. ``` -to_timestamp_seconds(expression) +to_timestamp_seconds(expression[, ..., format_n]) ``` #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. + +#### Example + +``` +❯ select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +❯ select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs ### `from_unixtime`