diff --git a/Cargo.toml b/Cargo.toml index b7c8c09a8537..2b854c670349 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,14 +66,14 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.80.1" -version = "43.0.0" +version = "44.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can # selectively turn them on if needed, since we can override default-features = true (from false) # for the inherited dependency but cannot do the reverse (override from true to false). # -# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 +# See for more details: https://github.com/rust-lang/cargo/issues/11329 ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } @@ -98,31 +98,31 @@ bytes = "1.4" chrono = { version = "0.4.38", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "43.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "43.0.0" } -datafusion-common = { path = "datafusion/common", version = "43.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "43.0.0" } -datafusion-doc = { path = "datafusion/doc", version = "43.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "43.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "43.0.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "43.0.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "43.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "43.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "43.0.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "43.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "43.0.0" } -datafusion-functions-table = { path = "datafusion/functions-table", version = "43.0.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "43.0.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "43.0.0" } -datafusion-macros = { path = "datafusion/macros", version = "43.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "43.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "43.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "43.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "43.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "43.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "43.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "43.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "43.0.0" } +datafusion = { path = "datafusion/core", version = "44.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "44.0.0" } +datafusion-common = { path = "datafusion/common", version = "44.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "44.0.0" } +datafusion-doc = { path = "datafusion/doc", version 
= "44.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "44.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "44.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "44.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "44.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "44.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "44.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "44.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "44.0.0" } +datafusion-functions-table = { path = "datafusion/functions-table", version = "44.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "44.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "44.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "44.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "44.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "44.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "44.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "44.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "44.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "44.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "44.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "44.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/README.md b/README.md index f199021d7d78..e0fc6854ecff 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,8 @@ Default features: - `parquet`: support for reading the [Apache Parquet] format - `regex_expressions`: regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` -- `unparser` : enables support to reverse LogicalPlans back into SQL +- `unparser`: enables support to reverse LogicalPlans back into SQL +- `recursive_protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection. 
Optional features: diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9549cfeeb3b8..863bb5181f45 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -414,7 +414,6 @@ dependencies = [ "bzip2 0.4.4", "flate2", "futures-core", - "futures-io", "memchr", "pin-project-lite", "tokio", @@ -1232,7 +1231,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "43.0.0" +version = "44.0.0" dependencies = [ "apache-avro", "arrow", @@ -1284,7 +1283,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow-schema", "async-trait", @@ -1297,7 +1296,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1329,7 +1328,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "apache-avro", @@ -1353,7 +1352,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "43.0.0" +version = "44.0.0" dependencies = [ "log", "tokio", @@ -1361,11 +1360,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "43.0.0" +version = "44.0.0" [[package]] name = "datafusion-execution" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "dashmap", @@ -1382,7 +1381,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "chrono", @@ -1401,7 +1400,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "datafusion-common", @@ -1410,7 +1409,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -1438,7 +1437,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "arrow", @@ -1458,7 +1457,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "arrow", @@ -1469,7 +1468,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "arrow-array", @@ -1489,7 +1488,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "async-trait", @@ -1503,7 +1502,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "43.0.0" +version = "44.0.0" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1518,7 +1517,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "43.0.0" +version = "44.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1526,7 +1525,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "43.0.0" +version = "44.0.0" dependencies = [ "quote", "syn", @@ -1534,7 +1533,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "chrono", @@ -1551,7 +1550,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "arrow", @@ -1574,7 +1573,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "arrow", @@ -1586,7 
+1585,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "datafusion-common", @@ -1601,7 +1600,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "43.0.0" +version = "44.0.0" dependencies = [ "ahash", "arrow", @@ -1631,7 +1630,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "arrow-array", @@ -4390,7 +4389,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 4cdc2120a029..054a58b7bc41 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "43.0.0" +version = "44.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -39,17 +39,18 @@ aws-sdk-sts = "1.43.0" # end pin aws-sdk crates aws-credential-types = "1.2.0" clap = { version = "4.5.16", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "43.0.0", features = [ +datafusion = { path = "../datafusion/core", version = "44.0.0", features = [ "avro", "crypto_expressions", "datetime_expressions", "encoding_expressions", "parquet", + "recursive_protection", "regex_expressions", "unicode_expressions", "compression", ] } -datafusion-catalog = { path = "../datafusion/catalog", version = "43.0.0" } +datafusion-catalog = { path = "../datafusion/catalog", version = "44.0.0" } dirs = "5.0.1" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index d7ca48d638b7..36e68ec4842b 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc { Field::new("total_uncompressed_size", DataType::Int64, true), ])); - // construct recordbatch from metadata + // construct record batch from metadata let mut filename_arr = vec![]; let mut row_group_id_arr = vec![]; let mut row_group_num_rows_arr = vec![]; diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index aca600e50e4f..b06148ce267f 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -22,7 +22,7 @@ This crate includes end to end, highly commented examples of how to use various DataFusion APIs to help you get started. -## Prerequisites: +## Prerequisites Run `git submodule update --init` to init test files. 
@@ -57,8 +57,7 @@ cargo run --example dataframe - [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider) - [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 -- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data -- [`dataframe_output.rs`](examples/dataframe_output.rs): Examples of methods which write data out from a DataFrame +- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file. - [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results into rust structs using serde - [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s - [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 67b745d4074e..28a3a2f1de09 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -82,7 +82,7 @@ use url::Url; /// Specifically, this example illustrates how to: /// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query /// 2. Use [`PruningPredicate`] for predicate analysis -/// 3. Pass a row group selection to [`ParuetExec`] +/// 3. Pass a row group selection to [`ParquetExec`] /// 4. Pass a row selection (within a row group) to [`ParquetExec`] /// /// Note this is a *VERY* low level example for people who want to build their @@ -211,7 +211,7 @@ async fn main() -> Result<()> { // // Note: in order to prune pages, the Page Index must be loaded and the // ParquetExec will load it on demand if not present. 
To avoid a second IO - // during query, this example loaded the Page Index pre-emptively by setting + // during query, this example loaded the Page Index preemptively by setting // `ArrowReader::with_page_index` in `IndexedFile::try_new` provider.set_use_row_selection(true); println!("** Select data, predicate `id = 950`"); diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 1c20e292f091..49e890467d21 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -24,11 +24,14 @@ use arrow::{ }; use arrow_schema::Field; use datafusion::error::Result; +use datafusion::functions_aggregate::average::avg_udaf; use datafusion::prelude::*; use datafusion_common::ScalarValue; -use datafusion_expr::function::WindowUDFFieldArgs; +use datafusion_expr::expr::WindowFunction; +use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFFieldArgs}; +use datafusion_expr::simplify::SimplifyInfo; use datafusion_expr::{ - PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, + Expr, PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, }; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; @@ -142,6 +145,67 @@ impl PartitionEvaluator for MyPartitionEvaluator { } } +/// This UDWF will show how to use the WindowUDFImpl::simplify() API +#[derive(Debug, Clone)] +struct SimplifySmoothItUdf { + signature: Signature, +} + +impl SimplifySmoothItUdf { + fn new() -> Self { + Self { + signature: Signature::exact( + // this function will always take one arguments of type f64 + vec![DataType::Float64], + // this function is deterministic and will always return the same + // result for the same input + Volatility::Immutable, + ), + } + } +} +impl WindowUDFImpl for SimplifySmoothItUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "simplify_smooth_it" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn partition_evaluator( + &self, + _partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + todo!() + } + + /// this function will simplify `SimplifySmoothItUdf` to `AggregateUDF` for `Avg` + /// default implementation will not be called (left as `todo!()`) + fn simplify(&self) -> Option { + let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| { + Ok(Expr::WindowFunction(WindowFunction { + fun: datafusion_expr::WindowFunctionDefinition::AggregateUDF(avg_udaf()), + args: window_function.args, + partition_by: window_function.partition_by, + order_by: window_function.order_by, + window_frame: window_function.window_frame, + null_treatment: window_function.null_treatment, + })) + }; + + Some(Box::new(simplify)) + } + + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { + Ok(Field::new(field_args.name(), DataType::Float64, true)) + } +} + // create local execution context with `cars.csv` registered as a table named `cars` async fn create_context() -> Result { // declare a new context. 
In spark API, this corresponds to a new spark SQL session @@ -162,12 +226,15 @@ async fn main() -> Result<()> { let smooth_it = WindowUDF::from(SmoothItUdf::new()); ctx.register_udwf(smooth_it.clone()); - // Use SQL to run the new window function + let simplify_smooth_it = WindowUDF::from(SimplifySmoothItUdf::new()); + ctx.register_udwf(simplify_smooth_it.clone()); + + // Use SQL to retrieve entire table let df = ctx.sql("SELECT * from cars").await?; // print the results df.show().await?; - // Use SQL to run the new window function: + // Use SQL to run smooth_it: // // `PARTITION BY car`:each distinct value of car (red, and green) // should be treated as a separate partition (and will result in @@ -201,7 +268,7 @@ async fn main() -> Result<()> { // print the results df.show().await?; - // this time, call the new widow function with an explicit + // this time, call the function with an explicit // window so evaluate will be invoked with each window. // // `ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING`: each invocation @@ -232,5 +299,22 @@ async fn main() -> Result<()> { // print the results df.show().await?; + // Use SQL to run simplify_smooth_it + let df = ctx + .sql( + "SELECT \ + car, \ + speed, \ + simplify_smooth_it(speed) OVER (PARTITION BY car ORDER BY time) AS smooth_speed,\ + time \ + from cars \ + ORDER BY \ + car", + ) + .await?; + + // print the results + df.show().await?; + Ok(()) } diff --git a/datafusion-examples/examples/analyzer_rule.rs b/datafusion-examples/examples/analyzer_rule.rs index bd067be97b8b..aded64ed4105 100644 --- a/datafusion-examples/examples/analyzer_rule.rs +++ b/datafusion-examples/examples/analyzer_rule.rs @@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl { fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result { // use the TreeNode API to recursively walk the LogicalPlan tree // and all of its children (inputs) - let transfomed_plan = plan.transform(|plan| { + let transformed_plan = plan.transform(|plan| { // This closure is called for each LogicalPlan node // if it is a Scan node, add a filter to remove all managers if is_employee_table_scan(&plan) { @@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl { // // This example does not need the value of either flag, so simply // extract the LogicalPlan "data" - Ok(transfomed_plan.data) + Ok(transformed_plan.data) } fn name(&self) -> &str { diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs index f40f1dfb5a15..655438b78b9f 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/catalog.rs @@ -46,11 +46,11 @@ async fn main() -> Result<()> { let ctx = SessionContext::new(); let state = ctx.state(); - let cataloglist = Arc::new(CustomCatalogProviderList::new()); + let catalog_list = Arc::new(CustomCatalogProviderList::new()); // use our custom catalog list for context. each context has a single catalog list. 
// context will by default have [`MemoryCatalogProviderList`] - ctx.register_catalog_list(cataloglist.clone()); + ctx.register_catalog_list(catalog_list.clone()); // initialize our catalog and schemas let catalog = DirCatalog::new(); @@ -81,7 +81,7 @@ async fn main() -> Result<()> { ctx.register_catalog("dircat", Arc::new(catalog)); { // catalog was passed down into our custom catalog list since we override the ctx's default - let catalogs = cataloglist.catalogs.read().unwrap(); + let catalogs = catalog_list.catalogs.read().unwrap(); assert!(catalogs.contains_key("dircat")); }; @@ -144,8 +144,8 @@ impl DirSchema { async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result> { let DirSchemaOpts { ext, dir, format } = opts; let mut tables = HashMap::new(); - let direntries = std::fs::read_dir(dir).unwrap(); - for res in direntries { + let dir_entries = std::fs::read_dir(dir).unwrap(); + for res in dir_entries { let entry = res.unwrap(); let filename = entry.file_name().to_str().unwrap().to_string(); if !filename.ends_with(ext) { diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 59766e881e8b..5d5414e3d8b4 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -17,8 +17,12 @@ use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::dataframe::DataFrameWriteOptions; use datafusion::error::Result; use datafusion::prelude::*; +use datafusion_common::config::CsvOptions; +use datafusion_common::parsers::CompressionTypeVariant; +use datafusion_common::DataFusionError; use std::fs::File; use std::io::Write; use std::sync::Arc; @@ -29,6 +33,11 @@ use tempfile::tempdir; /// * [read_parquet]: execute queries against parquet files /// * [read_csv]: execute queries against csv files /// * [read_memory]: execute queries against in-memory arrow data +/// +/// This example demonstrates the various methods to write out a DataFrame to local storage. +/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example +/// using a remote object store. +/// * [write_out]: write out a DataFrame to a table, parquet file, csv file, or json file #[tokio::main] async fn main() -> Result<()> { // The SessionContext is the main high level API for interacting with DataFusion @@ -36,6 +45,7 @@ async fn main() -> Result<()> { read_parquet(&ctx).await?; read_csv(&ctx).await?; read_memory(&ctx).await?; + write_out(&ctx).await?; Ok(()) } @@ -139,3 +149,60 @@ async fn read_memory(ctx: &SessionContext) -> Result<()> { Ok(()) } + +/// Use the DataFrame API to: +/// 1. Write out a DataFrame to a table +/// 2. Write out a DataFrame to a parquet file +/// 3. Write out a DataFrame to a csv file +/// 4. Write out a DataFrame to a json file +async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionError> { + let mut df = ctx.sql("values ('a'), ('b'), ('c')").await.unwrap(); + + // Ensure the column names and types match the target table + df = df.with_column_renamed("column1", "tablecol1").unwrap(); + + ctx.sql( + "create external table + test(tablecol1 varchar) + stored as parquet + location './datafusion-examples/test_table/'", + ) + .await? + .collect() + .await?; + + // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c'). + // The behavior of write_table depends on the TableProvider's implementation + // of the insert_into method. 
+ df.clone() + .write_table("test", DataFrameWriteOptions::new()) + .await?; + + df.clone() + .write_parquet( + "./datafusion-examples/test_parquet/", + DataFrameWriteOptions::new(), + None, + ) + .await?; + + df.clone() + .write_csv( + "./datafusion-examples/test_csv/", + // DataFrameWriteOptions contains options which control how data is written + // such as compression codec + DataFrameWriteOptions::new(), + Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)), + ) + .await?; + + df.clone() + .write_json( + "./datafusion-examples/test_json/", + DataFrameWriteOptions::new(), + None, + ) + .await?; + + Ok(()) +} diff --git a/datafusion-examples/examples/dataframe_output.rs b/datafusion-examples/examples/dataframe_output.rs deleted file mode 100644 index 60ca090d722d..000000000000 --- a/datafusion-examples/examples/dataframe_output.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::{dataframe::DataFrameWriteOptions, prelude::*}; -use datafusion_common::config::CsvOptions; -use datafusion_common::{parsers::CompressionTypeVariant, DataFusionError}; - -/// This example demonstrates the various methods to write out a DataFrame to local storage. -/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example -/// using a remote object store. -#[tokio::main] -async fn main() -> Result<(), DataFusionError> { - let ctx = SessionContext::new(); - - let mut df = ctx.sql("values ('a'), ('b'), ('c')").await.unwrap(); - - // Ensure the column names and types match the target table - df = df.with_column_renamed("column1", "tablecol1").unwrap(); - - ctx.sql( - "create external table - test(tablecol1 varchar) - stored as parquet - location './datafusion-examples/test_table/'", - ) - .await? - .collect() - .await?; - - // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c'). - // The behavior of write_table depends on the TableProvider's implementation - // of the insert_into method. 
- df.clone() - .write_table("test", DataFrameWriteOptions::new()) - .await?; - - df.clone() - .write_parquet( - "./datafusion-examples/test_parquet/", - DataFrameWriteOptions::new(), - None, - ) - .await?; - - df.clone() - .write_csv( - "./datafusion-examples/test_csv/", - // DataFrameWriteOptions contains options which control how data is written - // such as compression codec - DataFrameWriteOptions::new(), - Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)), - ) - .await?; - - df.clone() - .write_json( - "./datafusion-examples/test_json/", - DataFrameWriteOptions::new(), - None, - ) - .await?; - - Ok(()) -} diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index cb0796bdcf73..943e5d5e027c 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter; /// 4. Simplify expressions: [`simplify_demo`] /// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`] /// 6. Get the types of the expressions: [`expression_type_demo`] -/// 7. Apply type cocercion to expressions: [`type_coercion_demo`] +/// 7. Apply type coercion to expressions: [`type_coercion_demo`] #[tokio::main] async fn main() -> Result<()> { // The easiest way to do create expressions is to use the @@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> { )?; assert!(physical_expr.evaluate(&batch).is_ok()); - // 4. Apply explict type coercion by manually rewriting the expression + // 4. Apply explicit type coercion by manually rewriting the expression let coerced_expr = expr .transform(|e| { // Only type coerces binary expressions. diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index b2771149aae5..58ffa060ebaa 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -36,7 +36,7 @@ use datafusion_expr::{ /// /// Apart from [FunctionFactory], this example covers /// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace -/// a function call with another expression at rutime. +/// a function call with another expression at runtime. /// /// This example is rather simple and does not cover all cases required for a /// real implementation. 
diff --git a/datafusion-examples/examples/memtable.rs b/datafusion-examples/examples/memtable.rs index 5cce578039e7..bb0b720eff79 100644 --- a/datafusion-examples/examples/memtable.rs +++ b/datafusion-examples/examples/memtable.rs @@ -25,7 +25,7 @@ use std::sync::Arc; use std::time::Duration; use tokio::time::timeout; -/// This example demonstrates executing a simple query against a Memtable +/// This example demonstrates executing a simple query against a [`MemTable`] #[tokio::main] async fn main() -> Result<()> { let mem_table = create_memtable()?; diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index 0f28a1670252..e8a272f28318 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -146,7 +146,7 @@ impl MyOptimizerRule { // Closure called for each sub tree match expr { Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => { - // destruture the expression + // destructure the expression let BinaryExpr { left, op: _, right } = binary_expr; // rewrite to `my_eq(left, right)` let udf = ScalarUDF::new_from_impl(MyEq::new()); diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 43a7f19dc6c9..cf1202498416 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -85,7 +85,7 @@ fn simple_expr_to_sql_demo() -> Result<()> { Ok(()) } -/// DataFusioon can remove parentheses when converting an expression to SQL. +/// DataFusion can remove parentheses when converting an expression to SQL. /// Note that output is intended for humans, not for other SQL engines, /// as difference in precedence rules can cause expressions to be parsed differently. fn simple_expr_to_pretty_sql_demo() -> Result<()> { diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index f32560ede69d..7cf1ce87690e 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc { let limit = exprs .get(1) .map(|expr| { - // try to simpify the expression, so 1+2 becomes 3, for example + // try to simplify the expression, so 1+2 becomes 3, for example let execution_props = ExecutionProps::new(); let info = SimplifyContext::new(&execution_props); let expr = ExprSimplifier::new(info).simplify(expr.clone())?; @@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef) -> Result<(SchemaRef, Vec Self { - Self { - signature: Signature::exact( - // this function will always take one arguments of type f64 - vec![DataType::Float64], - // this function is deterministic and will always return the same - // result for the same input - Volatility::Immutable, - ), - } - } -} -impl WindowUDFImpl for SimplifySmoothItUdf { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "simplify_smooth_it" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn partition_evaluator( - &self, - _partition_evaluator_args: PartitionEvaluatorArgs, - ) -> Result> { - todo!() - } - - /// this function will simplify `SimplifySmoothItUdf` to `SmoothItUdf`. 
- fn simplify(&self) -> Option { - let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| { - Ok(Expr::WindowFunction(WindowFunction { - fun: datafusion_expr::WindowFunctionDefinition::AggregateUDF(avg_udaf()), - args: window_function.args, - partition_by: window_function.partition_by, - order_by: window_function.order_by, - window_frame: window_function.window_frame, - null_treatment: window_function.null_treatment, - })) - }; - - Some(Box::new(simplify)) - } - - fn field(&self, field_args: WindowUDFFieldArgs) -> Result { - Ok(Field::new(field_args.name(), DataType::Float64, true)) - } -} - -// create local execution context with `cars.csv` registered as a table named `cars` -async fn create_context() -> Result { - // declare a new context. In spark API, this corresponds to a new spark SQL session - let ctx = SessionContext::new(); - - // declare a table in memory. In spark API, this corresponds to createDataFrame(...). - println!("pwd: {}", std::env::current_dir().unwrap().display()); - let csv_path = "../../datafusion/core/tests/data/cars.csv".to_string(); - let read_options = CsvReadOptions::default().has_header(true); - - ctx.register_csv("cars", &csv_path, read_options).await?; - Ok(ctx) -} - -#[tokio::main] -async fn main() -> Result<()> { - let ctx = create_context().await?; - let simplify_smooth_it = WindowUDF::from(SimplifySmoothItUdf::new()); - ctx.register_udwf(simplify_smooth_it.clone()); - - // Use SQL to run the new window function - let df = ctx.sql("SELECT * from cars").await?; - // print the results - df.show().await?; - - let df = ctx - .sql( - "SELECT \ - car, \ - speed, \ - simplify_smooth_it(speed) OVER (PARTITION BY car ORDER BY time) AS smooth_speed,\ - time \ - from cars \ - ORDER BY \ - car", - ) - .await?; - // print the results - df.show().await?; - - Ok(()) -} diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index a81ec724dd66..b331a55a98d0 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -40,6 +40,7 @@ avro = ["apache-avro"] backtrace = [] pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] force_hash_collisions = [] +recursive_protection = ["dep:recursive"] [dependencies] ahash = { workspace = true } @@ -62,7 +63,7 @@ object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" pyo3 = { version = "0.22.0", optional = true } -recursive = { workspace = true } +recursive = { workspace = true, optional = true } sqlparser = { workspace = true } tokio = { workspace = true } diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index d940bcf3146e..4f25260d5e9c 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -230,7 +230,7 @@ impl Column { .collect::>(); for using_col in using_columns { let all_matched = columns.iter().all(|c| using_col.contains(c)); - // All matched fields belong to the same using column set, in orther words + // All matched fields belong to the same using column set, in other words // the same join clause. We simply pick the qualifier from the first match. 
if all_matched { return Ok(columns[0].clone()); diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 6e64700bd2e0..942aa308e200 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -904,12 +904,12 @@ pub trait ConfigExtension: ExtensionOptions { pub trait ExtensionOptions: Send + Sync + fmt::Debug + 'static { /// Return `self` as [`Any`] /// - /// This is needed until trait upcasting is stabilised + /// This is needed until trait upcasting is stabilized fn as_any(&self) -> &dyn Any; /// Return `self` as [`Any`] /// - /// This is needed until trait upcasting is stabilised + /// This is needed until trait upcasting is stabilized fn as_any_mut(&mut self) -> &mut dyn Any; /// Return a deep clone of this [`ExtensionOptions`] diff --git a/datafusion/common/src/cse.rs b/datafusion/common/src/cse.rs index f64571b8471e..674d3386171f 100644 --- a/datafusion/common/src/cse.rs +++ b/datafusion/common/src/cse.rs @@ -60,7 +60,7 @@ pub trait Normalizeable { } /// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing -/// normlized nodes in optimizations like Common Subexpression Elimination (CSE). +/// normalized nodes in optimizations like Common Subexpression Elimination (CSE). /// /// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization) /// are considered equal in CSE optimization, even if their original forms differ. diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index b5f7b5681eef..ac4d8be8045f 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -955,7 +955,7 @@ pub trait ExprSchema: std::fmt::Debug { /// Returns the column's optional metadata. fn metadata(&self, col: &Column) -> Result<&HashMap>; - /// Return the coulmn's datatype and nullability + /// Return the column's datatype and nullability fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>; } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 4fac7298c455..1012c4cd2270 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -115,7 +115,7 @@ pub enum DataFusionError { Execution(String), /// [`JoinError`] during execution of the query. /// - /// This error can unoccur for unjoined tasks, such as execution shutdown. + /// This error can't occur for unjoined tasks, such as execution shutdown. ExecutionJoin(JoinError), /// Error when resources (such as memory of scratch disk space) are exhausted. /// diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index feb3c6f604f0..de14d3a01037 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2216,7 +2216,7 @@ impl ScalarValue { /// /// Errors if `self` is /// - a decimal that fails be converted to a decimal array of size - /// - a `Fixedsizelist` that fails to be concatenated into an array of size + /// - a `FixedsizeList` that fails to be concatenated into an array of size /// - a `List` that fails to be concatenated into an array of size /// - a `Dictionary` that fails be converted to a dictionary array of size pub fn to_array_of_size(&self, size: usize) -> Result { @@ -2925,7 +2925,7 @@ impl ScalarValue { /// preferred over this function if at all possible as they can be /// vectorized and are generally much faster. 
/// - /// This function has a few narrow usescases such as hash table key + /// This function has a few narrow use cases such as hash table key /// comparisons where comparing a single row at a time is necessary. /// /// # Errors @@ -4465,7 +4465,7 @@ mod tests { Ok(()) } - // Verifies that ScalarValue has the same behavior with compute kernal when it overflows. + // Verifies that ScalarValue has the same behavior with compute kernel when it overflows. fn check_scalar_add_overflow(left: ScalarValue, right: ScalarValue) where T: ArrowNumericType, @@ -6150,9 +6150,9 @@ mod tests { &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())) ); - let newscalar = ScalarValue::try_from_array(&array, 0).unwrap(); + let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap(); assert_eq!( - newscalar.data_type(), + new_scalar.data_type(), DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())) ); } diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 0c153583e34b..c70389b63177 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -18,7 +18,6 @@ //! [`TreeNode`] for visiting and rewriting expression and plan trees use crate::Result; -use recursive::recursive; use std::collections::HashMap; use std::hash::Hash; use std::sync::Arc; @@ -125,7 +124,7 @@ pub trait TreeNode: Sized { /// TreeNodeVisitor::f_up(ChildNode2) /// TreeNodeVisitor::f_up(ParentNode) /// ``` - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn visit<'n, V: TreeNodeVisitor<'n, Node = Self>>( &'n self, visitor: &mut V, @@ -175,7 +174,7 @@ pub trait TreeNode: Sized { /// TreeNodeRewriter::f_up(ChildNode2) /// TreeNodeRewriter::f_up(ParentNode) /// ``` - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite>( self, rewriter: &mut R, @@ -198,7 +197,7 @@ pub trait TreeNode: Sized { &'n self, mut f: F, ) -> Result { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn apply_impl<'n, N: TreeNode, F: FnMut(&'n N) -> Result>( node: &'n N, f: &mut F, @@ -233,7 +232,7 @@ pub trait TreeNode: Sized { self, mut f: F, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_impl Result>>( node: N, f: &mut F, @@ -257,7 +256,7 @@ pub trait TreeNode: Sized { self, mut f: F, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_up_impl Result>>( node: N, f: &mut F, @@ -372,7 +371,7 @@ pub trait TreeNode: Sized { mut f_down: FD, mut f_up: FU, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_up_impl< N: TreeNode, FD: FnMut(N) -> Result>, @@ -996,11 +995,11 @@ impl< /// construct a temporary container to be able to call `apply_ref_elements` on a /// collection of tree node references. But in that case the container's temporary /// lifetime is different to the lifetime of tree nodes that we put into it. -/// Please find an example usecase in `Expr::apply_children` with the `Expr::Case` case. +/// Please find an example use case in `Expr::apply_children` with the `Expr::Case` case. /// /// Most of the cases we don't need to create a temporary container with /// `TreeNodeRefContainer`, but we can just call `TreeNodeContainer::apply_elements`. 
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::GroupingSet` +/// Please find an example use case in `Expr::apply_children` with the `Expr::GroupingSet` /// case. pub trait TreeNodeRefContainer<'a, T: 'a>: Sized { /// Applies `f` to all elements of the container. @@ -2350,6 +2349,7 @@ pub(crate) mod tests { Ok(()) } + #[cfg(feature = "recursive_protection")] #[test] fn test_large_tree() { let mut item = TestTreeNode::new_leaf("initial".to_string()); diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index bb68d59eed59..ab73996fcd8b 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! This module provides a function to estimate the memory size of a HashTable prior to alloaction +//! This module provides a function to estimate the memory size of a HashTable prior to allocation use crate::{DataFusionError, Result}; use std::mem::size_of; @@ -79,7 +79,7 @@ pub fn estimate_memory_size(num_elements: usize, fixed_size: usize) -> Result // For the majority of cases hashbrown overestimates the bucket quantity // to keep ~1/8 of them empty. We take this factor into account by // multiplying the number of elements with a fixed ratio of 8/7 (~1.14). - // This formula leads to overallocation for small tables (< 8 elements) + // This formula leads to over-allocation for small tables (< 8 elements) // but should be fine overall. num_elements .checked_mul(8) diff --git a/datafusion/common/src/utils/proxy.rs b/datafusion/common/src/utils/proxy.rs index b32164f682fa..d940677a5fb3 100644 --- a/datafusion/common/src/utils/proxy.rs +++ b/datafusion/common/src/utils/proxy.rs @@ -92,12 +92,12 @@ impl VecAllocExt for Vec { type T = T; fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) { - let prev_capacty = self.capacity(); + let prev_capacity = self.capacity(); self.push(x); let new_capacity = self.capacity(); - if new_capacity > prev_capacty { + if new_capacity > prev_capacity { // capacity changed, so we allocated more - let bump_size = (new_capacity - prev_capacty) * size_of::(); + let bump_size = (new_capacity - prev_capacity) * size_of::(); // Note multiplication should never overflow because `push` would // have panic'd first, but the checked_add could potentially // overflow since accounting could be tracking additional values, and diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 9bf530a9d6ac..64ad8f2ba152 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -59,6 +59,7 @@ default = [ "unicode_expressions", "compression", "parquet", + "recursive_protection", ] encoding_expressions = ["datafusion-functions/encoding_expressions"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) @@ -69,6 +70,13 @@ pyarrow = ["datafusion-common/pyarrow", "parquet"] regex_expressions = [ "datafusion-functions/regex_expressions", ] +recursive_protection = [ + "datafusion-common/recursive_protection", + "datafusion-expr/recursive_protection", + "datafusion-optimizer/recursive_protection", + "datafusion-physical-optimizer/recursive_protection", + "datafusion-sql/recursive_protection", +] serde = ["arrow-schema/serde"] string_expressions = ["datafusion-functions/string_expressions"] unicode_expressions = [ @@ -87,7 +95,6 @@ async-compression = { version = "0.4.0", features = [ "gzip", "xz", "zstd", - "futures-io", 
"tokio", ], optional = true } async-trait = { workspace = true } diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 349c2e438195..7d87a37b3b9c 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -38,7 +38,7 @@ use datafusion::physical_plan::{ use datafusion::prelude::SessionContext; use datafusion_physical_expr_common::sort_expr::LexOrdering; -// Initialise the operator using the provided record batches and the sort key +// Initialize the operator using the provided record batches and the sort key // as inputs. All record batches must have the same schema. fn sort_preserving_merge_operator( session_ctx: Arc, diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 414d6da7bc9b..60a09301ae0f 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -77,6 +77,9 @@ pub struct DataFrameWriteOptions { /// Sets which columns should be used for hive-style partitioned writes by name. /// Can be set to empty vec![] for non-partitioned writes. partition_by: Vec, + /// Sets which columns should be used for sorting the output by name. + /// Can be set to empty vec![] for non-sorted writes. + sort_by: Vec, } impl DataFrameWriteOptions { @@ -86,6 +89,7 @@ impl DataFrameWriteOptions { insert_op: InsertOp::Append, single_file_output: false, partition_by: vec![], + sort_by: vec![], } } @@ -106,6 +110,12 @@ impl DataFrameWriteOptions { self.partition_by = partition_by; self } + + /// Sets the sort_by columns for output sorting + pub fn with_sort_by(mut self, sort_by: Vec) -> Self { + self.sort_by = sort_by; + self + } } impl Default for DataFrameWriteOptions { @@ -1517,8 +1527,17 @@ impl DataFrame { write_options: DataFrameWriteOptions, ) -> Result, DataFusionError> { let arrow_schema = Schema::from(self.schema()); + + let plan = if write_options.sort_by.is_empty() { + self.plan + } else { + LogicalPlanBuilder::from(self.plan) + .sort(write_options.sort_by)? + .build()? + }; + let plan = LogicalPlanBuilder::insert_into( - self.plan, + plan, table_name.to_owned(), &arrow_schema, write_options.insert_op, @@ -1577,8 +1596,16 @@ impl DataFrame { let file_type = format_as_file_type(format); + let plan = if options.sort_by.is_empty() { + self.plan + } else { + LogicalPlanBuilder::from(self.plan) + .sort(options.sort_by)? + .build()? + }; + let plan = LogicalPlanBuilder::copy_to( - self.plan, + plan, path.into(), file_type, HashMap::new(), @@ -1638,8 +1665,16 @@ impl DataFrame { let file_type = format_as_file_type(format); + let plan = if options.sort_by.is_empty() { + self.plan + } else { + LogicalPlanBuilder::from(self.plan) + .sort(options.sort_by)? + .build()? 
+ }; + let plan = LogicalPlanBuilder::copy_to( - self.plan, + plan, path.into(), file_type, Default::default(), @@ -1940,6 +1975,7 @@ mod tests { use crate::physical_plan::{ColumnarValue, Partitioning, PhysicalExpr}; use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name}; + use crate::prelude::{CsvReadOptions, NdJsonReadOptions, ParquetReadOptions}; use arrow::array::Int32Array; use datafusion_common::{assert_batches_eq, Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; @@ -1954,6 +1990,7 @@ mod tests { use datafusion_physical_expr::expressions::Column; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; use sqlparser::ast::NullTreatment; + use tempfile::TempDir; // Get string representation of the plan async fn assert_physical_plan(df: &DataFrame, expected: Vec<&str>) { @@ -3279,7 +3316,7 @@ mod tests { &df_results ); - // check that col with the same name ovwewritten + // check that col with the same name overwritten let df_results_overwrite = df .clone() .with_column("c1", col("c2") + col("c3"))? @@ -3302,7 +3339,7 @@ mod tests { &df_results_overwrite ); - // check that col with the same name ovwewritten using same name as reference + // check that col with the same name overwritten using same name as reference let df_results_overwrite_self = df .clone() .with_column("c2", col("c2") + lit(1))? @@ -4057,4 +4094,237 @@ mod tests { Ok(()) } + + // Test issue: https://github.com/apache/datafusion/issues/13873 + #[tokio::test] + async fn write_parquet_with_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + + let ctx = SessionContext::new(); + let write_df = ctx.read_batch(RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 5, 7, 3, 2])), + Arc::new(Int32Array::from(vec![2, 3, 4, 5, 6])), + ], + )?)?; + + let test_path = tmp_dir.path().join("test.parquet"); + + write_df + .clone() + .write_parquet( + test_path.to_str().unwrap(), + DataFrameWriteOptions::new() + .with_sort_by(vec![col("a").sort(true, true)]), + None, + ) + .await?; + + let ctx = SessionContext::new(); + ctx.register_parquet( + "data", + test_path.to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await?; + + let df = ctx.sql("SELECT * FROM data").await?; + let results = df.collect().await?; + + let df_explain = ctx.sql("explain SELECT a FROM data").await?; + let explain_result = df_explain.collect().await?; + + println!("explain_result {:?}", explain_result); + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | 2 |", + "| 2 | 6 |", + "| 3 | 5 |", + "| 5 | 3 |", + "| 7 | 4 |", + "+---+---+", + ], + &results + ); + Ok(()) + } + + // Test issue: https://github.com/apache/datafusion/issues/13873 + #[tokio::test] + async fn write_csv_with_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + + let ctx = SessionContext::new(); + let write_df = ctx.read_batch(RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 5, 7, 3, 2])), + Arc::new(Int32Array::from(vec![2, 3, 4, 5, 6])), + ], + )?)?; + + let test_path = tmp_dir.path().join("test.csv"); + + write_df + .clone() + .write_csv( + test_path.to_str().unwrap(), + DataFrameWriteOptions::new() + .with_sort_by(vec![col("a").sort(true, 
true)]), + None, + ) + .await?; + + let ctx = SessionContext::new(); + ctx.register_csv( + "data", + test_path.to_str().unwrap(), + CsvReadOptions::new().schema(&schema), + ) + .await?; + + let df = ctx.sql("SELECT * FROM data").await?; + let results = df.collect().await?; + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | 2 |", + "| 2 | 6 |", + "| 3 | 5 |", + "| 5 | 3 |", + "| 7 | 4 |", + "+---+---+", + ], + &results + ); + Ok(()) + } + + // Test issue: https://github.com/apache/datafusion/issues/13873 + #[tokio::test] + async fn write_json_with_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + + let ctx = SessionContext::new(); + let write_df = ctx.read_batch(RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 5, 7, 3, 2])), + Arc::new(Int32Array::from(vec![2, 3, 4, 5, 6])), + ], + )?)?; + + let test_path = tmp_dir.path().join("test.json"); + + write_df + .clone() + .write_json( + test_path.to_str().unwrap(), + DataFrameWriteOptions::new() + .with_sort_by(vec![col("a").sort(true, true)]), + None, + ) + .await?; + + let ctx = SessionContext::new(); + ctx.register_json( + "data", + test_path.to_str().unwrap(), + NdJsonReadOptions::default().schema(&schema), + ) + .await?; + + let df = ctx.sql("SELECT * FROM data").await?; + let results = df.collect().await?; + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | 2 |", + "| 2 | 6 |", + "| 3 | 5 |", + "| 5 | 3 |", + "| 7 | 4 |", + "+---+---+", + ], + &results + ); + Ok(()) + } + + // Test issue: https://github.com/apache/datafusion/issues/13873 + #[tokio::test] + async fn write_table_with_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + let ctx = SessionContext::new(); + let location = tmp_dir.path().join("test_table/"); + + let mut write_df = ctx + .sql("values ('z'), ('x'), ('a'), ('b'), ('c')") + .await + .unwrap(); + + // Ensure the column names and types match the target table + write_df = write_df + .with_column_renamed("column1", "tablecol1") + .unwrap(); + let sql_str = + "create external table data(tablecol1 varchar) stored as parquet location '" + .to_owned() + + location.to_str().unwrap() + + "'"; + + ctx.sql(sql_str.as_str()).await?.collect().await?; + + // This is equivalent to INSERT INTO test. + write_df + .clone() + .write_table( + "data", + DataFrameWriteOptions::new() + .with_sort_by(vec![col("tablecol1").sort(true, true)]), + ) + .await?; + + let df = ctx.sql("SELECT * FROM data").await?; + let results = df.collect().await?; + + assert_batches_eq!( + &[ + "+-----------+", + "| tablecol1 |", + "+-----------+", + "| a |", + "| b |", + "| c |", + "| x |", + "| z |", + "+-----------+", + ], + &results + ); + Ok(()) + } } diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index 0af68783c41f..1dd4d68fca6b 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -74,8 +74,16 @@ impl DataFrame { let file_type = format_as_file_type(format); + let plan = if options.sort_by.is_empty() { + self.plan + } else { + LogicalPlanBuilder::from(self.plan) + .sort(options.sort_by)? + .build()? 
+ }; + let plan = LogicalPlanBuilder::copy_to( - self.plan, + plan, path.into(), file_type, Default::default(), diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index 5efabd000d68..91c1e0ac97fc 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -67,7 +67,7 @@ impl TableSource for DefaultTableSource { } /// Tests whether the table provider can make use of any or all filter expressions - /// to optimise data retrieval. + /// to optimize data retrieval. fn supports_filters_pushdown( &self, filter: &[&Expr], diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index f9a19f1d9691..f681dfe219b5 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -781,7 +781,7 @@ mod test { assert_partitioned_files(expected, actual); } - /// Asserts that the two groups of `ParititonedFile` are the same + /// Asserts that the two groups of [`PartitionedFile`] are the same /// (PartitionedFile doesn't implement PartialEq) fn assert_partitioned_files( expected: Option>>, diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 879c9817a382..4071f9c26b58 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -870,7 +870,7 @@ mod tests { )] #[cfg(feature = "compression")] #[tokio::test] - async fn test_json_with_repartitioing( + async fn test_json_with_repartitioning( file_compression_type: FileCompressionType, ) -> Result<()> { let config = SessionConfig::new() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 7573e32f8652..83b544a76e11 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -333,7 +333,7 @@ impl ParquetExecBuilder { /// Set the filter predicate when reading. /// - /// See the "Predicate Pushdown" section of the [`ParquetExec`] documenation + /// See the "Predicate Pushdown" section of the [`ParquetExec`] documentation /// for more details. pub fn with_predicate(mut self, predicate: Arc) -> Self { self.predicate = Some(predicate); @@ -611,7 +611,7 @@ impl ParquetExec { } /// If enabled, the reader will read the page index - /// This is used to optimise filter pushdown + /// This is used to optimize filter pushdown /// via `RowSelector` and `RowFilter` by /// eliminating unnecessary IO and decoding pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index af5ffb9d5743..f6428a693fb1 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -422,7 +422,7 @@ fn would_column_prevent_pushdown( checker.prevents_pushdown() } -/// Recurses through expr as a trea, finds all `column`s, and checks if any of them would prevent +/// Recurses through expr as a tree, finds all `column`s, and checks if any of them would prevent /// this expression from being predicate pushed down. 
If any of them would, this returns false.
 /// Otherwise, true.
 pub fn can_expr_be_pushed_down_with_schemas(
@@ -692,7 +692,7 @@ mod test {
         let mut parquet_reader = parquet_reader_builder.build().expect("building reader");
 
-        // Parquet file is small, we only need 1 recordbatch
+        // Parquet file is small, we only need 1 record batch
         let first_rb = parquet_reader
             .next()
             .expect("expected record batch")
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
index 810f74e8515b..3854f04566ee 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
@@ -294,7 +294,7 @@ impl BloomFilterStatistics {
                 }
                 _ => true,
             },
-            // One more parrern matching since not all data types are supported
+            // One more pattern matching since not all data types are supported
            // inside of a Dictionary
            ScalarValue::Dictionary(_, inner) => match inner.as_ref() {
                ScalarValue::Int32(_)
diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs
index 2cea37fe17e2..768761bb9cf1 100644
--- a/datafusion/core/src/datasource/stream.rs
+++ b/datafusion/core/src/datasource/stream.rs
@@ -101,7 +101,7 @@ impl FromStr for StreamEncoding {
         match s.to_ascii_lowercase().as_str() {
             "csv" => Ok(Self::Csv),
             "json" => Ok(Self::Json),
-            _ => plan_err!("Unrecognised StreamEncoding {}", s),
+            _ => plan_err!("Unrecognized StreamEncoding {}", s),
         }
     }
 }
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index 67236c9a6bd2..3455cce132b6 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -1903,7 +1903,7 @@ mod tests {
     #[tokio::test]
     async fn send_context_to_threads() -> Result<()> {
         // ensure SessionContexts can be used in a multi-threaded
-        // environment. Usecase is for concurrent planing.
+        // environment. Use case is for concurrent planning.
         let tmp_dir = TempDir::new()?;
         let partition_count = 4;
         let ctx = Arc::new(create_ctx(&tmp_dir, partition_count).await?);
diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs
index 3f23c150be83..be87c7cac1d2 100644
--- a/datafusion/core/src/execution/context/parquet.rs
+++ b/datafusion/core/src/execution/context/parquet.rs
@@ -281,10 +281,10 @@ mod tests {
         )
         .await;
         let binding = DataFilePaths::to_urls(&path2).unwrap();
-        let expexted_path = binding[0].as_str();
+        let expected_path = binding[0].as_str();
         assert_eq!(
             read_df.unwrap_err().strip_backtrace(),
-            format!("Execution error: File path '{}' does not match the expected extension '.parquet'", expexted_path)
+            format!("Execution error: File path '{}' does not match the expected extension '.parquet'", expected_path)
         );
 
         // Read the dataframe from 'output3.parquet.snappy.parquet' with the correct file extension.
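For reference, the `DataFrameWriteOptions::with_sort_by` option exercised by the new write tests earlier in this diff can be used as in the following minimal sketch. This is illustrative only and not part of the patch; the output path is a placeholder.

use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::Result;
use datafusion::prelude::*;

async fn write_sorted_parquet(df: DataFrame) -> Result<()> {
    // Sort by column "a" (ascending, nulls first) before the file is written.
    df.write_parquet(
        "/tmp/sorted.parquet", // placeholder output path
        DataFrameWriteOptions::new().with_sort_by(vec![col("a").sort(true, true)]),
        None, // default parquet writer options
    )
    .await?;
    Ok(())
}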
@@ -316,7 +316,7 @@ mod tests { let total_rows: usize = results.iter().map(|rb| rb.num_rows()).sum(); assert_eq!(total_rows, 0); - // Read the datafram from doule dot folder; + // Read the dataframe from double dot folder; let read_df = ctx .read_parquet( &path5, diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index ef32e84a7380..c5874deb6ed5 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -125,9 +125,9 @@ pub struct SessionState { session_id: String, /// Responsible for analyzing and rewrite a logical plan before optimization analyzer: Analyzer, - /// Provides support for customising the SQL planner, e.g. to add support for custom operators like `->>` or `?` + /// Provides support for customizing the SQL planner, e.g. to add support for custom operators like `->>` or `?` expr_planners: Vec>, - /// Provides support for customising the SQL type planning + /// Provides support for customizing the SQL type planning type_planner: Option>, /// Responsible for optimizing a logical plan optimizer: Optimizer, diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index a1b18b8bfe8c..e9501bd37a8a 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -182,7 +182,7 @@ //! //! DataFusion is designed to be highly extensible, so you can //! start with a working, full featured engine, and then -//! specialize any behavior for your usecase. For example, +//! specialize any behavior for your use case. For example, //! some projects may add custom [`ExecutionPlan`] operators, or create their own //! query language that directly creates [`LogicalPlan`] rather than using the //! built in SQL planner, [`SqlToRel`]. diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 76c4d668d797..3c8d08ee32d4 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1252,7 +1252,7 @@ fn ensure_distribution( // to increase parallelism. child = add_roundrobin_on_top(child, target_partitions)?; } - // When inserting hash is necessary to satisy hash requirement, insert hash repartition. + // When inserting hash is necessary to satisfy hash requirement, insert hash repartition. 
if hash_necessary { child = add_hash_on_top(child, exprs.to_vec(), target_partitions)?; @@ -2833,11 +2833,11 @@ pub(crate) mod tests { ], // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases // when mode is Inner, Left, LeftSemi, LeftAnti // Similarly, since partitioning of the left side is not preserved - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test // cases when mode is Inner, Left, LeftSemi, LeftAnti _ => vec![ @@ -2885,11 +2885,11 @@ pub(crate) mod tests { ], // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases // when mode is Inner, Left, LeftSemi, LeftAnti // Similarly, since partitioning of the left side is not preserved - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional Hash Repartition and Roundrobin repartition after // SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti _ => vec![ diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 85fe9ecfcdb0..dd8e9d900b7d 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -720,7 +720,7 @@ mod tests { let state = session_ctx.state(); // This file has 4 rules that use tree node, apply these rules as in the - // EnforSorting::optimize implementation + // EnforceSorting::optimize implementation // After these operations tree nodes should be in a consistent state. // This code block makes sure that these rules doesn't violate tree node integrity. { diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 009757f3a938..29c6e0078847 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -61,7 +61,7 @@ impl JoinSelection { // TODO: We need some performance test for Right Semi/Right Join swap to Left Semi/Left Join in case that the right side is smaller but not much smaller. // TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is 8 times. /// Checks statistics for join swap. -fn should_swap_join_order( +pub(crate) fn should_swap_join_order( left: &dyn ExecutionPlan, right: &dyn ExecutionPlan, ) -> Result { @@ -108,7 +108,7 @@ fn supports_collect_by_thresholds( } /// Predicate that checks whether the given join type supports input swapping. 
-fn supports_swap(join_type: JoinType) -> bool { +pub(crate) fn supports_swap(join_type: JoinType) -> bool { matches!( join_type, JoinType::Inner @@ -222,7 +222,7 @@ pub fn swap_hash_join( } /// Swaps inputs of `NestedLoopJoinExec` and wraps it into `ProjectionExec` is required -fn swap_nl_join(join: &NestedLoopJoinExec) -> Result> { +pub(crate) fn swap_nl_join(join: &NestedLoopJoinExec) -> Result> { let new_filter = swap_join_filter(join.filter()); let new_join_type = &swap_join_type(*join.join_type()); @@ -359,7 +359,7 @@ impl PhysicalOptimizerRule for JoinSelection { /// `CollectLeft` mode is applicable. Otherwise, it will try to swap the join sides. /// When the `ignore_threshold` is false, this function will also check left /// and right sizes in bytes or rows. -fn try_collect_left( +pub(crate) fn try_collect_left( hash_join: &HashJoinExec, ignore_threshold: bool, threshold_byte_size: usize, @@ -421,7 +421,14 @@ fn try_collect_left( } } -fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result> { +/// Creates a partitioned hash join execution plan, swapping inputs if beneficial. +/// +/// Checks if the join order should be swapped based on the join type and input statistics. +/// If swapping is optimal and supported, creates a swapped partitioned hash join; otherwise, +/// creates a standard partitioned hash join. +pub(crate) fn partitioned_hash_join( + hash_join: &HashJoinExec, +) -> Result> { let left = hash_join.left(); let right = hash_join.right(); if supports_swap(*hash_join.join_type()) && should_swap_join_order(&**left, &**right)? diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 3ac40bfb62ea..d2d35c3877c1 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -615,15 +615,15 @@ fn try_embed_projection( /// Collect all column indices from the given projection expressions. fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec { // Collect indices and remove duplicates. - let mut indexs = exprs + let mut indices = exprs .iter() .flat_map(|(expr, _)| collect_columns(expr)) .map(|x| x.index()) .collect::>() .into_iter() .collect::>(); - indexs.sort(); - indexs + indices.sort(); + indices } /// Tries to push `projection` down through `hash_join`. 
If possible, performs the diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index aa134f28fe5b..b1a6f014380e 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -286,7 +286,7 @@ impl DisplayAs for UnboundedExec { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!( f, - "UnboundableExec: unbounded={}", + "UnboundedExec: unbounded={}", self.batch_produce.is_none(), ) } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index f4f754b11c6d..02fe2d83b3c4 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1140,7 +1140,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { } #[tokio::test] -async fn unnest_fixed_list_nonull() -> Result<()> { +async fn unnest_fixed_list_non_null() -> Result<()> { let mut shape_id_builder = UInt32Builder::new(); let mut tags_builder = FixedSizeListBuilder::new(StringBuilder::new(), 2); @@ -2053,9 +2053,9 @@ async fn test_dataframe_placeholder_missing_param_values() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$0' was not provided a value for execution." ); @@ -2119,9 +2119,9 @@ async fn test_dataframe_placeholder_column_parameter() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$1' was not provided a value for execution." ); @@ -2189,9 +2189,9 @@ async fn test_dataframe_placeholder_like_expression() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$1' was not provided a value for execution." ); @@ -2277,12 +2277,12 @@ async fn write_partitioned_parquet_results() -> Result<()> { // Explicitly read the parquet file at c2=123 to verify the physical files are partitioned let partitioned_file = format!("{out_dir}/c2=123", out_dir = out_dir); - let filted_df = ctx + let filter_df = ctx .read_parquet(&partitioned_file, ParquetReadOptions::default()) .await?; // Check that the c2 column is gone and that c1 is abc. - let results = filted_df.collect().await?; + let results = filter_df.collect().await?; let expected = ["+-----+", "| c1 |", "+-----+", "| abc |", "+-----+"]; assert_batches_eq!(expected, &results); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index 881949047bff..e18dab35fc91 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -184,7 +184,7 @@ fn add_equal_conditions_test() -> Result<()> { assert!(eq_groups.contains(&col_a_expr)); assert!(eq_groups.contains(&col_b_expr)); - // b and c are aliases. Exising equivalence class should expand, + // b and c are aliases. 
Existing equivalence class should expand, // however there shouldn't be any new equivalence class eq_properties.add_equal_conditions(&col_b_expr, &col_c_expr)?; assert_eq!(eq_properties.eq_group().len(), 1); diff --git a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs index c52acdd82764..a82849f4ea92 100644 --- a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs @@ -281,7 +281,7 @@ fn i64string_batch<'a>( .unwrap() } -/// Run the TopK test, sorting the input batches with the specified ftch +/// Run the TopK test, sorting the input batches with the specified fetch /// (limit) and compares the results to the expected values. async fn run_limit_test(fetch: usize, data: &SortedData) { let input = data.batches(); diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index e4acb96f4930..19ffa69f11d3 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -93,7 +93,7 @@ impl SortTest { self } - /// specify that this test should use a memory pool of the specifeid size + /// specify that this test should use a memory pool of the specified size fn with_pool_size(mut self, pool_size: usize) -> Self { self.pool_size = Some(pool_size); self diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 73f4a569954e..daa282c8fe4a 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -261,15 +261,15 @@ mod sp_repartition_fuzz_tests { for ordering in eq_properties.oeq_class().iter() { let err_msg = format!("error in eq properties: {:?}", eq_properties); - let sort_solumns = ordering + let sort_columns = ordering .iter() .map(|sort_expr| sort_expr.evaluate_to_sort_column(&res)) .collect::>>()?; - let orig_columns = sort_solumns + let orig_columns = sort_columns .iter() .map(|sort_column| sort_column.values.clone()) .collect::>(); - let sorted_columns = lexsort(&sort_solumns, None)?; + let sorted_columns = lexsort(&sort_columns, None)?; // Make sure after merging ordering is still valid. assert_eq!(orig_columns.len(), sorted_columns.len(), "{}", err_msg); diff --git a/datafusion/core/tests/macro_hygiene/mod.rs b/datafusion/core/tests/macro_hygiene/mod.rs index 62f24f5198e6..5aff1d5e3296 100644 --- a/datafusion/core/tests/macro_hygiene/mod.rs +++ b/datafusion/core/tests/macro_hygiene/mod.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! Verifies [Macro Hygene] +//! Verifies [Macro Hygiene] //! -//! [Macro Hygene]: https://en.wikipedia.org/wiki/Hygienic_macro +//! [Macro Hygiene]: https://en.wikipedia.org/wiki/Hygienic_macro mod plan_err { // NO other imports! diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index fa23f5c699e2..61a9e9b5757c 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -161,7 +161,7 @@ async fn plan_and_filter() { RowGroupAccess::Scan, ])); - // initia + // initial let parquet_metrics = TestFull { access_plan, expected_rows: 0, @@ -274,7 +274,7 @@ struct Test { impl Test { /// Runs the test case, panic'ing on error. 
/// - /// Returns the `MetricsSet` from the ParqeutExec + /// Returns the [`MetricsSet`] from the [`ParquetExec`] async fn run_success(self) -> MetricsSet { let Self { access_plan, diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 39fd492786bc..5fb0b9852641 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -566,7 +566,7 @@ async fn csv_explain_verbose_plans() { #[tokio::test] async fn explain_analyze_runs_optimizers(#[values("*", "1")] count_expr: &str) { // repro for https://github.com/apache/datafusion/issues/917 - // where EXPLAIN ANALYZE was not correctly running optiimizer + // where EXPLAIN ANALYZE was not correctly running optimizer let ctx = SessionContext::new(); register_alltypes_parquet(&ctx).await; diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 82f73eadba8c..03c4ad7c013e 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -196,7 +196,7 @@ fn populate_csv_partitions( Ok(schema) } -/// Specialised String representation +/// Specialized String representation fn col_str(column: &ArrayRef, row_index: usize) -> String { // NullArray::is_null() does not work on NullArray. // can remove check for DataType::Null when diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 975984e5b11f..441af1639d9b 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -183,7 +183,7 @@ async fn parquet_distinct_partition_col() -> Result<()> { max_limit += 1; let last_batch = results .last() - .expect("There shouled be at least one record batch returned"); + .expect("There should be at least one record batch returned"); let last_row_idx = last_batch.num_rows() - 1; let mut min_limit = match ScalarValue::try_from_array(last_batch.column(0), last_row_idx)? 
{ @@ -568,7 +568,7 @@ async fn parquet_overlapping_columns() -> Result<()> { assert!( result.is_err(), - "Dupplicate qualified name should raise error" + "Duplicate qualified name should raise error" ); Ok(()) } diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 99c00615376f..bf32eef3b011 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -724,7 +724,7 @@ impl Accumulator for FirstSelector { } fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - // cast argumets to the appropriate type (DataFusion will type + // cast arguments to the appropriate type (DataFusion will type // check these based on the declared allowed input types) let v = as_primitive_array::(&values[0])?; let t = as_primitive_array::(&values[1])?; diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 77753290c37e..487063642345 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -482,7 +482,7 @@ impl ExtensionPlanner for TopKPlanner { /// code is not general and is meant as an illustration only struct TopKExec { input: Arc, - /// The maxium number of values + /// The maximum number of values k: usize, cache: PlanProperties, } diff --git a/datafusion/core/tests/user_defined/user_defined_table_functions.rs b/datafusion/core/tests/user_defined/user_defined_table_functions.rs index 39f10ef11ab0..b5f94107dd0b 100644 --- a/datafusion/core/tests/user_defined/user_defined_table_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_table_functions.rs @@ -228,8 +228,8 @@ fn read_csv_batches(csv_path: impl AsRef) -> Result<(SchemaRef, Vec, syntax_example: impl Into, ) -> DocumentationBuilder { - DocumentationBuilder::new(doc_section, description, syntax_example) + DocumentationBuilder::new_with_details(doc_section, description, syntax_example) } /// Output the `Documentation` struct in form of custom Rust documentation attributes @@ -160,7 +164,21 @@ pub struct DocSection { pub description: Option<&'static str>, } -/// A builder to be used for building [`Documentation`]'s. +impl Default for DocSection { + /// Returns a "default" Doc section. + /// + /// This is suitable for user defined functions that do not appear in the + /// DataFusion documentation. + fn default() -> Self { + Self { + include: true, + label: "Default", + description: None, + } + } +} + +/// A builder for [`Documentation`]'s. 
/// /// Example: /// @@ -189,7 +207,17 @@ pub struct DocumentationBuilder { } impl DocumentationBuilder { - pub fn new( + #[allow(clippy::new_without_default)] + #[deprecated( + since = "44.0.0", + note = "please use `DocumentationBuilder::new_with_details` instead" + )] + pub fn new() -> Self { + Self::new_with_details(DocSection::default(), "", "") + } + + /// Creates a new [`DocumentationBuilder`] with all required fields + pub fn new_with_details( doc_section: DocSection, description: impl Into, syntax_example: impl Into, diff --git a/datafusion/expr-common/src/accumulator.rs b/datafusion/expr-common/src/accumulator.rs index 7155c7993f8c..dc1e023d4c3c 100644 --- a/datafusion/expr-common/src/accumulator.rs +++ b/datafusion/expr-common/src/accumulator.rs @@ -115,7 +115,7 @@ pub trait Accumulator: Send + Sync + Debug { /// │ │ /// │ │ /// ┌─────────────────────────┐ ┌─────────────────────────┐ - /// │ GroubyBy │ │ GroubyBy │ + /// │ GroupBy │ │ GroupBy │ /// │(AggregateMode::Partial) │ │(AggregateMode::Partial) │ /// └─────────────────────────┘ └─────────────────────────┘ /// ▲ ▲ @@ -181,7 +181,7 @@ pub trait Accumulator: Send + Sync + Debug { /// │ │ /// │ │ /// ┌─────────────────────────┐ ┌──────────────────────────┐ 2. Each AggregateMode::Partial - /// │ GroubyBy │ │ GroubyBy │ GroupBy has an entry for *all* + /// │ GroupBy │ │ GroupBy │ GroupBy has an entry for *all* /// │(AggregateMode::Partial) │ │ (AggregateMode::Partial) │ the groups /// └─────────────────────────┘ └──────────────────────────┘ /// ▲ ▲ @@ -254,7 +254,7 @@ pub trait Accumulator: Send + Sync + Debug { /// or more intermediate values. /// /// For some aggregates (such as `SUM`), merge_batch is the same - /// as `update_batch`, but for some aggregrates (such as `COUNT`) + /// as `update_batch`, but for some aggregates (such as `COUNT`) /// the operations differ. See [`Self::state`] for more details on how /// state is used and merged. /// diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 77ba1858e35b..56f3029a4d7a 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -351,6 +351,15 @@ impl TypeSignature { } } + /// Returns true if the signature currently supports or used to supported 0 + /// input arguments in a previous version of DataFusion. 
+ pub fn used_to_support_zero_arguments(&self) -> bool { + match &self { + TypeSignature::Any(num) => *num == 0, + _ => self.supports_zero_argument(), + } + } + /// get all possible types for the given `TypeSignature` pub fn get_possible_types(&self) -> Vec> { match self { diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 2f41292f680f..b4f3f7fb680f 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -36,6 +36,7 @@ name = "datafusion_expr" path = "src/lib.rs" [features] +recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } @@ -48,7 +49,7 @@ datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } indexmap = { workspace = true } paste = "^1.0" -recursive = { workspace = true } +recursive = { workspace = true, optional = true } serde_json = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c82572ebd5f1..b8e495ee7ae9 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1287,7 +1287,7 @@ impl Expr { /// let expr = col("foo").alias("bar") + col("baz"); /// assert_eq!(expr.clone().unalias(), expr); /// - /// // `foo as "bar" as "baz" is unalaised to foo as "bar" + /// // `foo as "bar" as "baz" is unaliased to foo as "bar" /// let expr = col("foo").alias("bar").alias("baz"); /// assert_eq!(expr.unalias(), col("foo").alias("bar")); /// ``` @@ -1587,7 +1587,7 @@ impl Expr { /// Recursively find all [`Expr::Placeholder`] expressions, and /// to infer their [`DataType`] from the context of their use. /// - /// For example, gicen an expression like ` = $0` will infer `$0` to + /// For example, given an expression like ` = $0` will infer `$0` to /// have type `int32`. /// /// Returns transformed expression and flag that is true if expression contains @@ -2263,7 +2263,7 @@ impl Display for SchemaDisplay<'_> { "{}({}{})", func.name(), if *distinct { "DISTINCT " } else { "" }, - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )?; if let Some(null_treatment) = null_treatment { @@ -2335,7 +2335,7 @@ impl Display for SchemaDisplay<'_> { write!(f, "END") } - // Cast expr is not shown to be consistant with Postgres and Spark + // Cast expr is not shown to be consistent with Postgres and Spark Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { write!(f, "{}", SchemaDisplay(expr)) } @@ -2465,7 +2465,7 @@ impl Display for SchemaDisplay<'_> { f, "{}({})", fun, - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )?; if let Some(null_treatment) = null_treatment { @@ -2495,7 +2495,7 @@ impl Display for SchemaDisplay<'_> { /// Internal usage. Please call `schema_name_from_exprs` instead // TODO: Use ", " to standardize the formatting of Vec, // -pub(crate) fn schema_name_from_exprs_comma_seperated_without_space( +pub(crate) fn schema_name_from_exprs_comma_separated_without_space( exprs: &[Expr], ) -> Result { schema_name_from_exprs_inner(exprs, ",") @@ -2536,6 +2536,9 @@ pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result { Ok(s) } +pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref"; +pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST"; + /// Format expressions for display as part of a logical plan. 
In many cases, this will produce /// similar output to `Expr.name()` except that column names will be prefixed with '#'. impl Display for Expr { @@ -2543,7 +2546,9 @@ impl Display for Expr { match self { Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"), Expr::Column(c) => write!(f, "{c}"), - Expr::OuterReferenceColumn(_, c) => write!(f, "outer_ref({c})"), + Expr::OuterReferenceColumn(_, c) => { + write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})") + } Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")), Expr::Literal(v) => write!(f, "{v:?}"), Expr::Case(case) => { @@ -2598,7 +2603,7 @@ impl Display for Expr { Expr::ScalarFunction(fun) => { fmt_function(f, fun.name(), false, &fun.args, true) } - // TODO: use udf's display_name, need to fix the seperator issue, + // TODO: use udf's display_name, need to fix the separator issue, // Expr::ScalarFunction(ScalarFunction { func, args }) => { // write!(f, "{}", func.display_name(args).unwrap()) // } @@ -2736,7 +2741,7 @@ impl Display for Expr { }, Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"), Expr::Unnest(Unnest { expr }) => { - write!(f, "UNNEST({expr})") + write!(f, "{UNNEST_COLUMN_PREFIX}({expr})") } } } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 3317deafbd6c..d5c2ac396eb9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -32,7 +32,6 @@ use datafusion_common::{ TableReference, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; -use recursive::recursive; use std::collections::HashMap; use std::sync::Arc; @@ -100,7 +99,7 @@ impl ExprSchemable for Expr { /// expression refers to a column that does not exist in the /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn get_type(&self, schema: &dyn ExprSchema) -> Result { match self { Expr::Alias(Alias { expr, name, .. }) => match &**expr { diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 23ffc83e3549..e0235d32292f 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -69,7 +69,7 @@ pub type StateTypeFunction = /// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked /// * 'info': [crate::simplify::SimplifyInfo] /// -///Cclosure returns simplified [Expr] or an error. +/// Closure returns simplified [Expr] or an error. 
pub type AggregateFunctionSimplification = Box< dyn Fn( crate::expr::AggregateFunction, diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e15c0a36a0e9..c7cff3ac26b1 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -155,11 +155,11 @@ impl LogicalPlanBuilder { } // Ensure that the static term and the recursive term have the same number of fields let static_fields_len = self.plan.schema().fields().len(); - let recurive_fields_len = recursive_term.schema().fields().len(); - if static_fields_len != recurive_fields_len { + let recursive_fields_len = recursive_term.schema().fields().len(); + if static_fields_len != recursive_fields_len { return plan_err!( "Non-recursive term and recursive term must have the same number of columns ({} != {})", - static_fields_len, recurive_fields_len + static_fields_len, recursive_fields_len ); } // Ensure that the recursive term has the same field types as the static term @@ -254,7 +254,7 @@ impl LogicalPlanBuilder { if can_cast_types(&data_type, field_type) { } else { return exec_err!( - "type mistmatch and can't cast to got {} and {}", + "type mismatch and can't cast to got {} and {}", data_type, field_type ); @@ -1635,7 +1635,7 @@ pub fn wrap_projection_for_join_if_necessary( .iter() .map(|key| { // The display_name() of cast expression will ignore the cast info, and show the inner expression name. - // If we do not add alais, it will throw same field name error in the schema when adding projection. + // If we do not add alias, it will throw same field name error in the schema when adding projection. // For example: // input scan : [a, b, c], // join keys: [cast(a as int)] @@ -1776,7 +1776,7 @@ pub fn get_unnested_columns( let new_field = Arc::new(Field::new( col_name, data_type, // Unnesting may produce NULLs even if the list is not null. 
- // For example: unnset([1], []) -> 1, null + // For example: unnest([1], []) -> 1, null true, )); let column = Column::from_name(col_name); diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 3efaf296c29c..a433871ef20d 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -303,7 +303,7 @@ pub struct CreateMemoryTable { pub or_replace: bool, /// Default values for columns pub column_defaults: Vec<(String, Expr)>, - /// Wheter the table is `TableType::Temporary` + /// Whether the table is `TableType::Temporary` pub temporary: bool, } @@ -318,7 +318,7 @@ pub struct CreateView { pub or_replace: bool, /// SQL used to create the view, if available pub definition: Option, - /// Wheter the view is ephemeral + /// Whether the view is ephemeral pub temporary: bool, } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 6c2b923cf6ad..47d9aac3caf2 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1202,7 +1202,7 @@ impl LogicalPlan { /// # let schema = Schema::new(vec![ /// # Field::new("id", DataType::Int32, false), /// # ]); - /// // Build SELECT * FROM t1 WHRERE id = $1 + /// // Build SELECT * FROM t1 WHERE id = $1 /// let plan = table_scan(Some("t1"), &schema, None).unwrap() /// .filter(col("id").eq(placeholder("$1"))).unwrap() /// .build().unwrap(); @@ -1225,7 +1225,7 @@ impl LogicalPlan { /// ); /// /// // Note you can also used named parameters - /// // Build SELECT * FROM t1 WHRERE id = $my_param + /// // Build SELECT * FROM t1 WHERE id = $my_param /// let plan = table_scan(Some("t1"), &schema, None).unwrap() /// .filter(col("id").eq(placeholder("$my_param"))).unwrap() /// .build().unwrap() @@ -3633,7 +3633,7 @@ digraph { "#; // just test for a few key lines in the output rather than the - // whole thing to make test mainteance easier. + // whole thing to make test maintenance easier. let graphviz = format!("{}", plan.display_graphviz()); assert_eq!(expected_graphviz, graphviz); diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index 1539b69b4007..9a6103afd4b4 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -45,7 +45,6 @@ use crate::{ UserDefinedLogicalNode, Values, Window, }; use datafusion_common::tree_node::TreeNodeRefContainer; -use recursive::recursive; use crate::expr::{Exists, InSubquery}; use datafusion_common::tree_node::{ @@ -669,7 +668,7 @@ impl LogicalPlan { /// Visits a plan similarly to [`Self::visit`], including subqueries that /// may appear in expressions such as `IN (SELECT ...)`. - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] pub fn visit_with_subqueries TreeNodeVisitor<'n, Node = Self>>( &self, visitor: &mut V, @@ -688,7 +687,7 @@ impl LogicalPlan { /// Similarly to [`Self::rewrite`], rewrites this node and its inputs using `f`, /// including subqueries that may appear in expressions such as `IN (SELECT /// ...)`. 
- #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] pub fn rewrite_with_subqueries>( self, rewriter: &mut R, @@ -707,7 +706,7 @@ impl LogicalPlan { &self, mut f: F, ) -> Result { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn apply_with_subqueries_impl< F: FnMut(&LogicalPlan) -> Result, >( @@ -742,7 +741,7 @@ impl LogicalPlan { self, mut f: F, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_with_subqueries_impl< F: FnMut(LogicalPlan) -> Result>, >( @@ -767,7 +766,7 @@ impl LogicalPlan { self, mut f: F, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_up_with_subqueries_impl< F: FnMut(LogicalPlan) -> Result>, >( @@ -795,7 +794,7 @@ impl LogicalPlan { mut f_down: FD, mut f_up: FU, ) -> Result> { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_up_with_subqueries_impl< FD: FnMut(LogicalPlan) -> Result>, FU: FnMut(LogicalPlan) -> Result>, diff --git a/datafusion/expr/src/table_source.rs b/datafusion/expr/src/table_source.rs index e9a677de50c1..d62484153f53 100644 --- a/datafusion/expr/src/table_source.rs +++ b/datafusion/expr/src/table_source.rs @@ -55,7 +55,7 @@ pub enum TableProviderFilterPushDown { pub enum TableType { /// An ordinary physical table. Base, - /// A non-materialised table that itself uses a query internally to provide data. + /// A non-materialized table that itself uses a query internally to provide data. View, /// A transient table. Temporary, @@ -99,7 +99,7 @@ pub trait TableSource: Sync + Send { } /// Tests whether the table provider can make use of any or all filter expressions - /// to optimise data retrieval. Only non-volatile expressions are passed to this function. + /// to optimize data retrieval. Only non-volatile expressions are passed to this function. fn supports_filters_pushdown( &self, filters: &[&Expr], diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 7d2906e1731b..96bb5c4b2d8f 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -50,17 +50,21 @@ pub fn data_types_with_scalar_udf( func: &ScalarUDF, ) -> Result> { let signature = func.signature(); + let type_signature = &signature.type_signature; if current_types.is_empty() { - if signature.type_signature.supports_zero_argument() { + if type_signature.supports_zero_argument() { return Ok(vec![]); + } else if type_signature.used_to_support_zero_arguments() { + // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 + return plan_err!("{} does not support zero arguments. Use TypeSignature::Nullary for zero arguments.", func.name()); } else { return plan_err!("{} does not support zero arguments.", func.name()); } } let valid_types = - get_valid_types_with_scalar_udf(&signature.type_signature, current_types, func)?; + get_valid_types_with_scalar_udf(type_signature, current_types, func)?; if valid_types .iter() @@ -69,12 +73,7 @@ pub fn data_types_with_scalar_udf( return Ok(current_types.to_vec()); } - try_coerce_types( - func.name(), - valid_types, - current_types, - &signature.type_signature, - ) + try_coerce_types(func.name(), valid_types, current_types, type_signature) } /// Performs type coercion for aggregate function arguments. 
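The upgrade error added above points zero-argument UDFs at `TypeSignature::Nullary`. A hedged sketch of what that migration looks like for a signature declaration, assuming `Signature`, `TypeSignature`, and `Volatility` as re-exported by `datafusion_expr`:

use datafusion_expr::{Signature, TypeSignature, Volatility};

// A zero-argument UDF signature. Declaring zero arguments through an older
// form such as `TypeSignature::Any(0)` now produces the
// "Use TypeSignature::Nullary for zero arguments" error shown above.
fn zero_argument_signature() -> Signature {
    Signature::new(TypeSignature::Nullary, Volatility::Immutable)
}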
@@ -89,20 +88,21 @@ pub fn data_types_with_aggregate_udf( func: &AggregateUDF, ) -> Result> { let signature = func.signature(); + let type_signature = &signature.type_signature; if current_types.is_empty() { - if signature.type_signature.supports_zero_argument() { + if type_signature.supports_zero_argument() { return Ok(vec![]); + } else if type_signature.used_to_support_zero_arguments() { + // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 + return plan_err!("{} does not support zero arguments. Use TypeSignature::Nullary for zero arguments.", func.name()); } else { return plan_err!("{} does not support zero arguments.", func.name()); } } - let valid_types = get_valid_types_with_aggregate_udf( - &signature.type_signature, - current_types, - func, - )?; + let valid_types = + get_valid_types_with_aggregate_udf(type_signature, current_types, func)?; if valid_types .iter() .any(|data_type| data_type == current_types) @@ -110,12 +110,7 @@ pub fn data_types_with_aggregate_udf( return Ok(current_types.to_vec()); } - try_coerce_types( - func.name(), - valid_types, - current_types, - &signature.type_signature, - ) + try_coerce_types(func.name(), valid_types, current_types, type_signature) } /// Performs type coercion for window function arguments. @@ -130,17 +125,21 @@ pub fn data_types_with_window_udf( func: &WindowUDF, ) -> Result> { let signature = func.signature(); + let type_signature = &signature.type_signature; if current_types.is_empty() { - if signature.type_signature.supports_zero_argument() { + if type_signature.supports_zero_argument() { return Ok(vec![]); + } else if type_signature.used_to_support_zero_arguments() { + // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 + return plan_err!("{} does not support zero arguments. Use TypeSignature::Nullary for zero arguments.", func.name()); } else { return plan_err!("{} does not support zero arguments.", func.name()); } } let valid_types = - get_valid_types_with_window_udf(&signature.type_signature, current_types, func)?; + get_valid_types_with_window_udf(type_signature, current_types, func)?; if valid_types .iter() .any(|data_type| data_type == current_types) @@ -148,12 +147,7 @@ pub fn data_types_with_window_udf( return Ok(current_types.to_vec()); } - try_coerce_types( - func.name(), - valid_types, - current_types, - &signature.type_signature, - ) + try_coerce_types(func.name(), valid_types, current_types, type_signature) } /// Performs type coercion for function arguments. @@ -168,18 +162,26 @@ pub fn data_types( current_types: &[DataType], signature: &Signature, ) -> Result> { + let type_signature = &signature.type_signature; + if current_types.is_empty() { - if signature.type_signature.supports_zero_argument() { + if type_signature.supports_zero_argument() { return Ok(vec![]); + } else if type_signature.used_to_support_zero_arguments() { + // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 + return plan_err!( + "signature {:?} does not support zero arguments. 
Use TypeSignature::Nullary for zero arguments.", + type_signature + ); } else { return plan_err!( "signature {:?} does not support zero arguments.", - &signature.type_signature + type_signature ); } } - let valid_types = get_valid_types(&signature.type_signature, current_types)?; + let valid_types = get_valid_types(type_signature, current_types)?; if valid_types .iter() .any(|data_type| data_type == current_types) @@ -187,12 +189,7 @@ pub fn data_types( return Ok(current_types.to_vec()); } - try_coerce_types( - function_name, - valid_types, - current_types, - &signature.type_signature, - ) + try_coerce_types(function_name, valid_types, current_types, type_signature) } fn is_well_supported_signature(type_signature: &TypeSignature) -> bool { @@ -523,7 +520,7 @@ fn get_valid_types( TypeSignature::Numeric(number) => { function_length_check(current_types.len(), *number)?; - // Find common numeric type amongs given types except string + // Find common numeric type among given types except string let mut valid_type = current_types.first().unwrap().to_owned(); for t in current_types.iter().skip(1) { let logical_data_type: NativeType = t.into(); diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 83200edfa24c..51c42b5c4c30 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -17,7 +17,7 @@ //! [`ScalarUDF`]: Scalar User Defined Functions -use crate::expr::schema_name_from_exprs_comma_seperated_without_space; +use crate::expr::schema_name_from_exprs_comma_separated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::{ @@ -436,7 +436,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { Ok(format!( "{}({})", self.name(), - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 4bfc3f07bb14..39e1e8f261a2 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -344,7 +344,7 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// optimizations manually for specific UDFs. /// /// Example: - /// [`simplify_udwf_expression.rs`]: + /// [`advanced_udwf.rs`]: /// /// # Returns /// [None] if simplify is not defined or, diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 6f7c5d379260..9d0a2b5b95f6 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -67,7 +67,7 @@ pub fn grouping_set_expr_count(group_expr: &[Expr]) -> Result { "Invalid group by expressions, GroupingSet must be the only expression" ); } - // Groupings sets have an additional interal column for the grouping id + // Groupings sets have an additional integral column for the grouping id Ok(grouping_set.distinct_expr().len() + 1) } else { grouping_set_to_exprlist(group_expr).map(|exprs| exprs.len()) @@ -1112,7 +1112,7 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: Vec<&'a Expr>) -> Vec<& } } -/// Iteratate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// Iterate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` /// /// See [`split_conjunction_owned`] for more details and an example. 
pub fn iter_conjunction(expr: &Expr) -> impl Iterator { @@ -1136,7 +1136,7 @@ pub fn iter_conjunction(expr: &Expr) -> impl Iterator { }) } -/// Iteratate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// Iterate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` /// /// See [`split_conjunction_owned`] for more details and an example. pub fn iter_conjunction_owned(expr: Expr) -> impl Iterator { @@ -1301,7 +1301,7 @@ pub fn conjunction(filters: impl IntoIterator) -> Option { /// col("b").eq(lit(2)), /// ]; /// -/// // use disjuncton to join them together with `OR` +/// // use disjunction to join them together with `OR` /// assert_eq!(disjunction(split), Some(expr)); /// ``` pub fn disjunction(filters: impl IntoIterator) -> Option { diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 222914315d70..815d5742afd2 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -274,7 +274,7 @@ impl WindowFrame { Ok(()) } - /// Returns whether the window frame can accept multiple ORDER BY expressons. + /// Returns whether the window frame can accept multiple ORDER BY expressions. pub fn can_accept_multi_orderby(&self) -> bool { match self.units { WindowFrameUnits::Rows => true, diff --git a/datafusion/ffi/src/table_provider.rs b/datafusion/ffi/src/table_provider.rs index 01f7c46106a2..b229d908d10d 100644 --- a/datafusion/ffi/src/table_provider.rs +++ b/datafusion/ffi/src/table_provider.rs @@ -66,7 +66,7 @@ use datafusion::error::Result; /// calls defined on the `FFI_` structs. The second purpose of the `Foreign` /// structs is to contain additional data that may be needed by the traits that /// are implemented on them. Some of these traits require borrowing data which -/// can be far more convienent to be locally stored. +/// can be far more convenient to be locally stored. /// /// For example, we have a struct `FFI_TableProvider` to give access to the /// `TableProvider` functions like `table_type()` and `scan()`. If we write a @@ -318,7 +318,7 @@ impl FFI_TableProvider { } } -/// This wrapper struct exists on the reciever side of the FFI interface, so it has +/// This wrapper struct exists on the receiver side of the FFI interface, so it has /// no guarantees about being able to access the data in `private_data`. Any functions /// defined on this struct must only use the stable functions provided in /// FFI_TableProvider to interact with the foreign table provider. @@ -397,7 +397,7 @@ impl TableProvider for ForeignTableProvider { } /// Tests whether the table provider can make use of a filter expression - /// to optimise data retrieval. + /// to optimize data retrieval. fn supports_filters_pushdown( &self, filters: &[&Expr], diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 03e4ef557269..aa2f5a586e87 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -79,7 +79,7 @@ use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; /// /// Logical group Current Min/Max value for that group stored /// number as a ScalarValue which points to an -/// indivdually allocated String +/// individually allocated String /// ///``` /// @@ -281,7 +281,7 @@ impl GroupsAccumulatorAdapter { /// See [`Self::allocation_bytes`] for rationale. 
fn free_allocation(&mut self, size: usize) { // use saturating sub to avoid errors if the accumulators - // report erronious sizes + // report erroneous sizes self.allocation_bytes = self.allocation_bytes.saturating_sub(size) } diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 13e40a2b9966..378fc8c42bc6 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -23,7 +23,7 @@ //! [Facebook's Folly TDigest] implementation. //! //! Alterations include reduction of runtime heap allocations, broader type -//! support, (de-)serialisation support, reduced type conversions and null value +//! support, (de-)serialization support, reduced type conversions and null value //! tolerance. //! //! [TDigest sketch algorithm]: https://arxiv.org/abs/1902.04023 @@ -612,7 +612,7 @@ impl TDigest { ] } - /// Unpack the serialised state of a [`TDigest`] produced by + /// Unpack the serialized state of a [`TDigest`] produced by /// [`Self::to_scalar_state()`]. /// /// # Correctness diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 61424e8f2445..000c69d9f331 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -231,7 +231,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { } #[allow(rustdoc::private_intra_doc_links)] - /// See [`TDigest::to_scalar_state()`] for a description of the serialised + /// See [`TDigest::to_scalar_state()`] for a description of the serialized /// state. fn state_fields(&self, args: StateFieldsArgs) -> Result> { Ok(vec![ diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 10b9b06f1f94..16dac2c1b8f0 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -172,7 +172,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { } #[allow(rustdoc::private_intra_doc_links)] - /// See [`TDigest::to_scalar_state()`] for a description of the serialised + /// See [`TDigest::to_scalar_state()`] for a description of the serialized /// state. fn state_fields(&self, args: StateFieldsArgs) -> Result> { self.approx_percentile_cont.state_fields(args) diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs index ffbf2ceef052..d4ae27533c6d 100644 --- a/datafusion/functions-aggregate/src/covariance.rs +++ b/datafusion/functions-aggregate/src/covariance.rs @@ -246,7 +246,7 @@ impl AggregateUDFImpl for CovariancePopulation { /// Journal of the American Statistical Association. 69 (348): 859–866. doi:10.2307/2286154. JSTOR 2286154. /// /// Though it is not covered in the original paper but is based on the same idea, as a result the algorithm is online, -/// parallelizable and numerically stable. +/// parallelize and numerically stable. #[derive(Debug)] pub struct CovarianceAccumulator { diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index a0f7634c5fa8..c4e05bd57de6 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -372,7 +372,7 @@ macro_rules! 
typed_min_max_batch_string { ScalarValue::$SCALAR(value) }}; } -// Statically-typed version of min/max(array) -> ScalarValue for binay types. +// Statically-typed version of min/max(array) -> ScalarValue for binary types. macro_rules! typed_min_max_batch_binary { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = downcast_value!($VALUES, $ARRAYTYPE); diff --git a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs index 25499c252191..725b7a29bd47 100644 --- a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs +++ b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs @@ -449,7 +449,7 @@ impl MinMaxBytesState { self.min_max.resize(total_num_groups, None); // Minimize value copies by calculating the new min/maxes for each group // in this batch (either the existing min/max or the new input value) - // and updating the owne values in `self.min_maxes` at most once + // and updating the owned values in `self.min_maxes` at most once let mut locations = vec![MinMaxLocation::ExistingMinMax; total_num_groups]; // Figure out the new min value for each group @@ -463,12 +463,12 @@ impl MinMaxBytesState { // previous input value was the min/max, so compare it MinMaxLocation::Input(existing_val) => existing_val, MinMaxLocation::ExistingMinMax => { - let Some(exising_val) = self.min_max[group_index].as_ref() else { + let Some(existing_val) = self.min_max[group_index].as_ref() else { // no existing min/max, so this is the new min/max locations[group_index] = MinMaxLocation::Input(new_val); continue; }; - exising_val.as_ref() + existing_val.as_ref() } }; diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index ced43ea8f00c..887daa71ec55 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -22,7 +22,7 @@ use async_trait::async_trait; use datafusion_catalog::Session; use datafusion_catalog::TableFunctionImpl; use datafusion_catalog::TableProvider; -use datafusion_common::{not_impl_err, plan_err, Result, ScalarValue}; +use datafusion_common::{plan_err, Result, ScalarValue}; use datafusion_expr::{Expr, TableType}; use datafusion_physical_plan::memory::{LazyBatchGenerator, LazyMemoryExec}; use datafusion_physical_plan::ExecutionPlan; @@ -30,28 +30,45 @@ use parking_lot::RwLock; use std::fmt; use std::sync::Arc; -/// Table that generates a series of integers from `start`(inclusive) to `end`(inclusive) +/// Indicates the arguments used for generating a series. +#[derive(Debug, Clone)] +enum GenSeriesArgs { + /// ContainsNull signifies that at least one argument(start, end, step) was null, thus no series will be generated. + ContainsNull, + /// AllNotNullArgs holds the start, end, and step values for generating the series when all arguments are not null. 
+ AllNotNullArgs { start: i64, end: i64, step: i64 }, +} + +/// Table that generates a series of integers from `start`(inclusive) to `end`(inclusive), incrementing by step #[derive(Debug, Clone)] struct GenerateSeriesTable { schema: SchemaRef, - // None if input is Null - start: Option, - // None if input is Null - end: Option, + args: GenSeriesArgs, } -/// Table state that generates a series of integers from `start`(inclusive) to `end`(inclusive) +/// Table state that generates a series of integers from `start`(inclusive) to `end`(inclusive), incrementing by step #[derive(Debug, Clone)] struct GenerateSeriesState { schema: SchemaRef, start: i64, // Kept for display end: i64, + step: i64, batch_size: usize, /// Tracks current position when generating table current: i64, } +impl GenerateSeriesState { + fn reach_end(&self, val: i64) -> bool { + if self.step > 0 { + return val > self.end; + } + + val < self.end + } +} + /// Detail to display for 'Explain' plan impl fmt::Display for GenerateSeriesState { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -65,19 +82,19 @@ impl fmt::Display for GenerateSeriesState { impl LazyBatchGenerator for GenerateSeriesState { fn generate_next_batch(&mut self) -> Result> { - // Check if we've reached the end - if self.current > self.end { + let mut buf = Vec::with_capacity(self.batch_size); + while buf.len() < self.batch_size && !self.reach_end(self.current) { + buf.push(self.current); + self.current += self.step; + } + let array = Int64Array::from(buf); + + if array.is_empty() { return Ok(None); } - // Construct batch - let batch_end = (self.current + self.batch_size as i64 - 1).min(self.end); - let array = Int64Array::from_iter_values(self.current..=batch_end); let batch = RecordBatch::try_new(self.schema.clone(), vec![Arc::new(array)])?; - // Update current position for next batch - self.current = batch_end + 1; - Ok(Some(batch)) } } @@ -104,39 +121,31 @@ impl TableProvider for GenerateSeriesTable { _limit: Option, ) -> Result> { let batch_size = state.config_options().execution.batch_size; - match (self.start, self.end) { - (Some(start), Some(end)) => { - if start > end { - return plan_err!( - "End value must be greater than or equal to start value" - ); - } - - Ok(Arc::new(LazyMemoryExec::try_new( - self.schema.clone(), - vec![Arc::new(RwLock::new(GenerateSeriesState { - schema: self.schema.clone(), - start, - end, - current: start, - batch_size, - }))], - )?)) - } - _ => { - // Either start or end is None, return a generator that outputs 0 rows - Ok(Arc::new(LazyMemoryExec::try_new( - self.schema.clone(), - vec![Arc::new(RwLock::new(GenerateSeriesState { - schema: self.schema.clone(), - start: 0, - end: 0, - current: 1, - batch_size, - }))], - )?)) - } - } + + let state = match self.args { + // if args have null, then return 0 row + GenSeriesArgs::ContainsNull => GenerateSeriesState { + schema: self.schema.clone(), + start: 0, + end: 0, + step: 1, + current: 1, + batch_size, + }, + GenSeriesArgs::AllNotNullArgs { start, end, step } => GenerateSeriesState { + schema: self.schema.clone(), + start, + end, + step, + current: start, + batch_size, + }, + }; + + Ok(Arc::new(LazyMemoryExec::try_new( + self.schema.clone(), + vec![Arc::new(RwLock::new(state))], + )?)) } } @@ -144,37 +153,58 @@ impl TableProvider for GenerateSeriesTable { pub struct GenerateSeriesFunc {} impl TableFunctionImpl for GenerateSeriesFunc { - // Check input `exprs` type and number. Input validity check (e.g. 
start <= end) - // will be performed in `TableProvider::scan` fn call(&self, exprs: &[Expr]) -> Result> { - // TODO: support 1 or 3 arguments following DuckDB: - // - if exprs.len() == 3 || exprs.len() == 1 { - return not_impl_err!("generate_series does not support 1 or 3 arguments"); + if exprs.is_empty() || exprs.len() > 3 { + return plan_err!("generate_series function requires 1 to 3 arguments"); } - if exprs.len() != 2 { - return plan_err!("generate_series expects 2 arguments"); + let mut normalize_args = Vec::new(); + for expr in exprs { + match expr { + Expr::Literal(ScalarValue::Null) => {} + Expr::Literal(ScalarValue::Int64(Some(n))) => normalize_args.push(*n), + _ => return plan_err!("First argument must be an integer literal"), + }; } - let start = match &exprs[0] { - Expr::Literal(ScalarValue::Null) => None, - Expr::Literal(ScalarValue::Int64(Some(n))) => Some(*n), - _ => return plan_err!("First argument must be an integer literal"), - }; - - let end = match &exprs[1] { - Expr::Literal(ScalarValue::Null) => None, - Expr::Literal(ScalarValue::Int64(Some(n))) => Some(*n), - _ => return plan_err!("Second argument must be an integer literal"), - }; - let schema = Arc::new(Schema::new(vec![Field::new( "value", DataType::Int64, false, )])); - Ok(Arc::new(GenerateSeriesTable { schema, start, end })) + if normalize_args.len() != exprs.len() { + // contain null + return Ok(Arc::new(GenerateSeriesTable { + schema, + args: GenSeriesArgs::ContainsNull, + })); + } + + let (start, end, step) = match &normalize_args[..] { + [end] => (0, *end, 1), + [start, end] => (*start, *end, 1), + [start, end, step] => (*start, *end, *step), + _ => { + return plan_err!("generate_series function requires 1 to 3 arguments"); + } + }; + + if start > end && step > 0 { + return plan_err!("start is bigger than end, but increment is positive: cannot generate infinite series"); + } + + if start < end && step < 0 { + return plan_err!("start is smaller than end, but increment is negative: cannot generate infinite series"); + } + + if step == 0 { + return plan_err!("step cannot be zero"); + } + + Ok(Arc::new(GenerateSeriesTable { + schema, + args: GenSeriesArgs::AllNotNullArgs { start, end, step }, + })) } } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index de72c7ee946b..fd986c4be41c 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -212,4 +212,4 @@ required-features = ["math_expressions"] [[bench]] harness = false name = "initcap" -required-features = ["string_expressions"] +required-features = ["unicode_expressions"] diff --git a/datafusion/functions/benches/initcap.rs b/datafusion/functions/benches/initcap.rs index c88b6b513980..97c76831b33c 100644 --- a/datafusion/functions/benches/initcap.rs +++ b/datafusion/functions/benches/initcap.rs @@ -24,7 +24,7 @@ use arrow::util::bench_util::{ }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; -use datafusion_functions::string; +use datafusion_functions::unicode; use std::sync::Arc; fn create_args( @@ -46,7 +46,7 @@ fn create_args( } fn criterion_benchmark(c: &mut Criterion) { - let initcap = string::initcap(); + let initcap = unicode::initcap(); for size in [1024, 4096] { let args = create_args::(size, 8, true); c.bench_function( diff --git a/datafusion/functions/src/core/greatest.rs b/datafusion/functions/src/core/greatest.rs index e91ec2b0c4d8..654b2a2987c7 100644 --- a/datafusion/functions/src/core/greatest.rs +++ 
b/datafusion/functions/src/core/greatest.rs @@ -85,7 +85,7 @@ impl GreatestLeastOperator for GreatestFunc { /// Nulls are always considered smaller than any other value fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { // Fast path: - // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorized kernel // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. // - both array does not have any nulls: cmp::gt_eq will return null if any of the input is null while we want to return false in that case if !lhs.data_type().is_nested() diff --git a/datafusion/functions/src/core/least.rs b/datafusion/functions/src/core/least.rs index b9ea65cdb732..085090727773 100644 --- a/datafusion/functions/src/core/least.rs +++ b/datafusion/functions/src/core/least.rs @@ -98,7 +98,7 @@ impl GreatestLeastOperator for LeastFunc { /// Nulls are always considered larger than any other value fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { // Fast path: - // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorized kernel // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. // - both array does not have any nulls: cmp::lt_eq will return null if any of the input is null while we want to return false in that case if !lhs.data_type().is_nested() diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index ba8255d2e472..76fb4bbe5b47 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -115,8 +115,8 @@ pub fn functions() -> Vec> { // `get_field(my_struct_col, "field_name")`. 
// // However, it is also exposed directly for use cases such as - // serializing / deserializing plans with the field access desugared to - // calls to `get_field` + // serializing / deserializing plans with the field access desugared to + // calls to [`get_field`] get_field(), coalesce(), greatest(), diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 0c2d01376de9..7c86047a0243 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -215,7 +215,7 @@ mod tests { #[test] // Ensure that arrays with no nulls can also invoke NULLIF() correctly - fn nullif_int32_nonulls() -> Result<()> { + fn nullif_int32_non_nulls() -> Result<()> { let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); let a = ColumnarValue::Array(Arc::new(a)); diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 6c470eca3d46..eb8c9260601f 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -216,7 +216,7 @@ mod tests { #[test] // Ensure that arrays with no nulls can also invoke nvl() correctly - fn nvl_int32_nonulls() -> Result<()> { + fn nvl_int32_non_nulls() -> Result<()> { let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); let a = ColumnarValue::Array(Arc::new(a)); diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 49bf00d5c17b..bb3f2177b9a4 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -318,7 +318,7 @@ fn to_utc_date_time(nanos: i64) -> DateTime { // Supported intervals: // 1. IntervalDayTime: this means that the stride is in days, hours, minutes, seconds and milliseconds // We will assume month interval won't be converted into this type -// TODO (my next PR): without `INTERVAL` keyword, the stride was converted into ScalarValue::IntervalDayTime somwhere +// TODO (my next PR): without `INTERVAL` keyword, the stride was converted into ScalarValue::IntervalDayTime somewhere // for month interval. I need to find that and make it ScalarValue::IntervalMonthDayNano instead // 2. IntervalMonthDayNano fn date_bin_impl( diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index b43fcb6db706..0f115563c8db 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -105,7 +105,7 @@ impl ScalarUDFImpl for DatePartFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - internal_err!("return_type_from_exprs shoud be called instead") + internal_err!("return_type_from_exprs should be called instead") } fn return_type_from_exprs( diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 9b4a7b04552b..4ed9350e9729 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Regx expressions +//! 
Regex expressions use arrow::array::ArrayDataBuilder; use arrow::array::BufferBuilder; use arrow::array::GenericStringArray; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index f156f070d960..442c055ac37d 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -30,7 +30,6 @@ pub mod concat; pub mod concat_ws; pub mod contains; pub mod ends_with; -pub mod initcap; pub mod levenshtein; pub mod lower; pub mod ltrim; @@ -52,7 +51,6 @@ make_udf_function!(chr::ChrFunc, chr); make_udf_function!(concat::ConcatFunc, concat); make_udf_function!(concat_ws::ConcatWsFunc, concat_ws); make_udf_function!(ends_with::EndsWithFunc, ends_with); -make_udf_function!(initcap::InitcapFunc, initcap); make_udf_function!(levenshtein::LevenshteinFunc, levenshtein); make_udf_function!(ltrim::LtrimFunc, ltrim); make_udf_function!(lower::LowerFunc, lower); @@ -94,10 +92,6 @@ pub mod expr_fn { ends_with, "Returns true if the `string` ends with the `suffix`, false otherwise.", string suffix - ),( - initcap, - "Converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase", - string ),( levenshtein, "Returns the Levenshtein distance between the two given strings", @@ -151,7 +145,7 @@ pub mod expr_fn { "returns uuid v4 as a string value", ), ( contains, - "Return true if search_string is found within string. treated it like a reglike", + "Return true if search_string is found within string.", )); #[doc = "Removes all characters, spaces by default, from both sides of a string"] @@ -177,7 +171,6 @@ pub fn functions() -> Vec> { concat(), concat_ws(), ends_with(), - initcap(), levenshtein(), lower(), ltrim(), diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index a6587a91a9fe..f18573db827f 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -189,7 +189,7 @@ impl StringArrayBuilder { self.offsets_buffer.push(next_offset); } - /// Finalise the builder into a concrete [`StringArray`]. + /// Finalize the builder into a concrete [`StringArray`]. /// /// # Panics /// @@ -358,7 +358,7 @@ impl LargeStringArrayBuilder { self.offsets_buffer.push(next_offset); } - /// Finalise the builder into a concrete [`LargeStringArray`]. + /// Finalize the builder into a concrete [`LargeStringArray`]. 
/// /// # Panics /// diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/unicode/initcap.rs similarity index 68% rename from datafusion/functions/src/string/initcap.rs rename to datafusion/functions/src/unicode/initcap.rs index 2780dcaeeb83..e9f966b95868 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/unicode/initcap.rs @@ -18,7 +18,9 @@ use std::any::Any; use std::sync::{Arc, OnceLock}; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; +use arrow::array::{ + Array, ArrayRef, GenericStringBuilder, OffsetSizeTrait, StringViewBuilder, +}; use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -74,7 +76,7 @@ impl ScalarUDFImpl for InitcapFunc { DataType::LargeUtf8 => make_scalar_function(initcap::, vec![])(args), DataType::Utf8View => make_scalar_function(initcap_utf8view, vec![])(args), other => { - exec_err!("Unsupported data type {other:?} for function initcap") + exec_err!("Unsupported data type {other:?} for function `initcap`") } } } @@ -90,9 +92,8 @@ fn get_initcap_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder( DOC_SECTION_STRING, - "Capitalizes the first character in each word in the ASCII input string. \ - Words are delimited by non-alphanumeric characters.\n\n\ - Note this function does not support UTF-8 characters.", + "Capitalizes the first character in each word in the input string. \ + Words are delimited by non-alphanumeric characters.", "initcap(str)", ) .with_sql_example( @@ -123,50 +124,70 @@ fn get_initcap_doc() -> &'static Documentation { fn initcap(args: &[ArrayRef]) -> Result { let string_array = as_generic_string_array::(&args[0])?; - // first map is the iterator, second is for the `Option<_>` - let result = string_array - .iter() - .map(initcap_string) - .collect::>(); + let mut builder = GenericStringBuilder::::with_capacity( + string_array.len(), + string_array.value_data().len(), + ); - Ok(Arc::new(result) as ArrayRef) + string_array.iter().for_each(|str| match str { + Some(s) => { + let initcap_str = initcap_string(s); + builder.append_value(initcap_str); + } + None => builder.append_null(), + }); + + Ok(Arc::new(builder.finish()) as ArrayRef) } fn initcap_utf8view(args: &[ArrayRef]) -> Result { let string_view_array = as_string_view_array(&args[0])?; - let result = string_view_array - .iter() - .map(initcap_string) - .collect::(); + let mut builder = StringViewBuilder::with_capacity(string_view_array.len()); + + string_view_array.iter().for_each(|str| match str { + Some(s) => { + let initcap_str = initcap_string(s); + builder.append_value(initcap_str); + } + None => builder.append_null(), + }); - Ok(Arc::new(result) as ArrayRef) + Ok(Arc::new(builder.finish()) as ArrayRef) } -fn initcap_string(input: Option<&str>) -> Option { - input.map(|s| { - let mut result = String::with_capacity(s.len()); - let mut prev_is_alphanumeric = false; +fn initcap_string(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut prev_is_alphanumeric = false; - for c in s.chars() { - let transformed = if prev_is_alphanumeric { - c.to_ascii_lowercase() + if input.is_ascii() { + for c in input.chars() { + if prev_is_alphanumeric { + result.push(c.to_ascii_lowercase()); } else { - c.to_ascii_uppercase() + result.push(c.to_ascii_uppercase()); }; - result.push(transformed); prev_is_alphanumeric = c.is_ascii_alphanumeric(); } + } else { + for c in input.chars() { + if 
prev_is_alphanumeric { + result.extend(c.to_lowercase()); + } else { + result.extend(c.to_uppercase()); + } + prev_is_alphanumeric = c.is_alphanumeric(); + } + } - result - }) + result } #[cfg(test)] mod tests { - use crate::string::initcap::InitcapFunc; + use crate::unicode::initcap::InitcapFunc; use crate::utils::test::test_function; - use arrow::array::{Array, StringArray}; + use arrow::array::{Array, StringArray, StringViewArray}; use arrow::datatypes::DataType::Utf8; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -181,6 +202,19 @@ mod tests { Utf8, StringArray ); + test_function!( + InitcapFunc::new(), + vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "êM ả ñAnDÚ ÁrBOL ОлЕГ ИвАНОВИч ÍslENsku ÞjóðaRiNNaR εΛλΗΝΙκΉ" + .to_string() + )))], + Ok(Some( + "Êm Ả Ñandú Árbol Олег Иванович Íslensku Þjóðarinnar Ελληνική" + )), + &str, + Utf8, + StringArray + ); test_function!( InitcapFunc::new(), vec![ColumnarValue::Scalar(ScalarValue::from(""))], @@ -205,6 +239,7 @@ mod tests { Utf8, StringArray ); + test_function!( InitcapFunc::new(), vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some( @@ -213,7 +248,7 @@ mod tests { Ok(Some("Hi Thomas")), &str, Utf8, - StringArray + StringViewArray ); test_function!( InitcapFunc::new(), @@ -223,7 +258,20 @@ mod tests { Ok(Some("Hi Thomas With M0re Than 12 Chars")), &str, Utf8, - StringArray + StringViewArray + ); + test_function!( + InitcapFunc::new(), + vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "đẸp đẼ êM ả ñAnDÚ ÁrBOL ОлЕГ ИвАНОВИч ÍslENsku ÞjóðaRiNNaR εΛλΗΝΙκΉ" + .to_string() + )))], + Ok(Some( + "Đẹp Đẽ Êm Ả Ñandú Árbol Олег Иванович Íslensku Þjóðarinnar Ελληνική" + )), + &str, + Utf8, + StringViewArray ); test_function!( InitcapFunc::new(), @@ -233,7 +281,7 @@ mod tests { Ok(Some("")), &str, Utf8, - StringArray + StringViewArray ); test_function!( InitcapFunc::new(), @@ -241,7 +289,7 @@ mod tests { Ok(None), &str, Utf8, - StringArray + StringViewArray ); Ok(()) diff --git a/datafusion/functions/src/unicode/mod.rs b/datafusion/functions/src/unicode/mod.rs index f31ece9196d8..e8e3eb3f4e75 100644 --- a/datafusion/functions/src/unicode/mod.rs +++ b/datafusion/functions/src/unicode/mod.rs @@ -23,6 +23,7 @@ use datafusion_expr::ScalarUDF; pub mod character_length; pub mod find_in_set; +pub mod initcap; pub mod left; pub mod lpad; pub mod reverse; @@ -36,6 +37,7 @@ pub mod translate; // create UDFs make_udf_function!(character_length::CharacterLengthFunc, character_length); make_udf_function!(find_in_set::FindInSetFunc, find_in_set); +make_udf_function!(initcap::InitcapFunc, initcap); make_udf_function!(left::LeftFunc, left); make_udf_function!(lpad::LPadFunc, lpad); make_udf_function!(right::RightFunc, right); @@ -94,6 +96,10 @@ pub mod expr_fn { left, "returns the first `n` characters in the `string`", string n + ),( + initcap, + "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase", + string ),( find_in_set, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings", @@ -126,6 +132,7 @@ pub fn functions() -> Vec> { vec![ character_length(), find_in_set(), + initcap(), left(), lpad(), reverse(), diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 9979df689b0a..ba0dedc57675 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -35,6 +35,9 @@ workspace = true name = "datafusion_optimizer" path = "src/lib.rs" 
+[features] +recursive_protection = ["dep:recursive"] + [dependencies] arrow = { workspace = true } chrono = { workspace = true } @@ -44,7 +47,7 @@ datafusion-physical-expr = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } log = { workspace = true } -recursive = { workspace = true } +recursive = { workspace = true, optional = true } regex = { workspace = true } regex-syntax = "0.8.0" diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index fee06eeb9f75..7129da85f375 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -17,7 +17,6 @@ use crate::analyzer::check_plan; use crate::utils::collect_subquery_cols; -use recursive::recursive; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{plan_err, Result}; @@ -79,7 +78,7 @@ pub fn check_subquery_expr( match outer_plan { LogicalPlan::Projection(_) | LogicalPlan::Filter(_) => Ok(()), - LogicalPlan::Aggregate(Aggregate {group_expr, aggr_expr,..}) => { + LogicalPlan::Aggregate(Aggregate { group_expr, aggr_expr, .. }) => { if group_expr.contains(expr) && !aggr_expr.contains(expr) { // TODO revisit this validation logic plan_err!( @@ -88,7 +87,7 @@ pub fn check_subquery_expr( } else { Ok(()) } - }, + } _ => plan_err!( "Correlated scalar subquery can only be used in Projection, Filter, Aggregate plan nodes" ) @@ -129,7 +128,7 @@ fn check_correlations_in_subquery(inner_plan: &LogicalPlan) -> Result<()> { } // Recursively check the unsupported outer references in the sub query plan. -#[recursive] +#[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) -> Result<()> { if !can_contain_outer_ref && inner_plan.contains_outer_reference() { return plan_err!("Accessing outer reference columns is not allowed in the plan"); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 628c1498f973..89dd4ca60a6a 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -943,7 +943,7 @@ pub fn coerce_union_schema(inputs: &[Arc]) -> Result { ); } - // coerce data type and nullablity for each field + // coerce data type and nullability for each field for (union_datatype, union_nullable, union_field_map, plan_field) in izip!( union_datatypes.iter_mut(), union_nullabilities.iter_mut(), diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index e7c9a198f3ad..4b9a83fd3e4c 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -22,7 +22,6 @@ use std::fmt::Debug; use std::sync::Arc; use crate::{OptimizerConfig, OptimizerRule}; -use recursive::recursive; use crate::optimizer::ApplyOrder; use crate::utils::NamePreserver; @@ -383,10 +382,10 @@ impl CommonSubexprEliminate { // keep column names and get rid of additional name // preserving logic here. 
if let Some(aggr_expr) = aggr_expr { - let name_perserver = NamePreserver::new_for_projection(); + let name_preserver = NamePreserver::new_for_projection(); let saved_names = aggr_expr .iter() - .map(|expr| name_perserver.save(expr)) + .map(|expr| name_preserver.save(expr)) .collect::>(); let new_aggr_expr = rewritten_aggr_expr .into_iter() @@ -532,7 +531,7 @@ impl OptimizerRule for CommonSubexprEliminate { None } - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite( &self, plan: LogicalPlan, @@ -952,7 +951,7 @@ mod test { )? .build()?; - let expected ="Aggregate: groupBy=[[]], aggr=[[avg(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]]\ + let expected = "Aggregate: groupBy=[[]], aggr=[[avg(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]]\ \n Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\ \n TableScan: test"; diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 7fdad5ba4b6e..3e5a85ea02db 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -1593,7 +1593,7 @@ mod tests { assert_optimized_plan_equal(plan, expected) } - /// Test for correlated exists subquery filter with disjustions + /// Test for correlated exists subquery filter with disjunctions #[test] fn exists_subquery_disjunction() -> Result<()> { let sq = Arc::new( diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 32b7ce44a63a..d35572e6d34a 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -17,7 +17,6 @@ //! [`EliminateCrossJoin`] converts `CROSS JOIN` to `INNER JOIN` if join predicates are available. 
use crate::{OptimizerConfig, OptimizerRule}; -use recursive::recursive; use std::sync::Arc; use crate::join_key_set::JoinKeySet; @@ -80,7 +79,7 @@ impl OptimizerRule for EliminateCrossJoin { true } - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite( &self, plan: LogicalPlan, @@ -94,7 +93,7 @@ impl OptimizerRule for EliminateCrossJoin { let parent_predicate = if let LogicalPlan::Filter(filter) = plan { // if input isn't a join that can potentially be rewritten // avoid unwrapping the input - let rewriteable = matches!( + let rewritable = matches!( filter.input.as_ref(), LogicalPlan::Join(Join { join_type: JoinType::Inner, @@ -102,7 +101,7 @@ impl OptimizerRule for EliminateCrossJoin { }) ); - if !rewriteable { + if !rewritable { // recursively try to rewrite children return rewrite_children(self, LogicalPlan::Filter(filter), config); } @@ -651,7 +650,7 @@ mod tests { " Inner Join: t1.a = t2.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t1.a = t3.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", - " TableScan: t3 [a:UInt32, b:UInt32, c:UInt32]", + " TableScan: t3 [a:UInt32, b:UInt32, c:UInt32]", " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]" ]; @@ -1237,10 +1236,10 @@ mod tests { .build()?; let expected = vec![ - "Filter: t1.a + UInt32(100) = t2.a * UInt32(2) OR t2.b = t1.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", - " Cross Join: [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", - " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", - " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]", + "Filter: t1.a + UInt32(100) = t2.a * UInt32(2) OR t2.b = t1.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + " Cross Join: [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", + " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]", ]; assert_optimized_plan_eq(plan, expected); @@ -1293,10 +1292,10 @@ mod tests { .build()?; let expected = vec![ - "Filter: t2.c < UInt32(15) OR t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", - " Inner Join: t1.a + UInt32(100) = t2.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", - " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", - " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]", + "Filter: t2.c < UInt32(15) OR t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + " Inner Join: t1.a + UInt32(100) = t2.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", + " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]", ]; assert_optimized_plan_eq(plan, expected); diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 035a1d2da229..1213c8ffb368 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -94,7 +94,7 @@ impl OptimizerRule for EliminateGroupByConstant { /// Checks if expression is constant, and can be eliminated from group by. /// /// Intended to be used only within this rule, helper function, which heavily -/// reiles on `SimplifyExpressions` result. +/// relies on `SimplifyExpressions` result. 
fn is_constant_expression(expr: &Expr) -> bool { match expr { Expr::Alias(e) => is_constant_expression(&e.expr), diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 1519c54dbf68..b7dd391586a1 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -21,7 +21,6 @@ mod required_indices; use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use recursive::recursive; use std::collections::HashSet; use std::sync::Arc; @@ -36,7 +35,7 @@ use datafusion_expr::{ TableScan, Window, }; -use crate::optimize_projections::required_indices::RequiredIndicies; +use crate::optimize_projections::required_indices::RequiredIndices; use crate::utils::NamePreserver; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, @@ -86,7 +85,7 @@ impl OptimizerRule for OptimizeProjections { config: &dyn OptimizerConfig, ) -> Result> { // All output fields are necessary: - let indices = RequiredIndicies::new_for_all_exprs(&plan); + let indices = RequiredIndices::new_for_all_exprs(&plan); optimize_projections(plan, config, indices) } } @@ -110,11 +109,11 @@ impl OptimizerRule for OptimizeProjections { /// columns. /// - `Ok(None)`: Signal that the given logical plan did not require any change. /// - `Err(error)`: An error occurred during the optimization process. -#[recursive] +#[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn optimize_projections( plan: LogicalPlan, config: &dyn OptimizerConfig, - indices: RequiredIndicies, + indices: RequiredIndices, ) -> Result> { // Recursively rewrite any nodes that may be able to avoid computation given // their parents' required indices. @@ -177,7 +176,7 @@ fn optimize_projections( let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter()); let schema = aggregate.input.schema(); let necessary_indices = - RequiredIndicies::new().with_exprs(schema, all_exprs_iter); + RequiredIndices::new().with_exprs(schema, all_exprs_iter); let necessary_exprs = necessary_indices.get_required_exprs(schema); return optimize_projections( @@ -275,7 +274,7 @@ fn optimize_projections( // For other plan node types, calculate indices for columns they use and // try to rewrite their children - let mut child_required_indices: Vec = match &plan { + let mut child_required_indices: Vec = match &plan { LogicalPlan::Sort(_) | LogicalPlan::Filter(_) | LogicalPlan::Repartition(_) @@ -321,7 +320,7 @@ fn optimize_projections( // EXISTS expression), we may not need to require all indices. plan.inputs() .into_iter() - .map(RequiredIndicies::new_for_all_exprs) + .map(RequiredIndices::new_for_all_exprs) .collect() } LogicalPlan::Extension(extension) => { @@ -341,7 +340,7 @@ fn optimize_projections( .into_iter() .zip(necessary_children_indices) .map(|(child, necessary_indices)| { - RequiredIndicies::new_from_indices(necessary_indices) + RequiredIndices::new_from_indices(necessary_indices) .with_plan_exprs(&plan, child.schema()) }) .collect::>>()? @@ -380,7 +379,7 @@ fn optimize_projections( LogicalPlan::Unnest(Unnest { dependency_indices, .. }) => { - vec![RequiredIndicies::new_from_indices( + vec![RequiredIndices::new_from_indices( dependency_indices.clone(), )] } @@ -444,7 +443,7 @@ fn optimize_projections( /// - `Ok(Some(Projection))`: Merge was beneficial and successful. Contains the /// merged projection. 
/// - `Ok(None)`: Signals that merge is not beneficial (and has not taken place). -/// - `Err(error)`: An error occured during the function call. +/// - `Err(error)`: An error occurred during the function call. fn merge_consecutive_projections(proj: Projection) -> Result> { let Projection { expr, @@ -673,9 +672,9 @@ fn outer_columns_helper_multi<'a, 'b>( /// adjusted based on the join type. fn split_join_requirements( left_len: usize, - indices: RequiredIndicies, + indices: RequiredIndices, join_type: &JoinType, -) -> (RequiredIndicies, RequiredIndicies) { +) -> (RequiredIndices, RequiredIndices) { match join_type { // In these cases requirements are split between left/right children: JoinType::Inner @@ -688,10 +687,10 @@ fn split_join_requirements( indices.split_off(left_len) } // All requirements can be re-routed to left child directly. - JoinType::LeftAnti | JoinType::LeftSemi => (indices, RequiredIndicies::new()), + JoinType::LeftAnti | JoinType::LeftSemi => (indices, RequiredIndices::new()), // All requirements can be re-routed to right side directly. // No need to change index, join schema is right child schema. - JoinType::RightSemi | JoinType::RightAnti => (RequiredIndicies::new(), indices), + JoinType::RightSemi | JoinType::RightAnti => (RequiredIndices::new(), indices), } } @@ -742,18 +741,18 @@ fn add_projection_on_top_if_helpful( /// /// - `Ok(Some(LogicalPlan))`: Contains the rewritten projection /// - `Ok(None)`: No rewrite necessary. -/// - `Err(error)`: An error occured during the function call. +/// - `Err(error)`: An error occurred during the function call. fn rewrite_projection_given_requirements( proj: Projection, config: &dyn OptimizerConfig, - indices: &RequiredIndicies, + indices: &RequiredIndices, ) -> Result> { let Projection { expr, input, .. } = proj; let exprs_used = indices.get_at_indices(&expr); let required_indices = - RequiredIndicies::new().with_exprs(input.schema(), exprs_used.iter()); + RequiredIndices::new().with_exprs(input.schema(), exprs_used.iter()); // rewrite the children projection, and if they are changed rewrite the // projection down diff --git a/datafusion/optimizer/src/optimize_projections/required_indices.rs b/datafusion/optimizer/src/optimize_projections/required_indices.rs index 55e8081eaeb2..c1e0885c9b5f 100644 --- a/datafusion/optimizer/src/optimize_projections/required_indices.rs +++ b/datafusion/optimizer/src/optimize_projections/required_indices.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`RequiredIndicies`] helper for OptimizeProjection +//! [`RequiredIndices`] helper for OptimizeProjection use crate::optimize_projections::outer_columns; use datafusion_common::tree_node::TreeNodeRecursion; @@ -35,7 +35,7 @@ use datafusion_expr::{Expr, LogicalPlan}; /// indices were added `[3, 2, 4, 3, 6, 1]`, the instance would be represented /// by `[1, 2, 3, 4, 6]`. #[derive(Debug, Clone, Default)] -pub(super) struct RequiredIndicies { +pub(super) struct RequiredIndices { /// The indices of the required columns in the indices: Vec, /// If putting a projection above children is beneficial for the parent. 
@@ -43,7 +43,7 @@ pub(super) struct RequiredIndicies { projection_beneficial: bool, } -impl RequiredIndicies { +impl RequiredIndices { /// Create a new, empty instance pub fn new() -> Self { Self::default() } diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 2e2c8fb1d6f8..9e7f8eed8a25 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -134,7 +134,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { return Ok(Transformed::no(LogicalPlan::Projection(projection))); } - let mut all_subqueryies = vec![]; + let mut all_subqueries = vec![]; let mut expr_to_rewrite_expr_map = HashMap::new(); let mut subquery_to_expr_map = HashMap::new(); for expr in projection.expr.iter() { @@ -143,15 +143,15 @@ impl OptimizerRule for ScalarSubqueryToJoin { for (subquery, _) in &subqueries { subquery_to_expr_map.insert(subquery.clone(), expr.clone()); } - all_subqueryies.extend(subqueries); + all_subqueries.extend(subqueries); expr_to_rewrite_expr_map.insert(expr, rewrite_exprs); } - if all_subqueryies.is_empty() { + if all_subqueries.is_empty() { return internal_err!("Expected subqueries not found in projection"); } // iterate through all subqueries in predicate, turning each into a left join let mut cur_input = projection.input.as_ref().clone(); - for (subquery, alias) in all_subqueryies { + for (subquery, alias) in all_subqueries { if let Some((optimized_subquery, expr_check_map)) = build_join(&subquery, &cur_input, &alias)? { @@ -879,7 +879,7 @@ mod tests { Ok(()) } - /// Test for correlated scalar subquery filter with disjustions + /// Test for correlated scalar subquery filter with disjunctions #[test] fn scalar_subquery_disjunction() -> Result<()> { let sq = Arc::new( diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index e3bcb6da8e53..74d2ce0b6be9 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3661,7 +3661,7 @@ mod tests { } #[test] - fn test_like_and_ilke() { + fn test_like_and_ilike() { let null = lit(ScalarValue::Utf8(None)); // expr [NOT] [I]LIKE NULL @@ -3931,7 +3931,7 @@ mod tests { } #[test] - fn simplify_common_factor_conjuction_in_disjunction() { + fn simplify_common_factor_conjunction_in_disjunction() { let props = ExecutionProps::new(); let schema = boolean_test_schema(); let simplifier = diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 31e21d08b569..8cba2c88e244 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -281,7 +281,7 @@ fn is_supported_type(data_type: &DataType) -> bool { || is_supported_dictionary_type(data_type) } -/// Returns true if [[UnwrapCastExprRewriter]] suppors this numeric type +/// Returns true if [[UnwrapCastExprRewriter]] supports this numeric type fn is_supported_numeric_type(data_type: &DataType) -> bool { matches!( data_type, diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 902e53a7f236..b35d978045d9 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -259,7 +259,7 @@ mod tests { assert!(eq_groups.contains(&col_a_expr));
assert!(eq_groups.contains(&col_b_expr)); - // b and c are aliases. Exising equivalence class should expand, + // b and c are aliases. Existing equivalence class should expand, // however there shouldn't be any new equivalence class eq_properties.add_equal_conditions(&col_b_expr, &col_c_expr)?; assert_eq!(eq_properties.eq_group().len(), 1); diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index f019b2e570ff..a7f27ab73684 100755 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -1408,7 +1408,7 @@ fn construct_prefix_orderings( /// current projection expression. /// /// # Example -/// If `dependences` is `a + b ASC` and the dependency map holds dependencies +/// If `dependencies` is `a + b ASC` and the dependency map holds dependencies /// * `a ASC` --> `[c ASC]` /// * `b ASC` --> `[d DESC]`, /// diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 938d775a2ad1..2ab53b214d7f 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -404,7 +404,7 @@ impl PhysicalExpr for BinaryExpr { if self.op.eq(&Operator::And) { if interval.eq(&Interval::CERTAINLY_TRUE) { // A certainly true logical conjunction can only derive from possibly - // true operands. Otherwise, we prove infeasability. + // true operands. Otherwise, we prove infeasibility. Ok((!left_interval.eq(&Interval::CERTAINLY_FALSE) && !right_interval.eq(&Interval::CERTAINLY_FALSE)) .then(|| vec![Interval::CERTAINLY_TRUE, Interval::CERTAINLY_TRUE])) @@ -444,7 +444,7 @@ impl PhysicalExpr for BinaryExpr { } else if self.op.eq(&Operator::Or) { if interval.eq(&Interval::CERTAINLY_FALSE) { // A certainly false logical conjunction can only derive from certainly - // false operands. Otherwise, we prove infeasability. + // false operands. Otherwise, we prove infeasibility. Ok((!left_interval.eq(&Interval::CERTAINLY_TRUE) && !right_interval.eq(&Interval::CERTAINLY_TRUE)) .then(|| vec![Interval::CERTAINLY_FALSE, Interval::CERTAINLY_FALSE])) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 0e307153341b..711a521da14c 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -346,7 +346,10 @@ impl CaseExpr { .downcast_ref::() .expect("predicate should evaluate to a boolean array"); // invert the bitmask - let bit_mask = not(bit_mask)?; + let bit_mask = match bit_mask.null_count() { + 0 => not(bit_mask)?, + _ => not(&prep_null_mask_filter(bit_mask))?, + }; match then_expr.evaluate(batch)? { ColumnarValue::Array(array) => { Ok(ColumnarValue::Array(nullif(&array, &bit_mask)?)) @@ -885,6 +888,32 @@ mod tests { Ok(()) } + #[test] + fn test_when_null_and_some_cond_else_null() -> Result<()> { + let batch = case_test_batch()?; + let schema = batch.schema(); + + let when = binary( + Arc::new(Literal::new(ScalarValue::Boolean(None))), + Operator::And, + binary(col("a", &schema)?, Operator::Eq, lit("foo"), &schema)?, + &schema, + )?; + let then = col("a", &schema)?; + + // SELECT CASE WHEN (NULL AND a = 'foo') THEN a ELSE NULL END + let expr = Arc::new(CaseExpr::try_new(None, vec![(when, then)], None)?); + let result = expr + .evaluate(&batch)? 
+ .into_array(batch.num_rows()) + .expect("Failed to convert to array"); + let result = as_string_array(&result); + + // all result values should be null + assert_eq!(result.logical_null_count(), batch.num_rows()); + Ok(()) + } + fn case_test_batch() -> Result { let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); let a = StringArray::from(vec![Some("foo"), Some("baz"), None, Some("bar")]); diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 5f6932f6d725..0649cbd65d34 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -42,7 +42,7 @@ use datafusion_expr::ColumnarValue; /// /// # Example: /// If the schema is `a`, `b`, `c` the `Column` for `b` would be represented by -/// index 1, since `b` is the second colum in the schema. +/// index 1, since `b` is the second column in the schema. /// /// ``` /// # use datafusion_physical_expr::expressions::Column; diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index c594f039ff2f..232f9769b056 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -113,7 +113,7 @@ mod tests { #[test] fn literal_i32() -> Result<()> { - // create an arbitrary record bacth + // create an arbitrary record batch let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); let a = Int32Array::from(vec![Some(1), None, Some(3), Some(4), Some(5)]); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; diff --git a/datafusion/physical-expr/src/partitioning.rs b/datafusion/physical-expr/src/partitioning.rs index 98c0c864b9f7..eb7e1ea6282b 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -97,7 +97,7 @@ use std::sync::Arc; /// # Additional Examples /// /// A simple `FileScanExec` might produce one output stream (partition) for each -/// file (note the actual DataFusion file scaners can read individual files in +/// file (note the actual DataFusion file scanners can read individual files in /// parallel, potentially producing multiple partitions per file) /// /// Plans such as `SortPreservingMerge` produce a single output stream diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 82c718cfaca3..0ae4115de67a 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -233,7 +233,7 @@ pub fn create_physical_expr( // verify that input data types is consistent with function's `TypeSignature` data_types_with_scalar_udf(&input_expr_types, fun)?; - // Since we have arg_types, we dont need args and schema. + // Since we have arg_types, we don't need args and schema. let return_type = fun.return_type_from_exprs(args, input_dfschema, &input_expr_types)?; diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 2c73df7cfd7d..7afb78b8bf2e 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -808,7 +808,7 @@ mod test { vec![not_in_guarantee("b", [1, 2, 3]), in_guarantee("b", [3, 4])], ); // b IN (1, 2, 3) OR b = 2 - // TODO this should be in_guarantee("b", [1, 2, 3]) but currently we don't support to anylize this kind of disjunction. 
Only `ColOpLit OR ColOpLit` is supported. + // TODO this should be in_guarantee("b", [1, 2, 3]) but currently we don't support to analyze this kind of disjunction. Only `ColOpLit OR ColOpLit` is supported. test_analyze( col("b") .in_list(vec![lit(1), lit(2), lit(3)], false) diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 838617ae9889..3454209445dc 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -31,6 +31,9 @@ rust-version = { workspace = true } [lints] workspace = true +[features] +recursive_protection = ["dep:recursive"] + [dependencies] arrow = { workspace = true } datafusion-common = { workspace = true, default-features = true } @@ -40,7 +43,7 @@ datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } itertools = { workspace = true } log = { workspace = true } -recursive = { workspace = true } +recursive = { workspace = true, optional = true } [dev-dependencies] datafusion-expr = { workspace = true } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 87077183110d..a00bc4b1d571 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -25,7 +25,6 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::udaf::{AggregateFunctionExpr, StatisticsArgs}; use datafusion_physical_plan::{expressions, ExecutionPlan}; -use recursive::recursive; use std::sync::Arc; use crate::PhysicalOptimizerRule; @@ -42,7 +41,7 @@ impl AggregateStatistics { } impl PhysicalOptimizerRule for AggregateStatistics { - #[recursive] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn optimize( &self, plan: Arc, @@ -295,7 +294,7 @@ mod tests { let field = &fields[0]; assert_eq!(field.name(), agg.column_name()); assert_eq!(field.data_type(), &DataType::Int64); - // note that nullabiolity differs + // note that nullability differs assert_eq!( as_int64_array(batch.column(0)).unwrap().values(), @@ -378,7 +377,7 @@ mod tests { Arc::clone(&schema), )?; - // We introduce an intermediate optimization step between the partial and final aggregtator + // We introduce an intermediate optimization step between the partial and final aggregator let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); let final_agg = AggregateExec::try_new( @@ -410,7 +409,7 @@ mod tests { Arc::clone(&schema), )?; - // We introduce an intermediate optimization step between the partial and final aggregtator + // We introduce an intermediate optimization step between the partial and final aggregator let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); let final_agg = AggregateExec::try_new( diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index d5ffaad6d872..e107bb85d7b8 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -44,7 +44,7 @@ use crate::PhysicalOptimizerRule; /// `new_add_mode` and `new_remove_mode`. With this rule, we can keep track of /// the global requirements (ordering and distribution) across rules. 
/// -/// The primary usecase of this node and rule is to specify and preserve the desired output +/// The primary use case of this node and rule is to specify and preserve the desired output /// ordering and distribution the entire plan. When sending to a single client, a single partition may /// be desirable, but when sending to a multi-partitioned writer, keeping multiple partitions may be /// better. diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index 35a79cbd91ed..8e975e10180f 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -352,7 +352,7 @@ where let null_buffer = self.nulls.take_n(n); let first_remaining_offset = O::as_usize(self.offsets[n]); - // Given offests like [0, 2, 4, 5] and n = 1, we expect to get + // Given offsets like [0, 2, 4, 5] and n = 1, we expect to get // offsets [0, 2, 3]. We first create two offsets for first_n as [0, 2] and the remaining as [2, 4, 5]. // And we shift the offset starting from 0 for the remaining one, [2, 4, 5] -> [0, 2, 3]. let mut first_n_offsets = self.offsets.drain(0..n).collect::>(); diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index c261310f56e3..98787d740c20 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -137,7 +137,7 @@ struct SkipAggregationProbe { // ======================================================================== // STATES: // Fields changes during execution. Can be buffer, or state flags that - // influence the exeuction in parent `GroupedHashAggregateStream` + // influence the execution in parent `GroupedHashAggregateStream` // ======================================================================== /// Number of processed input rows (updated during probing) input_rows: usize, diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 1fc3280ceb16..708f006b0d39 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -173,7 +173,7 @@ impl ExecutionPlan for AnalyzeExec { ); } - // Create future that computes thefinal output + // Create future that computes the final output let start = Instant::now(); let captured_input = Arc::clone(&self.input); let captured_schema = Arc::clone(&self.schema); diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 46875fae94fc..f38876d93ec1 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -180,7 +180,7 @@ impl BatchCoalescer { /// Indicates the state of the [`BatchCoalescer`] buffer after the /// [`BatchCoalescer::push_batch()`] operation. /// -/// The caller should take diferent actions, depending on the variant returned. +/// The caller should take different actions, depending on the variant returned. pub enum CoalescerState { /// Neither the limit nor the target batch size is reached. 
/// diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 88b85a85a102..961d2f639897 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -457,7 +457,7 @@ pub trait DisplayAs { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> fmt::Result; } -/// A newtype wrapper to display `T` implementing`DisplayAs` using the `Default` mode +/// A new type wrapper to display `T` implementing`DisplayAs` using the `Default` mode pub struct DefaultDisplay(pub T); impl fmt::Display for DefaultDisplay { @@ -466,7 +466,7 @@ impl fmt::Display for DefaultDisplay { } } -/// A newtype wrapper to display `T` implementing `DisplayAs` using the `Verbose` mode +/// A new type wrapper to display `T` implementing `DisplayAs` using the `Verbose` mode pub struct VerboseDisplay(pub T); impl fmt::Display for VerboseDisplay { diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 09bb80734401..5f0b229ce92a 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -521,7 +521,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { /// For unbounded streams, it also tracks whether the operator requires finite memory /// to process the stream or if memory usage could grow unbounded. /// -/// Bounedness of the output stream is based on the the boundedness of the input stream and the nature of +/// Boundedness of the output stream is based on the the boundedness of the input stream and the nature of /// the operator. For example, limit or topk with fetch operator can convert an unbounded stream to a bounded stream. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Boundedness { @@ -903,7 +903,7 @@ pub fn execute_stream_partitioned( /// and context. It then checks if there are any columns in the input that might /// violate the `not null` constraints specified in the `sink_schema`. If there are /// such columns, it wraps the resulting stream to enforce the `not null` constraints -/// by invoking the `check_not_null_contraits` function on each batch of the stream. +/// by invoking the [`check_not_null_constraints`] function on each batch of the stream. pub fn execute_input_stream( input: Arc, sink_schema: SchemaRef, diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index ef70392a01b7..dabe42ee43a2 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -138,13 +138,13 @@ impl JoinLeftData { } #[allow(rustdoc::private_intra_doc_links)] -/// Join execution plan: Evaluates eqijoin predicates in parallel on multiple +/// Join execution plan: Evaluates equijoin predicates in parallel on multiple /// partitions using a hash table and an optional filter list to apply post /// join. /// /// # Join Expressions /// -/// This implementation is optimized for evaluating eqijoin predicates ( +/// This implementation is optimized for evaluating equijoin predicates ( /// ` = `) expressions, which are represented as a list of `Columns` /// in [`Self::on`]. 
/// @@ -198,7 +198,7 @@ impl JoinLeftData { /// /// Original build-side data Inserting build-side values into hashmap Concatenated build-side batch /// ┌───────────────────────────┐ -/// hasmap.insert(row-hash, row-idx + offset) │ idx │ +/// hashmap.insert(row-hash, row-idx + offset) │ idx │ /// ┌───────┐ │ ┌───────┐ │ /// │ Row 1 │ 1) update_hash for batch 3 with offset 0 │ │ Row 6 │ 0 │ /// Batch 1 │ │ - hashmap.insert(Row 7, idx 1) │ Batch 3 │ │ │ @@ -849,7 +849,7 @@ async fn collect_left_input( acc.2.build_mem_used.add(batch_size); acc.2.build_input_batches.add(1); acc.2.build_input_rows.add(batch.num_rows()); - // Update rowcount + // Update row count acc.1 += batch.num_rows(); // Push batch to output acc.0.push(batch); @@ -3490,7 +3490,7 @@ mod tests { Ok(()) } - /// Test for parallelised HashJoinExec with PartitionMode::CollectLeft + /// Test for parallelized HashJoinExec with PartitionMode::CollectLeft #[tokio::test] async fn test_collect_left_multiple_partitions_join() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index b8cb7b313bc1..438d9818475d 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -86,7 +86,7 @@ use futures::{Stream, StreamExt}; /// # Sorting /// /// Assumes that both the left and right input to the join are pre-sorted. It is not the -/// responisibility of this execution plan to sort the inputs. +/// responsibility of this execution plan to sort the inputs. /// /// # "Streamed" vs "Buffered" /// @@ -101,7 +101,7 @@ use futures::{Stream, StreamExt}; /// If the memory limit increases beyond the specified value and spilling is enabled, /// buffered batches could be spilled to disk. If spilling is disabled, the execution /// will fail under the same conditions. Multiple record batches of buffered could currently reside -/// in memory/disk during the exectution. The number of buffered batches residing in +/// in memory/disk during the execution. The number of buffered batches residing in /// memory/disk depends on the number of rows of buffered input having the same value /// of join key as that of streamed input rows currently present in memory. Due to pre-sorted inputs, /// the algorithm understands when it is not needed anymore, and releases the buffered batches @@ -304,11 +304,10 @@ impl SortMergeJoinExec { let output_partitioning = symmetric_join_output_partitioning(left, right, &join_type); - // TODO: Emission type may be incremental if the input is sorted PlanProperties::new( eq_properties, output_partitioning, - EmissionType::Final, + EmissionType::Incremental, boundedness_from_children([left, right]), ) } diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 0366c9fa5e46..d792e143046c 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -1645,7 +1645,7 @@ macro_rules! handle_state { /// Represents the result of a stateful operation. /// -/// This enumueration indicates whether the state produced a result that is +/// This enumeration indicates whether the state produced a result that is /// ready for use (`Ready`) or if the operation requires continuation (`Continue`). 
/// /// Variants: diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index 2037ddb70c2d..dbda0a310ce5 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -50,7 +50,7 @@ pub struct MetricBuilder<'a> { /// optional partition number partition: Option, - /// arbitrary name=value pairs identifiying this metric + /// arbitrary name=value pairs identifying this metric labels: Vec