From e718c1a5c5770c071c9c2e14a7681a7f1a2f3f23 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Tue, 24 Dec 2024 06:24:49 +0800 Subject: [PATCH] minor: fix typos in comments / structure names (#13879) * minor: fix typo error in datafusion * fix: fix rebase error * fix: format HashJoinExec doc * doc: recover thiserror/preemptively * fix: other typo error fixed * fix: directories to dir_entries in catalog example --- Cargo.toml | 2 +- datafusion-cli/src/functions.rs | 2 +- datafusion-examples/README.md | 2 +- .../examples/advanced_parquet_index.rs | 4 +-- datafusion-examples/examples/analyzer_rule.rs | 4 +-- datafusion-examples/examples/catalog.rs | 10 +++--- datafusion-examples/examples/expr_api.rs | 4 +-- .../examples/function_factory.rs | 2 +- datafusion-examples/examples/memtable.rs | 2 +- .../examples/optimizer_rule.rs | 2 +- datafusion-examples/examples/plan_to_sql.rs | 2 +- datafusion-examples/examples/simple_udtf.rs | 6 ++-- datafusion/common/src/column.rs | 2 +- datafusion/common/src/config.rs | 4 +-- datafusion/common/src/cse.rs | 2 +- datafusion/common/src/dfschema.rs | 2 +- datafusion/common/src/error.rs | 2 +- datafusion/common/src/scalar/mod.rs | 10 +++--- datafusion/common/src/tree_node.rs | 4 +-- datafusion/common/src/utils/memory.rs | 4 +-- datafusion/common/src/utils/proxy.rs | 6 ++-- datafusion/core/benches/physical_plan.rs | 2 +- datafusion/core/src/dataframe/mod.rs | 4 +-- .../src/datasource/default_table_source.rs | 2 +- .../datasource/physical_plan/file_groups.rs | 2 +- .../core/src/datasource/physical_plan/json.rs | 2 +- .../datasource/physical_plan/parquet/mod.rs | 4 +-- .../physical_plan/parquet/row_filter.rs | 4 +-- .../physical_plan/parquet/row_group_filter.rs | 2 +- datafusion/core/src/datasource/stream.rs | 2 +- datafusion/core/src/execution/context/mod.rs | 2 +- .../core/src/execution/context/parquet.rs | 6 ++-- .../core/src/execution/session_state.rs | 4 +-- datafusion/core/src/lib.rs | 2 +- .../enforce_distribution.rs | 10 +++--- .../src/physical_optimizer/enforce_sorting.rs | 2 +- .../physical_optimizer/projection_pushdown.rs | 6 ++-- datafusion/core/src/test_util/mod.rs | 2 +- datafusion/core/tests/dataframe/mod.rs | 18 +++++------ .../tests/fuzz_cases/equivalence/utils.rs | 2 +- .../core/tests/fuzz_cases/limit_fuzz.rs | 2 +- datafusion/core/tests/fuzz_cases/sort_fuzz.rs | 2 +- .../sort_preserving_repartition_fuzz.rs | 6 ++-- datafusion/core/tests/macro_hygiene/mod.rs | 4 +-- .../tests/parquet/external_access_plan.rs | 4 +-- datafusion/core/tests/sql/explain_analyze.rs | 2 +- datafusion/core/tests/sql/mod.rs | 2 +- datafusion/core/tests/sql/path_partition.rs | 4 +-- .../user_defined/user_defined_aggregates.rs | 2 +- .../tests/user_defined/user_defined_plan.rs | 2 +- .../user_defined_table_functions.rs | 4 +-- datafusion/expr-common/src/accumulator.rs | 6 ++-- datafusion/expr/src/expr.rs | 14 ++++---- datafusion/expr/src/function.rs | 2 +- datafusion/expr/src/logical_plan/builder.rs | 12 +++---- datafusion/expr/src/logical_plan/ddl.rs | 4 +-- datafusion/expr/src/logical_plan/plan.rs | 6 ++-- datafusion/expr/src/table_source.rs | 4 +-- .../expr/src/type_coercion/functions.rs | 2 +- datafusion/expr/src/udf.rs | 4 +-- datafusion/expr/src/utils.rs | 8 ++--- datafusion/expr/src/window_frame.rs | 2 +- datafusion/ffi/src/table_provider.rs | 6 ++-- .../src/aggregate/groups_accumulator.rs | 4 +-- .../functions-aggregate-common/src/tdigest.rs | 4 +-- .../src/approx_percentile_cont.rs | 2 +- .../src/approx_percentile_cont_with_weight.rs | 2 +- 
.../functions-aggregate/src/covariance.rs | 2 +- datafusion/functions-aggregate/src/min_max.rs | 2 +- .../src/min_max/min_max_bytes.rs | 6 ++-- datafusion/functions/src/core/greatest.rs | 2 +- datafusion/functions/src/core/least.rs | 2 +- datafusion/functions/src/core/mod.rs | 4 +-- datafusion/functions/src/core/nullif.rs | 2 +- datafusion/functions/src/core/nvl.rs | 2 +- datafusion/functions/src/datetime/date_bin.rs | 2 +- .../functions/src/datetime/date_part.rs | 2 +- .../functions/src/regex/regexpreplace.rs | 2 +- datafusion/functions/src/string/mod.rs | 2 +- datafusion/functions/src/strings.rs | 4 +-- .../optimizer/src/analyzer/type_coercion.rs | 2 +- .../optimizer/src/common_subexpr_eliminate.rs | 4 +-- .../src/decorrelate_predicate_subquery.rs | 2 +- .../optimizer/src/eliminate_cross_join.rs | 4 +-- .../src/eliminate_group_by_constant.rs | 2 +- .../optimizer/src/optimize_projections/mod.rs | 32 +++++++++---------- .../optimize_projections/required_indices.rs | 6 ++-- .../optimizer/src/scalar_subquery_to_join.rs | 10 +++--- .../simplify_expressions/expr_simplifier.rs | 4 +-- .../src/unwrap_cast_in_comparison.rs | 2 +- .../physical-expr/src/equivalence/mod.rs | 2 +- .../src/equivalence/properties.rs | 2 +- .../physical-expr/src/expressions/binary.rs | 4 +-- .../physical-expr/src/expressions/column.rs | 2 +- .../physical-expr/src/expressions/literal.rs | 2 +- datafusion/physical-expr/src/partitioning.rs | 2 +- .../physical-expr/src/scalar_function.rs | 2 +- .../physical-expr/src/utils/guarantee.rs | 2 +- .../src/aggregate_statistics.rs | 6 ++-- .../src/output_requirements.rs | 2 +- .../group_values/multi_group_by/bytes.rs | 2 +- .../physical-plan/src/aggregates/row_hash.rs | 2 +- datafusion/physical-plan/src/analyze.rs | 2 +- datafusion/physical-plan/src/coalesce/mod.rs | 2 +- datafusion/physical-plan/src/display.rs | 4 +-- .../physical-plan/src/execution_plan.rs | 4 +-- .../physical-plan/src/joins/hash_join.rs | 10 +++--- .../src/joins/sort_merge_join.rs | 4 +-- datafusion/physical-plan/src/joins/utils.rs | 2 +- .../physical-plan/src/metrics/builder.rs | 2 +- datafusion/physical-plan/src/metrics/mod.rs | 2 +- .../physical-plan/src/recursive_query.rs | 2 +- .../src/repartition/distributor_channels.rs | 8 ++--- .../physical-plan/src/repartition/mod.rs | 8 ++--- datafusion/physical-plan/src/sorts/merge.rs | 2 +- datafusion/physical-plan/src/sorts/sort.rs | 4 +-- datafusion/physical-plan/src/sorts/stream.rs | 2 +- datafusion/physical-plan/src/stream.rs | 2 +- datafusion/physical-plan/src/test/exec.rs | 2 +- datafusion/physical-plan/src/topk/mod.rs | 4 +-- datafusion/physical-plan/src/unnest.rs | 22 ++++++------- .../src/windows/bounded_window_agg_exec.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 2 +- datafusion/sql/src/planner.rs | 2 +- datafusion/sql/src/statement.rs | 2 +- datafusion/sql/src/unparser/dialect.rs | 2 +- datafusion/sql/src/unparser/expr.rs | 4 +-- datafusion/sql/src/utils.rs | 4 +-- .../src/engines/postgres_engine/mod.rs | 4 +-- .../substrait/src/logical_plan/consumer.rs | 10 +++--- datafusion/substrait/src/variation_const.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 2 +- .../tests/cases/substrait_validations.rs | 4 +-- datafusion/substrait/tests/utils.rs | 2 +- test-utils/src/array_gen/decimal.rs | 4 +-- test-utils/src/array_gen/primitive.rs | 6 ++-- test-utils/src/array_gen/string.rs | 8 ++--- test-utils/src/data_gen.rs | 2 +- 138 files changed, 277 insertions(+), 277 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b7c8c09a8537..05d2944c768d 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,7 +73,7 @@ version = "43.0.0" # selectively turn them on if needed, since we can override default-features = true (from false) # for the inherited dependency but cannot do the reverse (override from true to false). # -# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 +# See for more details: https://github.com/rust-lang/cargo/issues/11329 ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index d7ca48d638b7..36e68ec4842b 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc { Field::new("total_uncompressed_size", DataType::Int64, true), ])); - // construct recordbatch from metadata + // construct record batch from metadata let mut filename_arr = vec![]; let mut row_group_id_arr = vec![]; let mut row_group_num_rows_arr = vec![]; diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index a155920eadc4..b06148ce267f 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -22,7 +22,7 @@ This crate includes end to end, highly commented examples of how to use various DataFusion APIs to help you get started. -## Prerequisites: +## Prerequisites Run `git submodule update --init` to init test files. diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 67b745d4074e..28a3a2f1de09 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -82,7 +82,7 @@ use url::Url; /// Specifically, this example illustrates how to: /// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query /// 2. Use [`PruningPredicate`] for predicate analysis -/// 3. Pass a row group selection to [`ParuetExec`] +/// 3. Pass a row group selection to [`ParquetExec`] /// 4. Pass a row selection (within a row group) to [`ParquetExec`] /// /// Note this is a *VERY* low level example for people who want to build their @@ -211,7 +211,7 @@ async fn main() -> Result<()> { // // Note: in order to prune pages, the Page Index must be loaded and the // ParquetExec will load it on demand if not present. 
To avoid a second IO - // during query, this example loaded the Page Index pre-emptively by setting + // during query, this example loaded the Page Index preemptively by setting // `ArrowReader::with_page_index` in `IndexedFile::try_new` provider.set_use_row_selection(true); println!("** Select data, predicate `id = 950`"); diff --git a/datafusion-examples/examples/analyzer_rule.rs b/datafusion-examples/examples/analyzer_rule.rs index bd067be97b8b..aded64ed4105 100644 --- a/datafusion-examples/examples/analyzer_rule.rs +++ b/datafusion-examples/examples/analyzer_rule.rs @@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl { fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result { // use the TreeNode API to recursively walk the LogicalPlan tree // and all of its children (inputs) - let transfomed_plan = plan.transform(|plan| { + let transformed_plan = plan.transform(|plan| { // This closure is called for each LogicalPlan node // if it is a Scan node, add a filter to remove all managers if is_employee_table_scan(&plan) { @@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl { // // This example does not need the value of either flag, so simply // extract the LogicalPlan "data" - Ok(transfomed_plan.data) + Ok(transformed_plan.data) } fn name(&self) -> &str { diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs index f40f1dfb5a15..655438b78b9f 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/catalog.rs @@ -46,11 +46,11 @@ async fn main() -> Result<()> { let ctx = SessionContext::new(); let state = ctx.state(); - let cataloglist = Arc::new(CustomCatalogProviderList::new()); + let catalog_list = Arc::new(CustomCatalogProviderList::new()); // use our custom catalog list for context. each context has a single catalog list. // context will by default have [`MemoryCatalogProviderList`] - ctx.register_catalog_list(cataloglist.clone()); + ctx.register_catalog_list(catalog_list.clone()); // initialize our catalog and schemas let catalog = DirCatalog::new(); @@ -81,7 +81,7 @@ async fn main() -> Result<()> { ctx.register_catalog("dircat", Arc::new(catalog)); { // catalog was passed down into our custom catalog list since we override the ctx's default - let catalogs = cataloglist.catalogs.read().unwrap(); + let catalogs = catalog_list.catalogs.read().unwrap(); assert!(catalogs.contains_key("dircat")); }; @@ -144,8 +144,8 @@ impl DirSchema { async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result> { let DirSchemaOpts { ext, dir, format } = opts; let mut tables = HashMap::new(); - let direntries = std::fs::read_dir(dir).unwrap(); - for res in direntries { + let dir_entries = std::fs::read_dir(dir).unwrap(); + for res in dir_entries { let entry = res.unwrap(); let filename = entry.file_name().to_str().unwrap().to_string(); if !filename.ends_with(ext) { diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index cb0796bdcf73..943e5d5e027c 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter; /// 4. Simplify expressions: [`simplify_demo`] /// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`] /// 6. Get the types of the expressions: [`expression_type_demo`] -/// 7. Apply type cocercion to expressions: [`type_coercion_demo`] +/// 7. 
Apply type coercion to expressions: [`type_coercion_demo`] #[tokio::main] async fn main() -> Result<()> { // The easiest way to do create expressions is to use the @@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> { )?; assert!(physical_expr.evaluate(&batch).is_ok()); - // 4. Apply explict type coercion by manually rewriting the expression + // 4. Apply explicit type coercion by manually rewriting the expression let coerced_expr = expr .transform(|e| { // Only type coerces binary expressions. diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index b2771149aae5..58ffa060ebaa 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -36,7 +36,7 @@ use datafusion_expr::{ /// /// Apart from [FunctionFactory], this example covers /// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace -/// a function call with another expression at rutime. +/// a function call with another expression at runtime. /// /// This example is rather simple and does not cover all cases required for a /// real implementation. diff --git a/datafusion-examples/examples/memtable.rs b/datafusion-examples/examples/memtable.rs index 5cce578039e7..bb0b720eff79 100644 --- a/datafusion-examples/examples/memtable.rs +++ b/datafusion-examples/examples/memtable.rs @@ -25,7 +25,7 @@ use std::sync::Arc; use std::time::Duration; use tokio::time::timeout; -/// This example demonstrates executing a simple query against a Memtable +/// This example demonstrates executing a simple query against a [`MemTable`] #[tokio::main] async fn main() -> Result<()> { let mem_table = create_memtable()?; diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index 0f28a1670252..e8a272f28318 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -146,7 +146,7 @@ impl MyOptimizerRule { // Closure called for each sub tree match expr { Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => { - // destruture the expression + // destructure the expression let BinaryExpr { left, op: _, right } = binary_expr; // rewrite to `my_eq(left, right)` let udf = ScalarUDF::new_from_impl(MyEq::new()); diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 8ea7c2951223..b5b69093a646 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -65,7 +65,7 @@ fn simple_expr_to_sql_demo() -> Result<()> { Ok(()) } -/// DataFusioon can remove parentheses when converting an expression to SQL. +/// DataFusion can remove parentheses when converting an expression to SQL. /// Note that output is intended for humans, not for other SQL engines, /// as difference in precedence rules can cause expressions to be parsed differently. 
fn simple_expr_to_pretty_sql_demo() -> Result<()> { diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index f32560ede69d..7cf1ce87690e 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc { let limit = exprs .get(1) .map(|expr| { - // try to simpify the expression, so 1+2 becomes 3, for example + // try to simplify the expression, so 1+2 becomes 3, for example let execution_props = ExecutionProps::new(); let info = SimplifyContext::new(&execution_props); let expr = ExprSimplifier::new(info).simplify(expr.clone())?; @@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef) -> Result<(SchemaRef, Vec>(); for using_col in using_columns { let all_matched = columns.iter().all(|c| using_col.contains(c)); - // All matched fields belong to the same using column set, in orther words + // All matched fields belong to the same using column set, in other words // the same join clause. We simply pick the qualifier from the first match. if all_matched { return Ok(columns[0].clone()); diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 6e64700bd2e0..942aa308e200 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -904,12 +904,12 @@ pub trait ConfigExtension: ExtensionOptions { pub trait ExtensionOptions: Send + Sync + fmt::Debug + 'static { /// Return `self` as [`Any`] /// - /// This is needed until trait upcasting is stabilised + /// This is needed until trait upcasting is stabilized fn as_any(&self) -> &dyn Any; /// Return `self` as [`Any`] /// - /// This is needed until trait upcasting is stabilised + /// This is needed until trait upcasting is stabilized fn as_any_mut(&mut self) -> &mut dyn Any; /// Return a deep clone of this [`ExtensionOptions`] diff --git a/datafusion/common/src/cse.rs b/datafusion/common/src/cse.rs index f64571b8471e..674d3386171f 100644 --- a/datafusion/common/src/cse.rs +++ b/datafusion/common/src/cse.rs @@ -60,7 +60,7 @@ pub trait Normalizeable { } /// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing -/// normlized nodes in optimizations like Common Subexpression Elimination (CSE). +/// normalized nodes in optimizations like Common Subexpression Elimination (CSE). /// /// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization) /// are considered equal in CSE optimization, even if their original forms differ. diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index b5f7b5681eef..ac4d8be8045f 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -955,7 +955,7 @@ pub trait ExprSchema: std::fmt::Debug { /// Returns the column's optional metadata. fn metadata(&self, col: &Column) -> Result<&HashMap>; - /// Return the coulmn's datatype and nullability + /// Return the column's datatype and nullability fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>; } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 4fac7298c455..1012c4cd2270 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -115,7 +115,7 @@ pub enum DataFusionError { Execution(String), /// [`JoinError`] during execution of the query. /// - /// This error can unoccur for unjoined tasks, such as execution shutdown. 
+ /// This error can't occur for unjoined tasks, such as execution shutdown. ExecutionJoin(JoinError), /// Error when resources (such as memory of scratch disk space) are exhausted. /// diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index feb3c6f604f0..de14d3a01037 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2216,7 +2216,7 @@ impl ScalarValue { /// /// Errors if `self` is /// - a decimal that fails be converted to a decimal array of size - /// - a `Fixedsizelist` that fails to be concatenated into an array of size + /// - a `FixedsizeList` that fails to be concatenated into an array of size /// - a `List` that fails to be concatenated into an array of size /// - a `Dictionary` that fails be converted to a dictionary array of size pub fn to_array_of_size(&self, size: usize) -> Result { @@ -2925,7 +2925,7 @@ impl ScalarValue { /// preferred over this function if at all possible as they can be /// vectorized and are generally much faster. /// - /// This function has a few narrow usescases such as hash table key + /// This function has a few narrow use cases such as hash table key /// comparisons where comparing a single row at a time is necessary. /// /// # Errors @@ -4465,7 +4465,7 @@ mod tests { Ok(()) } - // Verifies that ScalarValue has the same behavior with compute kernal when it overflows. + // Verifies that ScalarValue has the same behavior with compute kernel when it overflows. fn check_scalar_add_overflow(left: ScalarValue, right: ScalarValue) where T: ArrowNumericType, @@ -6150,9 +6150,9 @@ mod tests { &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())) ); - let newscalar = ScalarValue::try_from_array(&array, 0).unwrap(); + let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap(); assert_eq!( - newscalar.data_type(), + new_scalar.data_type(), DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())) ); } diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index d92a2cc34b56..9c59652e0d70 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -995,11 +995,11 @@ impl< /// construct a temporary container to be able to call `apply_ref_elements` on a /// collection of tree node references. But in that case the container's temporary /// lifetime is different to the lifetime of tree nodes that we put into it. -/// Please find an example usecase in `Expr::apply_children` with the `Expr::Case` case. +/// Please find an example use case in `Expr::apply_children` with the `Expr::Case` case. /// /// Most of the cases we don't need to create a temporary container with /// `TreeNodeRefContainer`, but we can just call `TreeNodeContainer::apply_elements`. -/// Please find an example usecase in `Expr::apply_children` with the `Expr::GroupingSet` +/// Please find an example use case in `Expr::apply_children` with the `Expr::GroupingSet` /// case. pub trait TreeNodeRefContainer<'a, T: 'a>: Sized { /// Applies `f` to all elements of the container. diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index bb68d59eed59..ab73996fcd8b 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! This module provides a function to estimate the memory size of a HashTable prior to alloaction +//! 
This module provides a function to estimate the memory size of a HashTable prior to allocation use crate::{DataFusionError, Result}; use std::mem::size_of; @@ -79,7 +79,7 @@ pub fn estimate_memory_size(num_elements: usize, fixed_size: usize) -> Result // For the majority of cases hashbrown overestimates the bucket quantity // to keep ~1/8 of them empty. We take this factor into account by // multiplying the number of elements with a fixed ratio of 8/7 (~1.14). - // This formula leads to overallocation for small tables (< 8 elements) + // This formula leads to over-allocation for small tables (< 8 elements) // but should be fine overall. num_elements .checked_mul(8) diff --git a/datafusion/common/src/utils/proxy.rs b/datafusion/common/src/utils/proxy.rs index b32164f682fa..d940677a5fb3 100644 --- a/datafusion/common/src/utils/proxy.rs +++ b/datafusion/common/src/utils/proxy.rs @@ -92,12 +92,12 @@ impl VecAllocExt for Vec { type T = T; fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) { - let prev_capacty = self.capacity(); + let prev_capacity = self.capacity(); self.push(x); let new_capacity = self.capacity(); - if new_capacity > prev_capacty { + if new_capacity > prev_capacity { // capacity changed, so we allocated more - let bump_size = (new_capacity - prev_capacty) * size_of::(); + let bump_size = (new_capacity - prev_capacity) * size_of::(); // Note multiplication should never overflow because `push` would // have panic'd first, but the checked_add could potentially // overflow since accounting could be tracking additional values, and diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 349c2e438195..7d87a37b3b9c 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -38,7 +38,7 @@ use datafusion::physical_plan::{ use datafusion::prelude::SessionContext; use datafusion_physical_expr_common::sort_expr::LexOrdering; -// Initialise the operator using the provided record batches and the sort key +// Initialize the operator using the provided record batches and the sort key // as inputs. All record batches must have the same schema. fn sort_preserving_merge_operator( session_ctx: Arc, diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 414d6da7bc9b..3d4cfb57e7a1 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -3279,7 +3279,7 @@ mod tests { &df_results ); - // check that col with the same name ovwewritten + // check that col with the same name overwritten let df_results_overwrite = df .clone() .with_column("c1", col("c2") + col("c3"))? @@ -3302,7 +3302,7 @@ mod tests { &df_results_overwrite ); - // check that col with the same name ovwewritten using same name as reference + // check that col with the same name overwritten using same name as reference let df_results_overwrite_self = df .clone() .with_column("c2", col("c2") + lit(1))? diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index 5efabd000d68..91c1e0ac97fc 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -67,7 +67,7 @@ impl TableSource for DefaultTableSource { } /// Tests whether the table provider can make use of any or all filter expressions - /// to optimise data retrieval. + /// to optimize data retrieval. 
fn supports_filters_pushdown( &self, filter: &[&Expr], diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index f9a19f1d9691..f681dfe219b5 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -781,7 +781,7 @@ mod test { assert_partitioned_files(expected, actual); } - /// Asserts that the two groups of `ParititonedFile` are the same + /// Asserts that the two groups of [`PartitionedFile`] are the same /// (PartitionedFile doesn't implement PartialEq) fn assert_partitioned_files( expected: Option>>, diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 879c9817a382..4071f9c26b58 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -870,7 +870,7 @@ mod tests { )] #[cfg(feature = "compression")] #[tokio::test] - async fn test_json_with_repartitioing( + async fn test_json_with_repartitioning( file_compression_type: FileCompressionType, ) -> Result<()> { let config = SessionConfig::new() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 7573e32f8652..83b544a76e11 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -333,7 +333,7 @@ impl ParquetExecBuilder { /// Set the filter predicate when reading. /// - /// See the "Predicate Pushdown" section of the [`ParquetExec`] documenation + /// See the "Predicate Pushdown" section of the [`ParquetExec`] documentation /// for more details. pub fn with_predicate(mut self, predicate: Arc) -> Self { self.predicate = Some(predicate); @@ -611,7 +611,7 @@ impl ParquetExec { } /// If enabled, the reader will read the page index - /// This is used to optimise filter pushdown + /// This is used to optimize filter pushdown /// via `RowSelector` and `RowFilter` by /// eliminating unnecessary IO and decoding pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index af5ffb9d5743..f6428a693fb1 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -422,7 +422,7 @@ fn would_column_prevent_pushdown( checker.prevents_pushdown() } -/// Recurses through expr as a trea, finds all `column`s, and checks if any of them would prevent +/// Recurses through expr as a tree, finds all `column`s, and checks if any of them would prevent /// this expression from being predicate pushed down. If any of them would, this returns false. /// Otherwise, true. 
pub fn can_expr_be_pushed_down_with_schemas( @@ -692,7 +692,7 @@ mod test { let mut parquet_reader = parquet_reader_builder.build().expect("building reader"); - // Parquet file is small, we only need 1 recordbatch + // Parquet file is small, we only need 1 record batch let first_rb = parquet_reader .next() .expect("expected record batch") diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 810f74e8515b..3854f04566ee 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -294,7 +294,7 @@ impl BloomFilterStatistics { } _ => true, }, - // One more parrern matching since not all data types are supported + // One more pattern matching since not all data types are supported // inside of a Dictionary ScalarValue::Dictionary(_, inner) => match inner.as_ref() { ScalarValue::Int32(_) diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 2cea37fe17e2..768761bb9cf1 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -101,7 +101,7 @@ impl FromStr for StreamEncoding { match s.to_ascii_lowercase().as_str() { "csv" => Ok(Self::Csv), "json" => Ok(Self::Json), - _ => plan_err!("Unrecognised StreamEncoding {}", s), + _ => plan_err!("Unrecognized StreamEncoding {}", s), } } } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 67236c9a6bd2..3455cce132b6 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1903,7 +1903,7 @@ mod tests { #[tokio::test] async fn send_context_to_threads() -> Result<()> { // ensure SessionContexts can be used in a multi-threaded - // environment. Usecase is for concurrent planing. + // environment. Use case is for concurrent planing. let tmp_dir = TempDir::new()?; let partition_count = 4; let ctx = Arc::new(create_ctx(&tmp_dir, partition_count).await?); diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index 3f23c150be83..be87c7cac1d2 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -281,10 +281,10 @@ mod tests { ) .await; let binding = DataFilePaths::to_urls(&path2).unwrap(); - let expexted_path = binding[0].as_str(); + let expected_path = binding[0].as_str(); assert_eq!( read_df.unwrap_err().strip_backtrace(), - format!("Execution error: File path '{}' does not match the expected extension '.parquet'", expexted_path) + format!("Execution error: File path '{}' does not match the expected extension '.parquet'", expected_path) ); // Read the dataframe from 'output3.parquet.snappy.parquet' with the correct file extension. 
@@ -316,7 +316,7 @@ mod tests { let total_rows: usize = results.iter().map(|rb| rb.num_rows()).sum(); assert_eq!(total_rows, 0); - // Read the datafram from doule dot folder; + // Read the dataframe from double dot folder; let read_df = ctx .read_parquet( &path5, diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index ef32e84a7380..c5874deb6ed5 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -125,9 +125,9 @@ pub struct SessionState { session_id: String, /// Responsible for analyzing and rewrite a logical plan before optimization analyzer: Analyzer, - /// Provides support for customising the SQL planner, e.g. to add support for custom operators like `->>` or `?` + /// Provides support for customizing the SQL planner, e.g. to add support for custom operators like `->>` or `?` expr_planners: Vec>, - /// Provides support for customising the SQL type planning + /// Provides support for customizing the SQL type planning type_planner: Option>, /// Responsible for optimizing a logical plan optimizer: Optimizer, diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index a1b18b8bfe8c..e9501bd37a8a 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -182,7 +182,7 @@ //! //! DataFusion is designed to be highly extensible, so you can //! start with a working, full featured engine, and then -//! specialize any behavior for your usecase. For example, +//! specialize any behavior for your use case. For example, //! some projects may add custom [`ExecutionPlan`] operators, or create their own //! query language that directly creates [`LogicalPlan`] rather than using the //! built in SQL planner, [`SqlToRel`]. diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 76c4d668d797..3c8d08ee32d4 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1252,7 +1252,7 @@ fn ensure_distribution( // to increase parallelism. child = add_roundrobin_on_top(child, target_partitions)?; } - // When inserting hash is necessary to satisy hash requirement, insert hash repartition. + // When inserting hash is necessary to satisfy hash requirement, insert hash repartition. 
if hash_necessary { child = add_hash_on_top(child, exprs.to_vec(), target_partitions)?; @@ -2833,11 +2833,11 @@ pub(crate) mod tests { ], // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases // when mode is Inner, Left, LeftSemi, LeftAnti // Similarly, since partitioning of the left side is not preserved - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test // cases when mode is Inner, Left, LeftSemi, LeftAnti _ => vec![ @@ -2885,11 +2885,11 @@ pub(crate) mod tests { ], // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases // when mode is Inner, Left, LeftSemi, LeftAnti // Similarly, since partitioning of the left side is not preserved - // when mode is Right, RgihtSemi, RightAnti, Full + // when mode is Right, RightSemi, RightAnti, Full // - We need to add one additional Hash Repartition and Roundrobin repartition after // SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti _ => vec![ diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 85fe9ecfcdb0..dd8e9d900b7d 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -720,7 +720,7 @@ mod tests { let state = session_ctx.state(); // This file has 4 rules that use tree node, apply these rules as in the - // EnforSorting::optimize implementation + // EnforceSorting::optimize implementation // After these operations tree nodes should be in a consistent state. // This code block makes sure that these rules doesn't violate tree node integrity. { diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 3ac40bfb62ea..d2d35c3877c1 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -615,15 +615,15 @@ fn try_embed_projection( /// Collect all column indices from the given projection expressions. fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec { // Collect indices and remove duplicates. - let mut indexs = exprs + let mut indices = exprs .iter() .flat_map(|(expr, _)| collect_columns(expr)) .map(|x| x.index()) .collect::>() .into_iter() .collect::>(); - indexs.sort(); - indexs + indices.sort(); + indices } /// Tries to push `projection` down through `hash_join`. 
If possible, performs the diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index aa134f28fe5b..b1a6f014380e 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -286,7 +286,7 @@ impl DisplayAs for UnboundedExec { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!( f, - "UnboundableExec: unbounded={}", + "UnboundedExec: unbounded={}", self.batch_produce.is_none(), ) } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index f4f754b11c6d..02fe2d83b3c4 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1140,7 +1140,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { } #[tokio::test] -async fn unnest_fixed_list_nonull() -> Result<()> { +async fn unnest_fixed_list_non_null() -> Result<()> { let mut shape_id_builder = UInt32Builder::new(); let mut tags_builder = FixedSizeListBuilder::new(StringBuilder::new(), 2); @@ -2053,9 +2053,9 @@ async fn test_dataframe_placeholder_missing_param_values() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$0' was not provided a value for execution." ); @@ -2119,9 +2119,9 @@ async fn test_dataframe_placeholder_column_parameter() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$1' was not provided a value for execution." ); @@ -2189,9 +2189,9 @@ async fn test_dataframe_placeholder_like_expression() -> Result<()> { // Executing LogicalPlans with placeholders that don't have bound values // should fail. let results = df.collect().await; - let err_mesg = results.unwrap_err().strip_backtrace(); + let err_msg = results.unwrap_err().strip_backtrace(); assert_eq!( - err_mesg, + err_msg, "Execution error: Placeholder '$1' was not provided a value for execution." ); @@ -2277,12 +2277,12 @@ async fn write_partitioned_parquet_results() -> Result<()> { // Explicitly read the parquet file at c2=123 to verify the physical files are partitioned let partitioned_file = format!("{out_dir}/c2=123", out_dir = out_dir); - let filted_df = ctx + let filter_df = ctx .read_parquet(&partitioned_file, ParquetReadOptions::default()) .await?; // Check that the c2 column is gone and that c1 is abc. - let results = filted_df.collect().await?; + let results = filter_df.collect().await?; let expected = ["+-----+", "| c1 |", "+-----+", "| abc |", "+-----+"]; assert_batches_eq!(expected, &results); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index 881949047bff..e18dab35fc91 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -184,7 +184,7 @@ fn add_equal_conditions_test() -> Result<()> { assert!(eq_groups.contains(&col_a_expr)); assert!(eq_groups.contains(&col_b_expr)); - // b and c are aliases. Exising equivalence class should expand, + // b and c are aliases. 
Existing equivalence class should expand, // however there shouldn't be any new equivalence class eq_properties.add_equal_conditions(&col_b_expr, &col_c_expr)?; assert_eq!(eq_properties.eq_group().len(), 1); diff --git a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs index c52acdd82764..a82849f4ea92 100644 --- a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs @@ -281,7 +281,7 @@ fn i64string_batch<'a>( .unwrap() } -/// Run the TopK test, sorting the input batches with the specified ftch +/// Run the TopK test, sorting the input batches with the specified fetch /// (limit) and compares the results to the expected values. async fn run_limit_test(fetch: usize, data: &SortedData) { let input = data.batches(); diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index e4acb96f4930..19ffa69f11d3 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -93,7 +93,7 @@ impl SortTest { self } - /// specify that this test should use a memory pool of the specifeid size + /// specify that this test should use a memory pool of the specified size fn with_pool_size(mut self, pool_size: usize) -> Self { self.pool_size = Some(pool_size); self diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 73f4a569954e..daa282c8fe4a 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -261,15 +261,15 @@ mod sp_repartition_fuzz_tests { for ordering in eq_properties.oeq_class().iter() { let err_msg = format!("error in eq properties: {:?}", eq_properties); - let sort_solumns = ordering + let sort_columns = ordering .iter() .map(|sort_expr| sort_expr.evaluate_to_sort_column(&res)) .collect::>>()?; - let orig_columns = sort_solumns + let orig_columns = sort_columns .iter() .map(|sort_column| sort_column.values.clone()) .collect::>(); - let sorted_columns = lexsort(&sort_solumns, None)?; + let sorted_columns = lexsort(&sort_columns, None)?; // Make sure after merging ordering is still valid. assert_eq!(orig_columns.len(), sorted_columns.len(), "{}", err_msg); diff --git a/datafusion/core/tests/macro_hygiene/mod.rs b/datafusion/core/tests/macro_hygiene/mod.rs index 62f24f5198e6..5aff1d5e3296 100644 --- a/datafusion/core/tests/macro_hygiene/mod.rs +++ b/datafusion/core/tests/macro_hygiene/mod.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! Verifies [Macro Hygene] +//! Verifies [Macro Hygiene] //! -//! [Macro Hygene]: https://en.wikipedia.org/wiki/Hygienic_macro +//! [Macro Hygiene]: https://en.wikipedia.org/wiki/Hygienic_macro mod plan_err { // NO other imports! diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index fa23f5c699e2..61a9e9b5757c 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -161,7 +161,7 @@ async fn plan_and_filter() { RowGroupAccess::Scan, ])); - // initia + // initial let parquet_metrics = TestFull { access_plan, expected_rows: 0, @@ -274,7 +274,7 @@ struct Test { impl Test { /// Runs the test case, panic'ing on error. 
/// - /// Returns the `MetricsSet` from the ParqeutExec + /// Returns the [`MetricsSet`] from the [`ParquetExec`] async fn run_success(self) -> MetricsSet { let Self { access_plan, diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 39fd492786bc..5fb0b9852641 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -566,7 +566,7 @@ async fn csv_explain_verbose_plans() { #[tokio::test] async fn explain_analyze_runs_optimizers(#[values("*", "1")] count_expr: &str) { // repro for https://github.com/apache/datafusion/issues/917 - // where EXPLAIN ANALYZE was not correctly running optiimizer + // where EXPLAIN ANALYZE was not correctly running optimizer let ctx = SessionContext::new(); register_alltypes_parquet(&ctx).await; diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 82f73eadba8c..03c4ad7c013e 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -196,7 +196,7 @@ fn populate_csv_partitions( Ok(schema) } -/// Specialised String representation +/// Specialized String representation fn col_str(column: &ArrayRef, row_index: usize) -> String { // NullArray::is_null() does not work on NullArray. // can remove check for DataType::Null when diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 975984e5b11f..441af1639d9b 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -183,7 +183,7 @@ async fn parquet_distinct_partition_col() -> Result<()> { max_limit += 1; let last_batch = results .last() - .expect("There shouled be at least one record batch returned"); + .expect("There should be at least one record batch returned"); let last_row_idx = last_batch.num_rows() - 1; let mut min_limit = match ScalarValue::try_from_array(last_batch.column(0), last_row_idx)? 
{ @@ -568,7 +568,7 @@ async fn parquet_overlapping_columns() -> Result<()> { assert!( result.is_err(), - "Dupplicate qualified name should raise error" + "Duplicate qualified name should raise error" ); Ok(()) } diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 99c00615376f..bf32eef3b011 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -724,7 +724,7 @@ impl Accumulator for FirstSelector { } fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - // cast argumets to the appropriate type (DataFusion will type + // cast arguments to the appropriate type (DataFusion will type // check these based on the declared allowed input types) let v = as_primitive_array::(&values[0])?; let t = as_primitive_array::(&values[1])?; diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 77753290c37e..487063642345 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -482,7 +482,7 @@ impl ExtensionPlanner for TopKPlanner { /// code is not general and is meant as an illustration only struct TopKExec { input: Arc, - /// The maxium number of values + /// The maximum number of values k: usize, cache: PlanProperties, } diff --git a/datafusion/core/tests/user_defined/user_defined_table_functions.rs b/datafusion/core/tests/user_defined/user_defined_table_functions.rs index 39f10ef11ab0..b5f94107dd0b 100644 --- a/datafusion/core/tests/user_defined/user_defined_table_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_table_functions.rs @@ -228,8 +228,8 @@ fn read_csv_batches(csv_path: impl AsRef) -> Result<(SchemaRef, Vec = $0` will infer `$0` to + /// For example, given an expression like ` = $0` will infer `$0` to /// have type `int32`. /// /// Returns transformed expression and flag that is true if expression contains @@ -2263,7 +2263,7 @@ impl Display for SchemaDisplay<'_> { "{}({}{})", func.name(), if *distinct { "DISTINCT " } else { "" }, - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )?; if let Some(null_treatment) = null_treatment { @@ -2335,7 +2335,7 @@ impl Display for SchemaDisplay<'_> { write!(f, "END") } - // Cast expr is not shown to be consistant with Postgres and Spark + // Cast expr is not shown to be consistent with Postgres and Spark Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { write!(f, "{}", SchemaDisplay(expr)) } @@ -2465,7 +2465,7 @@ impl Display for SchemaDisplay<'_> { f, "{}({})", fun, - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )?; if let Some(null_treatment) = null_treatment { @@ -2495,7 +2495,7 @@ impl Display for SchemaDisplay<'_> { /// Internal usage. 
Please call `schema_name_from_exprs` instead // TODO: Use ", " to standardize the formatting of Vec, // -pub(crate) fn schema_name_from_exprs_comma_seperated_without_space( +pub(crate) fn schema_name_from_exprs_comma_separated_without_space( exprs: &[Expr], ) -> Result { schema_name_from_exprs_inner(exprs, ",") @@ -2598,7 +2598,7 @@ impl Display for Expr { Expr::ScalarFunction(fun) => { fmt_function(f, fun.name(), false, &fun.args, true) } - // TODO: use udf's display_name, need to fix the seperator issue, + // TODO: use udf's display_name, need to fix the separator issue, // Expr::ScalarFunction(ScalarFunction { func, args }) => { // write!(f, "{}", func.display_name(args).unwrap()) // } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 23ffc83e3549..e0235d32292f 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -69,7 +69,7 @@ pub type StateTypeFunction = /// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked /// * 'info': [crate::simplify::SimplifyInfo] /// -///Cclosure returns simplified [Expr] or an error. +/// Closure returns simplified [Expr] or an error. pub type AggregateFunctionSimplification = Box< dyn Fn( crate::expr::AggregateFunction, diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e15c0a36a0e9..c7cff3ac26b1 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -155,11 +155,11 @@ impl LogicalPlanBuilder { } // Ensure that the static term and the recursive term have the same number of fields let static_fields_len = self.plan.schema().fields().len(); - let recurive_fields_len = recursive_term.schema().fields().len(); - if static_fields_len != recurive_fields_len { + let recursive_fields_len = recursive_term.schema().fields().len(); + if static_fields_len != recursive_fields_len { return plan_err!( "Non-recursive term and recursive term must have the same number of columns ({} != {})", - static_fields_len, recurive_fields_len + static_fields_len, recursive_fields_len ); } // Ensure that the recursive term has the same field types as the static term @@ -254,7 +254,7 @@ impl LogicalPlanBuilder { if can_cast_types(&data_type, field_type) { } else { return exec_err!( - "type mistmatch and can't cast to got {} and {}", + "type mismatch and can't cast to got {} and {}", data_type, field_type ); @@ -1635,7 +1635,7 @@ pub fn wrap_projection_for_join_if_necessary( .iter() .map(|key| { // The display_name() of cast expression will ignore the cast info, and show the inner expression name. - // If we do not add alais, it will throw same field name error in the schema when adding projection. + // If we do not add alias, it will throw same field name error in the schema when adding projection. // For example: // input scan : [a, b, c], // join keys: [cast(a as int)] @@ -1776,7 +1776,7 @@ pub fn get_unnested_columns( let new_field = Arc::new(Field::new( col_name, data_type, // Unnesting may produce NULLs even if the list is not null. 
- // For example: unnset([1], []) -> 1, null + // For example: unnest([1], []) -> 1, null true, )); let column = Column::from_name(col_name); diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 3efaf296c29c..a433871ef20d 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -303,7 +303,7 @@ pub struct CreateMemoryTable { pub or_replace: bool, /// Default values for columns pub column_defaults: Vec<(String, Expr)>, - /// Wheter the table is `TableType::Temporary` + /// Whether the table is `TableType::Temporary` pub temporary: bool, } @@ -318,7 +318,7 @@ pub struct CreateView { pub or_replace: bool, /// SQL used to create the view, if available pub definition: Option, - /// Wheter the view is ephemeral + /// Whether the view is ephemeral pub temporary: bool, } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 6c2b923cf6ad..47d9aac3caf2 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1202,7 +1202,7 @@ impl LogicalPlan { /// # let schema = Schema::new(vec![ /// # Field::new("id", DataType::Int32, false), /// # ]); - /// // Build SELECT * FROM t1 WHRERE id = $1 + /// // Build SELECT * FROM t1 WHERE id = $1 /// let plan = table_scan(Some("t1"), &schema, None).unwrap() /// .filter(col("id").eq(placeholder("$1"))).unwrap() /// .build().unwrap(); @@ -1225,7 +1225,7 @@ impl LogicalPlan { /// ); /// /// // Note you can also used named parameters - /// // Build SELECT * FROM t1 WHRERE id = $my_param + /// // Build SELECT * FROM t1 WHERE id = $my_param /// let plan = table_scan(Some("t1"), &schema, None).unwrap() /// .filter(col("id").eq(placeholder("$my_param"))).unwrap() /// .build().unwrap() @@ -3633,7 +3633,7 @@ digraph { "#; // just test for a few key lines in the output rather than the - // whole thing to make test mainteance easier. + // whole thing to make test maintenance easier. let graphviz = format!("{}", plan.display_graphviz()); assert_eq!(expected_graphviz, graphviz); diff --git a/datafusion/expr/src/table_source.rs b/datafusion/expr/src/table_source.rs index e9a677de50c1..d62484153f53 100644 --- a/datafusion/expr/src/table_source.rs +++ b/datafusion/expr/src/table_source.rs @@ -55,7 +55,7 @@ pub enum TableProviderFilterPushDown { pub enum TableType { /// An ordinary physical table. Base, - /// A non-materialised table that itself uses a query internally to provide data. + /// A non-materialized table that itself uses a query internally to provide data. View, /// A transient table. Temporary, @@ -99,7 +99,7 @@ pub trait TableSource: Sync + Send { } /// Tests whether the table provider can make use of any or all filter expressions - /// to optimise data retrieval. Only non-volatile expressions are passed to this function. + /// to optimize data retrieval. Only non-volatile expressions are passed to this function. 
fn supports_filters_pushdown( &self, filters: &[&Expr], diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 38e7106e6e64..96bb5c4b2d8f 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -520,7 +520,7 @@ fn get_valid_types( TypeSignature::Numeric(number) => { function_length_check(current_types.len(), *number)?; - // Find common numeric type amongs given types except string + // Find common numeric type among given types except string let mut valid_type = current_types.first().unwrap().to_owned(); for t in current_types.iter().skip(1) { let logical_data_type: NativeType = t.into(); diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 83200edfa24c..51c42b5c4c30 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -17,7 +17,7 @@ //! [`ScalarUDF`]: Scalar User Defined Functions -use crate::expr::schema_name_from_exprs_comma_seperated_without_space; +use crate::expr::schema_name_from_exprs_comma_separated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::{ @@ -436,7 +436,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { Ok(format!( "{}({})", self.name(), - schema_name_from_exprs_comma_seperated_without_space(args)? + schema_name_from_exprs_comma_separated_without_space(args)? )) } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 6f7c5d379260..9d0a2b5b95f6 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -67,7 +67,7 @@ pub fn grouping_set_expr_count(group_expr: &[Expr]) -> Result { "Invalid group by expressions, GroupingSet must be the only expression" ); } - // Groupings sets have an additional interal column for the grouping id + // Groupings sets have an additional integral column for the grouping id Ok(grouping_set.distinct_expr().len() + 1) } else { grouping_set_to_exprlist(group_expr).map(|exprs| exprs.len()) @@ -1112,7 +1112,7 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: Vec<&'a Expr>) -> Vec<& } } -/// Iteratate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// Iterate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` /// /// See [`split_conjunction_owned`] for more details and an example. pub fn iter_conjunction(expr: &Expr) -> impl Iterator { @@ -1136,7 +1136,7 @@ pub fn iter_conjunction(expr: &Expr) -> impl Iterator { }) } -/// Iteratate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// Iterate parts in a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` /// /// See [`split_conjunction_owned`] for more details and an example. pub fn iter_conjunction_owned(expr: Expr) -> impl Iterator { @@ -1301,7 +1301,7 @@ pub fn conjunction(filters: impl IntoIterator) -> Option { /// col("b").eq(lit(2)), /// ]; /// -/// // use disjuncton to join them together with `OR` +/// // use disjunction to join them together with `OR` /// assert_eq!(disjunction(split), Some(expr)); /// ``` pub fn disjunction(filters: impl IntoIterator) -> Option { diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 222914315d70..815d5742afd2 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -274,7 +274,7 @@ impl WindowFrame { Ok(()) } - /// Returns whether the window frame can accept multiple ORDER BY expressons. 
+ /// Returns whether the window frame can accept multiple ORDER BY expressions. pub fn can_accept_multi_orderby(&self) -> bool { match self.units { WindowFrameUnits::Rows => true, diff --git a/datafusion/ffi/src/table_provider.rs b/datafusion/ffi/src/table_provider.rs index 01f7c46106a2..b229d908d10d 100644 --- a/datafusion/ffi/src/table_provider.rs +++ b/datafusion/ffi/src/table_provider.rs @@ -66,7 +66,7 @@ use datafusion::error::Result; /// calls defined on the `FFI_` structs. The second purpose of the `Foreign` /// structs is to contain additional data that may be needed by the traits that /// are implemented on them. Some of these traits require borrowing data which -/// can be far more convienent to be locally stored. +/// can be far more convenient to be locally stored. /// /// For example, we have a struct `FFI_TableProvider` to give access to the /// `TableProvider` functions like `table_type()` and `scan()`. If we write a @@ -318,7 +318,7 @@ impl FFI_TableProvider { } } -/// This wrapper struct exists on the reciever side of the FFI interface, so it has +/// This wrapper struct exists on the receiver side of the FFI interface, so it has /// no guarantees about being able to access the data in `private_data`. Any functions /// defined on this struct must only use the stable functions provided in /// FFI_TableProvider to interact with the foreign table provider. @@ -397,7 +397,7 @@ impl TableProvider for ForeignTableProvider { } /// Tests whether the table provider can make use of a filter expression - /// to optimise data retrieval. + /// to optimize data retrieval. fn supports_filters_pushdown( &self, filters: &[&Expr], diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 03e4ef557269..aa2f5a586e87 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -79,7 +79,7 @@ use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; /// /// Logical group Current Min/Max value for that group stored /// number as a ScalarValue which points to an -/// indivdually allocated String +/// individually allocated String /// ///``` /// @@ -281,7 +281,7 @@ impl GroupsAccumulatorAdapter { /// See [`Self::allocation_bytes`] for rationale. fn free_allocation(&mut self, size: usize) { // use saturating sub to avoid errors if the accumulators - // report erronious sizes + // report erroneous sizes self.allocation_bytes = self.allocation_bytes.saturating_sub(size) } diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 13e40a2b9966..378fc8c42bc6 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -23,7 +23,7 @@ //! [Facebook's Folly TDigest] implementation. //! //! Alterations include reduction of runtime heap allocations, broader type -//! support, (de-)serialisation support, reduced type conversions and null value +//! support, (de-)serialization support, reduced type conversions and null value //! tolerance. //! //! [TDigest sketch algorithm]: https://arxiv.org/abs/1902.04023 @@ -612,7 +612,7 @@ impl TDigest { ] } - /// Unpack the serialised state of a [`TDigest`] produced by + /// Unpack the serialized state of a [`TDigest`] produced by /// [`Self::to_scalar_state()`]. 
/// /// # Correctness diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 61424e8f2445..000c69d9f331 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -231,7 +231,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { } #[allow(rustdoc::private_intra_doc_links)] - /// See [`TDigest::to_scalar_state()`] for a description of the serialised + /// See [`TDigest::to_scalar_state()`] for a description of the serialized /// state. fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> { Ok(vec![ diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 10b9b06f1f94..16dac2c1b8f0 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -172,7 +172,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { } #[allow(rustdoc::private_intra_doc_links)] - /// See [`TDigest::to_scalar_state()`] for a description of the serialised + /// See [`TDigest::to_scalar_state()`] for a description of the serialized /// state. fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> { self.approx_percentile_cont.state_fields(args) diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs index ffbf2ceef052..d4ae27533c6d 100644 --- a/datafusion/functions-aggregate/src/covariance.rs +++ b/datafusion/functions-aggregate/src/covariance.rs @@ -246,7 +246,7 @@ impl AggregateUDFImpl for CovariancePopulation { /// Journal of the American Statistical Association. 69 (348): 859–866. doi:10.2307/2286154. JSTOR 2286154. /// /// Though it is not covered in the original paper but is based on the same idea, as a result the algorithm is online, -/// parallelize and numerically stable. +/// parallelizable and numerically stable. #[derive(Debug)] pub struct CovarianceAccumulator { diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index a0f7634c5fa8..c4e05bd57de6 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -372,7 +372,7 @@ macro_rules! typed_min_max_batch_string { ScalarValue::$SCALAR(value) }}; } -// Statically-typed version of min/max(array) -> ScalarValue for binay types. +// Statically-typed version of min/max(array) -> ScalarValue for binary types. macro_rules!
typed_min_max_batch_binary { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = downcast_value!($VALUES, $ARRAYTYPE); diff --git a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs index 25499c252191..725b7a29bd47 100644 --- a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs +++ b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs @@ -449,7 +449,7 @@ impl MinMaxBytesState { self.min_max.resize(total_num_groups, None); // Minimize value copies by calculating the new min/maxes for each group // in this batch (either the existing min/max or the new input value) - // and updating the owne values in `self.min_maxes` at most once + // and updating the owned values in `self.min_maxes` at most once let mut locations = vec![MinMaxLocation::ExistingMinMax; total_num_groups]; // Figure out the new min value for each group @@ -463,12 +463,12 @@ impl MinMaxBytesState { // previous input value was the min/max, so compare it MinMaxLocation::Input(existing_val) => existing_val, MinMaxLocation::ExistingMinMax => { - let Some(exising_val) = self.min_max[group_index].as_ref() else { + let Some(existing_val) = self.min_max[group_index].as_ref() else { // no existing min/max, so this is the new min/max locations[group_index] = MinMaxLocation::Input(new_val); continue; }; - exising_val.as_ref() + existing_val.as_ref() } }; diff --git a/datafusion/functions/src/core/greatest.rs b/datafusion/functions/src/core/greatest.rs index e91ec2b0c4d8..654b2a2987c7 100644 --- a/datafusion/functions/src/core/greatest.rs +++ b/datafusion/functions/src/core/greatest.rs @@ -85,7 +85,7 @@ impl GreatestLeastOperator for GreatestFunc { /// Nulls are always considered smaller than any other value fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { // Fast path: - // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorized kernel // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. // - both array does not have any nulls: cmp::gt_eq will return null if any of the input is null while we want to return false in that case if !lhs.data_type().is_nested() diff --git a/datafusion/functions/src/core/least.rs b/datafusion/functions/src/core/least.rs index b9ea65cdb732..085090727773 100644 --- a/datafusion/functions/src/core/least.rs +++ b/datafusion/functions/src/core/least.rs @@ -98,7 +98,7 @@ impl GreatestLeastOperator for LeastFunc { /// Nulls are always considered larger than any other value fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { // Fast path: - // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorized kernel // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. 
// - both array does not have any nulls: cmp::lt_eq will return null if any of the input is null while we want to return false in that case if !lhs.data_type().is_nested() diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index ba8255d2e472..76fb4bbe5b47 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -115,8 +115,8 @@ pub fn functions() -> Vec> { // `get_field(my_struct_col, "field_name")`. // // However, it is also exposed directly for use cases such as - // serializing / deserializing plans with the field access desugared to - // calls to `get_field` + // serializing / deserializing plans with the field access desugared to + // calls to [`get_field`] get_field(), coalesce(), greatest(), diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 0c2d01376de9..7c86047a0243 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -215,7 +215,7 @@ mod tests { #[test] // Ensure that arrays with no nulls can also invoke NULLIF() correctly - fn nullif_int32_nonulls() -> Result<()> { + fn nullif_int32_non_nulls() -> Result<()> { let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); let a = ColumnarValue::Array(Arc::new(a)); diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 6c470eca3d46..eb8c9260601f 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -216,7 +216,7 @@ mod tests { #[test] // Ensure that arrays with no nulls can also invoke nvl() correctly - fn nvl_int32_nonulls() -> Result<()> { + fn nvl_int32_non_nulls() -> Result<()> { let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); let a = ColumnarValue::Array(Arc::new(a)); diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 49bf00d5c17b..bb3f2177b9a4 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -318,7 +318,7 @@ fn to_utc_date_time(nanos: i64) -> DateTime { // Supported intervals: // 1. IntervalDayTime: this means that the stride is in days, hours, minutes, seconds and milliseconds // We will assume month interval won't be converted into this type -// TODO (my next PR): without `INTERVAL` keyword, the stride was converted into ScalarValue::IntervalDayTime somwhere +// TODO (my next PR): without `INTERVAL` keyword, the stride was converted into ScalarValue::IntervalDayTime somewhere // for month interval. I need to find that and make it ScalarValue::IntervalMonthDayNano instead // 2. 
IntervalMonthDayNano fn date_bin_impl( diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index b43fcb6db706..0f115563c8db 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -105,7 +105,7 @@ impl ScalarUDFImpl for DatePartFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - internal_err!("return_type_from_exprs shoud be called instead") + internal_err!("return_type_from_exprs should be called instead") } fn return_type_from_exprs( diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 9b4a7b04552b..4ed9350e9729 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Regx expressions +//! Regex expressions use arrow::array::ArrayDataBuilder; use arrow::array::BufferBuilder; use arrow::array::GenericStringArray; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index c43aaeccbefe..442c055ac37d 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -145,7 +145,7 @@ pub mod expr_fn { "returns uuid v4 as a string value", ), ( contains, - "Return true if search_string is found within string. treated it like a reglike", + "Return true if search_string is found within string.", )); #[doc = "Removes all characters, spaces by default, from both sides of a string"] diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index a6587a91a9fe..f18573db827f 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -189,7 +189,7 @@ impl StringArrayBuilder { self.offsets_buffer.push(next_offset); } - /// Finalise the builder into a concrete [`StringArray`]. + /// Finalize the builder into a concrete [`StringArray`]. /// /// # Panics /// @@ -358,7 +358,7 @@ impl LargeStringArrayBuilder { self.offsets_buffer.push(next_offset); } - /// Finalise the builder into a concrete [`LargeStringArray`]. + /// Finalize the builder into a concrete [`LargeStringArray`]. /// /// # Panics /// diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 628c1498f973..89dd4ca60a6a 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -943,7 +943,7 @@ pub fn coerce_union_schema(inputs: &[Arc]) -> Result { ); } - // coerce data type and nullablity for each field + // coerce data type and nullability for each field for (union_datatype, union_nullable, union_field_map, plan_field) in izip!( union_datatypes.iter_mut(), union_nullabilities.iter_mut(), diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index ff75a6a60f4b..92e6dd1ad4d9 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -382,10 +382,10 @@ impl CommonSubexprEliminate { // keep column names and get rid of additional name // preserving logic here. 
if let Some(aggr_expr) = aggr_expr { - let name_perserver = NamePreserver::new_for_projection(); + let name_preserver = NamePreserver::new_for_projection(); let saved_names = aggr_expr .iter() - .map(|expr| name_perserver.save(expr)) + .map(|expr| name_preserver.save(expr)) .collect::>(); let new_aggr_expr = rewritten_aggr_expr .into_iter() diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 7fdad5ba4b6e..3e5a85ea02db 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -1593,7 +1593,7 @@ mod tests { assert_optimized_plan_equal(plan, expected) } - /// Test for correlated exists subquery filter with disjustions + /// Test for correlated exists subquery filter with disjunctions #[test] fn exists_subquery_disjunction() -> Result<()> { let sq = Arc::new( diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 9a47f437e444..64d24016f425 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -93,7 +93,7 @@ impl OptimizerRule for EliminateCrossJoin { let parent_predicate = if let LogicalPlan::Filter(filter) = plan { // if input isn't a join that can potentially be rewritten // avoid unwrapping the input - let rewriteable = matches!( + let rewritable = matches!( filter.input.as_ref(), LogicalPlan::Join(Join { join_type: JoinType::Inner, @@ -101,7 +101,7 @@ impl OptimizerRule for EliminateCrossJoin { }) ); - if !rewriteable { + if !rewritable { // recursively try to rewrite children return rewrite_children(self, LogicalPlan::Filter(filter), config); } diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 035a1d2da229..1213c8ffb368 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -94,7 +94,7 @@ impl OptimizerRule for EliminateGroupByConstant { /// Checks if expression is constant, and can be eliminated from group by. /// /// Intended to be used only within this rule, helper function, which heavily -/// reiles on `SimplifyExpressions` result. +/// relies on `SimplifyExpressions` result. 
fn is_constant_expression(expr: &Expr) -> bool { match expr { Expr::Alias(e) => is_constant_expression(&e.expr), diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 7c8e4120ea20..f6e3eec6743c 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -35,7 +35,7 @@ use datafusion_expr::{ TableScan, Window, }; -use crate::optimize_projections::required_indices::RequiredIndicies; +use crate::optimize_projections::required_indices::RequiredIndices; use crate::utils::NamePreserver; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, @@ -85,7 +85,7 @@ impl OptimizerRule for OptimizeProjections { config: &dyn OptimizerConfig, ) -> Result> { // All output fields are necessary: - let indices = RequiredIndicies::new_for_all_exprs(&plan); + let indices = RequiredIndices::new_for_all_exprs(&plan); optimize_projections(plan, config, indices) } } @@ -113,7 +113,7 @@ impl OptimizerRule for OptimizeProjections { fn optimize_projections( plan: LogicalPlan, config: &dyn OptimizerConfig, - indices: RequiredIndicies, + indices: RequiredIndices, ) -> Result> { // Recursively rewrite any nodes that may be able to avoid computation given // their parents' required indices. @@ -176,7 +176,7 @@ fn optimize_projections( let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter()); let schema = aggregate.input.schema(); let necessary_indices = - RequiredIndicies::new().with_exprs(schema, all_exprs_iter); + RequiredIndices::new().with_exprs(schema, all_exprs_iter); let necessary_exprs = necessary_indices.get_required_exprs(schema); return optimize_projections( @@ -274,7 +274,7 @@ fn optimize_projections( // For other plan node types, calculate indices for columns they use and // try to rewrite their children - let mut child_required_indices: Vec = match &plan { + let mut child_required_indices: Vec = match &plan { LogicalPlan::Sort(_) | LogicalPlan::Filter(_) | LogicalPlan::Repartition(_) @@ -320,7 +320,7 @@ fn optimize_projections( // EXISTS expression), we may not need to require all indices. plan.inputs() .into_iter() - .map(RequiredIndicies::new_for_all_exprs) + .map(RequiredIndices::new_for_all_exprs) .collect() } LogicalPlan::Extension(extension) => { @@ -340,7 +340,7 @@ fn optimize_projections( .into_iter() .zip(necessary_children_indices) .map(|(child, necessary_indices)| { - RequiredIndicies::new_from_indices(necessary_indices) + RequiredIndices::new_from_indices(necessary_indices) .with_plan_exprs(&plan, child.schema()) }) .collect::>>()? @@ -379,7 +379,7 @@ fn optimize_projections( LogicalPlan::Unnest(Unnest { dependency_indices, .. }) => { - vec![RequiredIndicies::new_from_indices( + vec![RequiredIndices::new_from_indices( dependency_indices.clone(), )] } @@ -443,7 +443,7 @@ fn optimize_projections( /// - `Ok(Some(Projection))`: Merge was beneficial and successful. Contains the /// merged projection. /// - `Ok(None)`: Signals that merge is not beneficial (and has not taken place). -/// - `Err(error)`: An error occured during the function call. +/// - `Err(error)`: An error occurred during the function call. fn merge_consecutive_projections(proj: Projection) -> Result> { let Projection { expr, @@ -672,9 +672,9 @@ fn outer_columns_helper_multi<'a, 'b>( /// adjusted based on the join type. 
fn split_join_requirements( left_len: usize, - indices: RequiredIndicies, + indices: RequiredIndices, join_type: &JoinType, -) -> (RequiredIndicies, RequiredIndicies) { +) -> (RequiredIndices, RequiredIndices) { match join_type { // In these cases requirements are split between left/right children: JoinType::Inner @@ -687,10 +687,10 @@ fn split_join_requirements( indices.split_off(left_len) } // All requirements can be re-routed to left child directly. - JoinType::LeftAnti | JoinType::LeftSemi => (indices, RequiredIndicies::new()), + JoinType::LeftAnti | JoinType::LeftSemi => (indices, RequiredIndices::new()), // All requirements can be re-routed to right side directly. // No need to change index, join schema is right child schema. - JoinType::RightSemi | JoinType::RightAnti => (RequiredIndicies::new(), indices), + JoinType::RightSemi | JoinType::RightAnti => (RequiredIndices::new(), indices), } } @@ -741,18 +741,18 @@ fn add_projection_on_top_if_helpful( /// /// - `Ok(Some(LogicalPlan))`: Contains the rewritten projection /// - `Ok(None)`: No rewrite necessary. -/// - `Err(error)`: An error occured during the function call. +/// - `Err(error)`: An error occurred during the function call. fn rewrite_projection_given_requirements( proj: Projection, config: &dyn OptimizerConfig, - indices: &RequiredIndicies, + indices: &RequiredIndices, ) -> Result> { let Projection { expr, input, .. } = proj; let exprs_used = indices.get_at_indices(&expr); let required_indices = - RequiredIndicies::new().with_exprs(input.schema(), exprs_used.iter()); + RequiredIndices::new().with_exprs(input.schema(), exprs_used.iter()); // rewrite the children projection, and if they are changed rewrite the // projection down diff --git a/datafusion/optimizer/src/optimize_projections/required_indices.rs b/datafusion/optimizer/src/optimize_projections/required_indices.rs index 55e8081eaeb2..c1e0885c9b5f 100644 --- a/datafusion/optimizer/src/optimize_projections/required_indices.rs +++ b/datafusion/optimizer/src/optimize_projections/required_indices.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`RequiredIndicies`] helper for OptimizeProjection +//! [`RequiredIndices`] helper for OptimizeProjection use crate::optimize_projections::outer_columns; use datafusion_common::tree_node::TreeNodeRecursion; @@ -35,7 +35,7 @@ use datafusion_expr::{Expr, LogicalPlan}; /// indices were added `[3, 2, 4, 3, 6, 1]`, the instance would be represented /// by `[1, 2, 3, 4, 6]`. #[derive(Debug, Clone, Default)] -pub(super) struct RequiredIndicies { +pub(super) struct RequiredIndices { /// The indices of the required columns in the indices: Vec, /// If putting a projection above children is beneficial for the parent. 
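The `RequiredIndicies` to `RequiredIndices` rename above touches the helper whose doc comment (in the hunk just before this) describes keeping the required column indices sorted and de-duplicated. As a rough illustration of that invariant only (the struct and method below are hypothetical stand-ins, not DataFusion's actual API), a minimal Rust sketch could look like this:

#[derive(Debug, Clone, Default)]
struct RequiredIndicesSketch {
    // Indices of required columns, kept sorted and de-duplicated.
    indices: Vec<usize>,
}

impl RequiredIndicesSketch {
    // Insert an index while preserving the sorted, de-duplicated invariant.
    fn add_index(&mut self, idx: usize) {
        if let Err(pos) = self.indices.binary_search(&idx) {
            self.indices.insert(pos, idx);
        }
    }
}

fn main() {
    let mut required = RequiredIndicesSketch::default();
    for idx in [3, 2, 4, 3, 6, 1] {
        required.add_index(idx);
    }
    // Matches the example in the doc comment: [3, 2, 4, 3, 6, 1] -> [1, 2, 3, 4, 6]
    assert_eq!(required.indices, vec![1, 2, 3, 4, 6]);
}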
@@ -43,7 +43,7 @@ pub(super) struct RequiredIndicies { projection_beneficial: bool, } -impl RequiredIndicies { +impl RequiredIndices { /// Create a new, empty instance pub fn new() -> Self { Self::default() } diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 2e2c8fb1d6f8..9e7f8eed8a25 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -134,7 +134,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { return Ok(Transformed::no(LogicalPlan::Projection(projection))); } - let mut all_subqueryies = vec![]; + let mut all_subqueries = vec![]; let mut expr_to_rewrite_expr_map = HashMap::new(); let mut subquery_to_expr_map = HashMap::new(); for expr in projection.expr.iter() { @@ -143,15 +143,15 @@ impl OptimizerRule for ScalarSubqueryToJoin { for (subquery, _) in &subqueries { subquery_to_expr_map.insert(subquery.clone(), expr.clone()); } - all_subqueryies.extend(subqueries); + all_subqueries.extend(subqueries); expr_to_rewrite_expr_map.insert(expr, rewrite_exprs); } - if all_subqueryies.is_empty() { + if all_subqueries.is_empty() { return internal_err!("Expected subqueries not found in projection"); } // iterate through all subqueries in predicate, turning each into a left join let mut cur_input = projection.input.as_ref().clone(); - for (subquery, alias) in all_subqueryies { + for (subquery, alias) in all_subqueries { if let Some((optimized_subquery, expr_check_map)) = build_join(&subquery, &cur_input, &alias)? { @@ -879,7 +879,7 @@ mod tests { Ok(()) } - /// Test for correlated scalar subquery filter with disjustions + /// Test for correlated scalar subquery filter with disjunctions #[test] fn scalar_subquery_disjunction() -> Result<()> { let sq = Arc::new( diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index e3bcb6da8e53..74d2ce0b6be9 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3661,7 +3661,7 @@ mod tests { } #[test] - fn test_like_and_ilke() { + fn test_like_and_ilike() { let null = lit(ScalarValue::Utf8(None)); // expr [NOT] [I]LIKE NULL @@ -3931,7 +3931,7 @@ mod tests { } #[test] - fn simplify_common_factor_conjuction_in_disjunction() { + fn simplify_common_factor_conjunction_in_disjunction() { let props = ExecutionProps::new(); let schema = boolean_test_schema(); let simplifier = diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 31e21d08b569..8cba2c88e244 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -281,7 +281,7 @@ fn is_supported_type(data_type: &DataType) -> bool { || is_supported_dictionary_type(data_type) } -/// Returns true if [[UnwrapCastExprRewriter]] suppors this numeric type +/// Returns true if [[UnwrapCastExprRewriter]] supports this numeric type fn is_supported_numeric_type(data_type: &DataType) -> bool { matches!( data_type, diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 902e53a7f236..b35d978045d9 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -259,7 +259,7 @@ mod tests { assert!(eq_groups.contains(&col_a_expr));
assert!(eq_groups.contains(&col_b_expr)); - // b and c are aliases. Exising equivalence class should expand, + // b and c are aliases. Existing equivalence class should expand, // however there shouldn't be any new equivalence class eq_properties.add_equal_conditions(&col_b_expr, &col_c_expr)?; assert_eq!(eq_properties.eq_group().len(), 1); diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index f019b2e570ff..a7f27ab73684 100755 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -1408,7 +1408,7 @@ fn construct_prefix_orderings( /// current projection expression. /// /// # Example -/// If `dependences` is `a + b ASC` and the dependency map holds dependencies +/// If `dependencies` is `a + b ASC` and the dependency map holds dependencies /// * `a ASC` --> `[c ASC]` /// * `b ASC` --> `[d DESC]`, /// diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 938d775a2ad1..2ab53b214d7f 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -404,7 +404,7 @@ impl PhysicalExpr for BinaryExpr { if self.op.eq(&Operator::And) { if interval.eq(&Interval::CERTAINLY_TRUE) { // A certainly true logical conjunction can only derive from possibly - // true operands. Otherwise, we prove infeasability. + // true operands. Otherwise, we prove infeasibility. Ok((!left_interval.eq(&Interval::CERTAINLY_FALSE) && !right_interval.eq(&Interval::CERTAINLY_FALSE)) .then(|| vec![Interval::CERTAINLY_TRUE, Interval::CERTAINLY_TRUE])) @@ -444,7 +444,7 @@ impl PhysicalExpr for BinaryExpr { } else if self.op.eq(&Operator::Or) { if interval.eq(&Interval::CERTAINLY_FALSE) { // A certainly false logical conjunction can only derive from certainly - // false operands. Otherwise, we prove infeasability. + // false operands. Otherwise, we prove infeasibility. Ok((!left_interval.eq(&Interval::CERTAINLY_TRUE) && !right_interval.eq(&Interval::CERTAINLY_TRUE)) .then(|| vec![Interval::CERTAINLY_FALSE, Interval::CERTAINLY_FALSE])) diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 5f6932f6d725..0649cbd65d34 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -42,7 +42,7 @@ use datafusion_expr::ColumnarValue; /// /// # Example: /// If the schema is `a`, `b`, `c` the `Column` for `b` would be represented by -/// index 1, since `b` is the second colum in the schema. +/// index 1, since `b` is the second column in the schema. 
/// /// ``` /// # use datafusion_physical_expr::expressions::Column; diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index c594f039ff2f..232f9769b056 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -113,7 +113,7 @@ mod tests { #[test] fn literal_i32() -> Result<()> { - // create an arbitrary record bacth + // create an arbitrary record batch let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); let a = Int32Array::from(vec![Some(1), None, Some(3), Some(4), Some(5)]); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; diff --git a/datafusion/physical-expr/src/partitioning.rs b/datafusion/physical-expr/src/partitioning.rs index 98c0c864b9f7..eb7e1ea6282b 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -97,7 +97,7 @@ use std::sync::Arc; /// # Additional Examples /// /// A simple `FileScanExec` might produce one output stream (partition) for each -/// file (note the actual DataFusion file scaners can read individual files in +/// file (note the actual DataFusion file scanners can read individual files in /// parallel, potentially producing multiple partitions per file) /// /// Plans such as `SortPreservingMerge` produce a single output stream diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 82c718cfaca3..0ae4115de67a 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -233,7 +233,7 @@ pub fn create_physical_expr( // verify that input data types is consistent with function's `TypeSignature` data_types_with_scalar_udf(&input_expr_types, fun)?; - // Since we have arg_types, we dont need args and schema. + // Since we have arg_types, we don't need args and schema. let return_type = fun.return_type_from_exprs(args, input_dfschema, &input_expr_types)?; diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 2c73df7cfd7d..7afb78b8bf2e 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -808,7 +808,7 @@ mod test { vec![not_in_guarantee("b", [1, 2, 3]), in_guarantee("b", [3, 4])], ); // b IN (1, 2, 3) OR b = 2 - // TODO this should be in_guarantee("b", [1, 2, 3]) but currently we don't support to anylize this kind of disjunction. Only `ColOpLit OR ColOpLit` is supported. + // TODO this should be in_guarantee("b", [1, 2, 3]) but currently we don't support to analyze this kind of disjunction. Only `ColOpLit OR ColOpLit` is supported. 
test_analyze( col("b") .in_list(vec![lit(1), lit(2), lit(3)], false) diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index dffdc49adf09..0849a3d97a83 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -294,7 +294,7 @@ mod tests { let field = &fields[0]; assert_eq!(field.name(), agg.column_name()); assert_eq!(field.data_type(), &DataType::Int64); - // note that nullabiolity differs + // note that nullability differs assert_eq!( as_int64_array(batch.column(0)).unwrap().values(), @@ -377,7 +377,7 @@ mod tests { Arc::clone(&schema), )?; - // We introduce an intermediate optimization step between the partial and final aggregtator + // We introduce an intermediate optimization step between the partial and final aggregator let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); let final_agg = AggregateExec::try_new( @@ -409,7 +409,7 @@ mod tests { Arc::clone(&schema), )?; - // We introduce an intermediate optimization step between the partial and final aggregtator + // We introduce an intermediate optimization step between the partial and final aggregator let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); let final_agg = AggregateExec::try_new( diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index d5ffaad6d872..e107bb85d7b8 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -44,7 +44,7 @@ use crate::PhysicalOptimizerRule; /// `new_add_mode` and `new_remove_mode`. With this rule, we can keep track of /// the global requirements (ordering and distribution) across rules. /// -/// The primary usecase of this node and rule is to specify and preserve the desired output +/// The primary use case of this node and rule is to specify and preserve the desired output /// ordering and distribution the entire plan. When sending to a single client, a single partition may /// be desirable, but when sending to a multi-partitioned writer, keeping multiple partitions may be /// better. diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index 35a79cbd91ed..8e975e10180f 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -352,7 +352,7 @@ where let null_buffer = self.nulls.take_n(n); let first_remaining_offset = O::as_usize(self.offsets[n]); - // Given offests like [0, 2, 4, 5] and n = 1, we expect to get + // Given offsets like [0, 2, 4, 5] and n = 1, we expect to get // offsets [0, 2, 3]. We first create two offsets for first_n as [0, 2] and the remaining as [2, 4, 5]. // And we shift the offset starting from 0 for the remaining one, [2, 4, 5] -> [0, 2, 3]. 
let mut first_n_offsets = self.offsets.drain(0..n).collect::>(); diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index c261310f56e3..98787d740c20 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -137,7 +137,7 @@ struct SkipAggregationProbe { // ======================================================================== // STATES: // Fields changes during execution. Can be buffer, or state flags that - // influence the exeuction in parent `GroupedHashAggregateStream` + // influence the execution in parent `GroupedHashAggregateStream` // ======================================================================== /// Number of processed input rows (updated during probing) input_rows: usize, diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 1fc3280ceb16..708f006b0d39 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -173,7 +173,7 @@ impl ExecutionPlan for AnalyzeExec { ); } - // Create future that computes thefinal output + // Create future that computes the final output let start = Instant::now(); let captured_input = Arc::clone(&self.input); let captured_schema = Arc::clone(&self.schema); diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 46875fae94fc..f38876d93ec1 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -180,7 +180,7 @@ impl BatchCoalescer { /// Indicates the state of the [`BatchCoalescer`] buffer after the /// [`BatchCoalescer::push_batch()`] operation. /// -/// The caller should take diferent actions, depending on the variant returned. +/// The caller should take different actions, depending on the variant returned. pub enum CoalescerState { /// Neither the limit nor the target batch size is reached. /// diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 88b85a85a102..961d2f639897 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -457,7 +457,7 @@ pub trait DisplayAs { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> fmt::Result; } -/// A newtype wrapper to display `T` implementing`DisplayAs` using the `Default` mode +/// A new type wrapper to display `T` implementing`DisplayAs` using the `Default` mode pub struct DefaultDisplay(pub T); impl fmt::Display for DefaultDisplay { @@ -466,7 +466,7 @@ impl fmt::Display for DefaultDisplay { } } -/// A newtype wrapper to display `T` implementing `DisplayAs` using the `Verbose` mode +/// A new type wrapper to display `T` implementing `DisplayAs` using the `Verbose` mode pub struct VerboseDisplay(pub T); impl fmt::Display for VerboseDisplay { diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 09bb80734401..5f0b229ce92a 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -521,7 +521,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { /// For unbounded streams, it also tracks whether the operator requires finite memory /// to process the stream or if memory usage could grow unbounded. 
/// -/// Bounedness of the output stream is based on the the boundedness of the input stream and the nature of +/// Boundedness of the output stream is based on the the boundedness of the input stream and the nature of /// the operator. For example, limit or topk with fetch operator can convert an unbounded stream to a bounded stream. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Boundedness { @@ -903,7 +903,7 @@ pub fn execute_stream_partitioned( /// and context. It then checks if there are any columns in the input that might /// violate the `not null` constraints specified in the `sink_schema`. If there are /// such columns, it wraps the resulting stream to enforce the `not null` constraints -/// by invoking the `check_not_null_contraits` function on each batch of the stream. +/// by invoking the [`check_not_null_constraints`] function on each batch of the stream. pub fn execute_input_stream( input: Arc, sink_schema: SchemaRef, diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index ef70392a01b7..dabe42ee43a2 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -138,13 +138,13 @@ impl JoinLeftData { } #[allow(rustdoc::private_intra_doc_links)] -/// Join execution plan: Evaluates eqijoin predicates in parallel on multiple +/// Join execution plan: Evaluates equijoin predicates in parallel on multiple /// partitions using a hash table and an optional filter list to apply post /// join. /// /// # Join Expressions /// -/// This implementation is optimized for evaluating eqijoin predicates ( +/// This implementation is optimized for evaluating equijoin predicates ( /// ` = `) expressions, which are represented as a list of `Columns` /// in [`Self::on`]. /// @@ -198,7 +198,7 @@ impl JoinLeftData { /// /// Original build-side data Inserting build-side values into hashmap Concatenated build-side batch /// ┌───────────────────────────┐ -/// hasmap.insert(row-hash, row-idx + offset) │ idx │ +/// hashmap.insert(row-hash, row-idx + offset) │ idx │ /// ┌───────┐ │ ┌───────┐ │ /// │ Row 1 │ 1) update_hash for batch 3 with offset 0 │ │ Row 6 │ 0 │ /// Batch 1 │ │ - hashmap.insert(Row 7, idx 1) │ Batch 3 │ │ │ @@ -849,7 +849,7 @@ async fn collect_left_input( acc.2.build_mem_used.add(batch_size); acc.2.build_input_batches.add(1); acc.2.build_input_rows.add(batch.num_rows()); - // Update rowcount + // Update row count acc.1 += batch.num_rows(); // Push batch to output acc.0.push(batch); @@ -3490,7 +3490,7 @@ mod tests { Ok(()) } - /// Test for parallelised HashJoinExec with PartitionMode::CollectLeft + /// Test for parallelized HashJoinExec with PartitionMode::CollectLeft #[tokio::test] async fn test_collect_left_multiple_partitions_join() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index b8cb7b313bc1..838339c67440 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -86,7 +86,7 @@ use futures::{Stream, StreamExt}; /// # Sorting /// /// Assumes that both the left and right input to the join are pre-sorted. It is not the -/// responisibility of this execution plan to sort the inputs. +/// responsibility of this execution plan to sort the inputs. 
/// /// # "Streamed" vs "Buffered" /// @@ -101,7 +101,7 @@ use futures::{Stream, StreamExt}; /// If the memory limit increases beyond the specified value and spilling is enabled, /// buffered batches could be spilled to disk. If spilling is disabled, the execution /// will fail under the same conditions. Multiple record batches of buffered could currently reside -/// in memory/disk during the exectution. The number of buffered batches residing in +/// in memory/disk during the execution. The number of buffered batches residing in /// memory/disk depends on the number of rows of buffered input having the same value /// of join key as that of streamed input rows currently present in memory. Due to pre-sorted inputs, /// the algorithm understands when it is not needed anymore, and releases the buffered batches diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 0366c9fa5e46..d792e143046c 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -1645,7 +1645,7 @@ macro_rules! handle_state { /// Represents the result of a stateful operation. /// -/// This enumueration indicates whether the state produced a result that is +/// This enumeration indicates whether the state produced a result that is /// ready for use (`Ready`) or if the operation requires continuation (`Continue`). /// /// Variants: diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index 2037ddb70c2d..dbda0a310ce5 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -50,7 +50,7 @@ pub struct MetricBuilder<'a> { /// optional partition number partition: Option, - /// arbitrary name=value pairs identifiying this metric + /// arbitrary name=value pairs identifying this metric labels: Vec