minor: fix typos in comments / structure names (#13879)
* minor: fix typo error in datafusion

* fix: fix rebase error

* fix: format HashJoinExec doc

* doc: recover thiserror/preemptively

* fix: other typo error fixed

* fix: directories to dir_entries in catalog example
zhuliquan authored Dec 23, 2024
1 parent 63ba5b6 commit e718c1a
Showing 138 changed files with 277 additions and 277 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -73,7 +73,7 @@ version = "43.0.0"
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
#
-# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
+# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
2 changes: 1 addition & 1 deletion datafusion-cli/src/functions.rs
@@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
Field::new("total_uncompressed_size", DataType::Int64, true),
]));

-// construct recordbatch from metadata
+// construct record batch from metadata
let mut filename_arr = vec![];
let mut row_group_id_arr = vec![];
let mut row_group_num_rows_arr = vec![];
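The hunk above sits inside code that accumulates one `Vec` per output column and then assembles them into an Arrow `RecordBatch`. A minimal, self-contained sketch of that assembly step with arrow-rs (the two columns are illustrative, not the real `parquet_metadata` schema):

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("filename", DataType::Utf8, true),
        Field::new("row_group_id", DataType::Int64, true),
    ]));

    // Column vectors accumulated row by row, as in the function above
    let filename_arr = vec!["a.parquet", "b.parquet"];
    let row_group_id_arr = vec![0_i64, 1];

    // Construct the record batch from the collected columns
    let columns: Vec<ArrayRef> = vec![
        Arc::new(StringArray::from(filename_arr)),
        Arc::new(Int64Array::from(row_group_id_arr)),
    ];
    let batch = RecordBatch::try_new(schema, columns)?;
    assert_eq!(batch.num_rows(), 2);
    Ok(())
}
```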
2 changes: 1 addition & 1 deletion datafusion-examples/README.md
@@ -22,7 +22,7 @@
This crate includes end to end, highly commented examples of how to use
various DataFusion APIs to help you get started.

-## Prerequisites:
+## Prerequisites

Run `git submodule update --init` to init test files.

4 changes: 2 additions & 2 deletions datafusion-examples/examples/advanced_parquet_index.rs
@@ -82,7 +82,7 @@ use url::Url;
/// Specifically, this example illustrates how to:
/// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query
/// 2. Use [`PruningPredicate`] for predicate analysis
-/// 3. Pass a row group selection to [`ParuetExec`]
+/// 3. Pass a row group selection to [`ParquetExec`]
/// 4. Pass a row selection (within a row group) to [`ParquetExec`]
///
/// Note this is a *VERY* low level example for people who want to build their
@@ -211,7 +211,7 @@ async fn main() -> Result<()> {
//
// Note: in order to prune pages, the Page Index must be loaded and the
// ParquetExec will load it on demand if not present. To avoid a second IO
-// during query, this example loaded the Page Index pre-emptively by setting
+// during query, this example loaded the Page Index preemptively by setting
// `ArrowReader::with_page_index` in `IndexedFile::try_new`
provider.set_use_row_selection(true);
println!("** Select data, predicate `id = 950`");
4 changes: 2 additions & 2 deletions datafusion-examples/examples/analyzer_rule.rs
@@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl {
fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result<LogicalPlan> {
// use the TreeNode API to recursively walk the LogicalPlan tree
// and all of its children (inputs)
-let transfomed_plan = plan.transform(|plan| {
+let transformed_plan = plan.transform(|plan| {
// This closure is called for each LogicalPlan node
// if it is a Scan node, add a filter to remove all managers
if is_employee_table_scan(&plan) {
@@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl {
//
// This example does not need the value of either flag, so simply
// extract the LogicalPlan "data"
-Ok(transfomed_plan.data)
+Ok(transformed_plan.data)
}

fn name(&self) -> &str {
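The `transform` call renamed above is the general `TreeNode` rewrite pattern: the closure visits each node bottom-up, reports whether it changed anything via the `Transformed` wrapper, and the rewritten tree is taken from `.data`. A minimal sketch of that pattern, assuming the `datafusion_common::tree_node` API of this release; the two helper functions are hypothetical stand-ins for real rewrite logic:

```rust
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::Result;
use datafusion_expr::LogicalPlan;

// Hypothetical helpers standing in for real rewrite logic
fn node_needs_rewrite(_node: &LogicalPlan) -> bool {
    false
}

fn rewrite_node(node: LogicalPlan) -> Result<LogicalPlan> {
    Ok(node)
}

/// Walk `plan` and rewrite matching nodes, as `analyze` does above
fn rewrite_plan(plan: LogicalPlan) -> Result<LogicalPlan> {
    let transformed_plan = plan.transform(|node| {
        if node_needs_rewrite(&node) {
            // Signal that this subtree was rewritten
            Ok(Transformed::yes(rewrite_node(node)?))
        } else {
            // Leave the node untouched
            Ok(Transformed::no(node))
        }
    })?;
    // `Transformed` bundles the plan with change flags; only the plan
    // itself is needed here
    Ok(transformed_plan.data)
}
```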
10 changes: 5 additions & 5 deletions datafusion-examples/examples/catalog.rs
@@ -46,11 +46,11 @@ async fn main() -> Result<()> {

let ctx = SessionContext::new();
let state = ctx.state();
-let cataloglist = Arc::new(CustomCatalogProviderList::new());
+let catalog_list = Arc::new(CustomCatalogProviderList::new());

// use our custom catalog list for context. each context has a single catalog list.
// context will by default have [`MemoryCatalogProviderList`]
-ctx.register_catalog_list(cataloglist.clone());
+ctx.register_catalog_list(catalog_list.clone());

// initialize our catalog and schemas
let catalog = DirCatalog::new();
@@ -81,7 +81,7 @@ async fn main() -> Result<()> {
ctx.register_catalog("dircat", Arc::new(catalog));
{
// catalog was passed down into our custom catalog list since we override the ctx's default
-let catalogs = cataloglist.catalogs.read().unwrap();
+let catalogs = catalog_list.catalogs.read().unwrap();
assert!(catalogs.contains_key("dircat"));
};

@@ -144,8 +144,8 @@ impl DirSchema {
async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result<Arc<Self>> {
let DirSchemaOpts { ext, dir, format } = opts;
let mut tables = HashMap::new();
-let direntries = std::fs::read_dir(dir).unwrap();
-for res in direntries {
+let dir_entries = std::fs::read_dir(dir).unwrap();
+for res in dir_entries {
let entry = res.unwrap();
let filename = entry.file_name().to_str().unwrap().to_string();
if !filename.ends_with(ext) {
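The renamed `dir_entries` loop is a plain `std::fs::read_dir` scan that keeps files with a matching extension. The same logic as a standalone sketch, using `?` instead of `unwrap` (names are illustrative):

```rust
use std::fs;
use std::io;

/// Collect file names under `dir` that end with `ext`, mirroring the
/// scan in `DirSchema::create` above
fn list_table_files(dir: &str, ext: &str) -> io::Result<Vec<String>> {
    let mut names = Vec::new();
    for res in fs::read_dir(dir)? {
        let entry = res?;
        let filename = entry.file_name().to_string_lossy().into_owned();
        if filename.ends_with(ext) {
            names.push(filename);
        }
    }
    Ok(names)
}

fn main() -> io::Result<()> {
    for name in list_table_files(".", ".csv")? {
        println!("{name}");
    }
    Ok(())
}
```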
4 changes: 2 additions & 2 deletions datafusion-examples/examples/expr_api.rs
@@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter;
/// 4. Simplify expressions: [`simplify_demo`]
/// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`]
/// 6. Get the types of the expressions: [`expression_type_demo`]
-/// 7. Apply type cocercion to expressions: [`type_coercion_demo`]
+/// 7. Apply type coercion to expressions: [`type_coercion_demo`]
#[tokio::main]
async fn main() -> Result<()> {
// The easiest way to do create expressions is to use the
@@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> {
)?;
assert!(physical_expr.evaluate(&batch).is_ok());

-// 4. Apply explict type coercion by manually rewriting the expression
+// 4. Apply explicit type coercion by manually rewriting the expression
let coerced_expr = expr
.transform(|e| {
// Only type coerces binary expressions.
2 changes: 1 addition & 1 deletion datafusion-examples/examples/function_factory.rs
@@ -36,7 +36,7 @@ use datafusion_expr::{
///
/// Apart from [FunctionFactory], this example covers
/// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace
-/// a function call with another expression at rutime.
+/// a function call with another expression at runtime.
///
/// This example is rather simple and does not cover all cases required for a
/// real implementation.
2 changes: 1 addition & 1 deletion datafusion-examples/examples/memtable.rs
@@ -25,7 +25,7 @@ use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;

-/// This example demonstrates executing a simple query against a Memtable
+/// This example demonstrates executing a simple query against a [`MemTable`]
#[tokio::main]
async fn main() -> Result<()> {
let mem_table = create_memtable()?;
2 changes: 1 addition & 1 deletion datafusion-examples/examples/optimizer_rule.rs
@@ -146,7 +146,7 @@ impl MyOptimizerRule {
// Closure called for each sub tree
match expr {
Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => {
-// destruture the expression
+// destructure the expression
let BinaryExpr { left, op: _, right } = binary_expr;
// rewrite to `my_eq(left, right)`
let udf = ScalarUDF::new_from_impl(MyEq::new());
2 changes: 1 addition & 1 deletion datafusion-examples/examples/plan_to_sql.rs
@@ -65,7 +65,7 @@ fn simple_expr_to_sql_demo() -> Result<()> {
Ok(())
}

-/// DataFusioon can remove parentheses when converting an expression to SQL.
+/// DataFusion can remove parentheses when converting an expression to SQL.
/// Note that output is intended for humans, not for other SQL engines,
/// as difference in precedence rules can cause expressions to be parsed differently.
fn simple_expr_to_pretty_sql_demo() -> Result<()> {
6 changes: 3 additions & 3 deletions datafusion-examples/examples/simple_udtf.rs
@@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc {
let limit = exprs
.get(1)
.map(|expr| {
-// try to simpify the expression, so 1+2 becomes 3, for example
+// try to simplify the expression, so 1+2 becomes 3, for example
let execution_props = ExecutionProps::new();
let info = SimplifyContext::new(&execution_props);
let expr = ExprSimplifier::new(info).simplify(expr.clone())?;
@@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef<Path>) -> Result<(SchemaRef, Vec<Record
.with_header(true)
.build(file)?;
let mut batches = vec![];
-for bacth in reader {
-batches.push(bacth?);
+for batch in reader {
+batches.push(batch?);
}
let schema = Arc::new(schema);
Ok((schema, batches))
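The `ExprSimplifier` call in this hunk is what folds `1 + 2` into `3` before the value is used as a limit. A sketch of that flow in isolation; the import paths follow recent DataFusion releases and are an assumption here, since these types have moved between crates over time:

```rust
use datafusion_common::Result;
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::lit;
use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};

fn main() -> Result<()> {
    // Same setup as the hunk above (paths may differ by version)
    let execution_props = ExecutionProps::new();
    let info = SimplifyContext::new(&execution_props);

    // Constant-fold `1 + 2` into the literal `3`
    let simplified = ExprSimplifier::new(info).simplify(lit(1) + lit(2))?;
    assert_eq!(simplified, lit(3));
    Ok(())
}
```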
2 changes: 1 addition & 1 deletion datafusion/common/src/column.rs
@@ -230,7 +230,7 @@ impl Column {
.collect::<Vec<_>>();
for using_col in using_columns {
let all_matched = columns.iter().all(|c| using_col.contains(c));
-// All matched fields belong to the same using column set, in orther words
+// All matched fields belong to the same using column set, in other words
// the same join clause. We simply pick the qualifier from the first match.
if all_matched {
return Ok(columns[0].clone());
4 changes: 2 additions & 2 deletions datafusion/common/src/config.rs
@@ -904,12 +904,12 @@ pub trait ConfigExtension: ExtensionOptions {
pub trait ExtensionOptions: Send + Sync + fmt::Debug + 'static {
/// Return `self` as [`Any`]
///
-/// This is needed until trait upcasting is stabilised
+/// This is needed until trait upcasting is stabilized
fn as_any(&self) -> &dyn Any;

/// Return `self` as [`Any`]
///
-/// This is needed until trait upcasting is stabilised
+/// This is needed until trait upcasting is stabilized
fn as_any_mut(&mut self) -> &mut dyn Any;

/// Return a deep clone of this [`ExtensionOptions`]
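The `as_any` doc comments fixed here describe a common workaround: on the toolchains DataFusion supported at the time, `&dyn ExtensionOptions` could not be upcast to `&dyn Any` directly, so the trait exposes the conversion itself. A minimal sketch of the idiom, independent of DataFusion's actual traits:

```rust
use std::any::Any;

trait ExtensionLike: Any {
    // Manual escape hatch until dyn-trait upcasting is available
    fn as_any(&self) -> &dyn Any;
}

#[derive(Debug)]
struct MyOptions {
    verbose: bool,
}

impl ExtensionLike for MyOptions {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let ext: Box<dyn ExtensionLike> = Box::new(MyOptions { verbose: true });
    // Recover the concrete type through `Any`
    let opts = ext.as_any().downcast_ref::<MyOptions>().unwrap();
    assert!(opts.verbose);
}
```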
2 changes: 1 addition & 1 deletion datafusion/common/src/cse.rs
@@ -60,7 +60,7 @@ pub trait Normalizeable {
}

/// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing
-/// normlized nodes in optimizations like Common Subexpression Elimination (CSE).
+/// normalized nodes in optimizations like Common Subexpression Elimination (CSE).
///
/// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization)
/// are considered equal in CSE optimization, even if their original forms differ.
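To make the `normalize_eq` idea concrete: two nodes that differ only in, say, operand order of a commutative comparison should compare equal after normalization. An illustrative toy, not DataFusion's actual `NormalizeEq` trait:

```rust
/// Toy equality node: `a = b` and `b = a` should be treated as the
/// same expression by CSE
#[derive(Debug)]
struct EqNode {
    left: String,
    right: String,
}

impl EqNode {
    /// Order-insensitive comparison, the essence of `normalize_eq`
    fn normalize_eq(&self, other: &Self) -> bool {
        (self.left == other.left && self.right == other.right)
            || (self.left == other.right && self.right == other.left)
    }
}

fn main() {
    let a = EqNode { left: "a".into(), right: "b".into() };
    let b = EqNode { left: "b".into(), right: "a".into() };
    // Semantically equivalent after normalization
    assert!(a.normalize_eq(&b));
}
```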
2 changes: 1 addition & 1 deletion datafusion/common/src/dfschema.rs
@@ -955,7 +955,7 @@ pub trait ExprSchema: std::fmt::Debug {
/// Returns the column's optional metadata.
fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>>;

-/// Return the coulmn's datatype and nullability
+/// Return the column's datatype and nullability
fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>;
}

2 changes: 1 addition & 1 deletion datafusion/common/src/error.rs
@@ -115,7 +115,7 @@ pub enum DataFusionError {
Execution(String),
/// [`JoinError`] during execution of the query.
///
-/// This error can unoccur for unjoined tasks, such as execution shutdown.
+/// This error can't occur for unjoined tasks, such as execution shutdown.
ExecutionJoin(JoinError),
/// Error when resources (such as memory of scratch disk space) are exhausted.
///
10 changes: 5 additions & 5 deletions datafusion/common/src/scalar/mod.rs
@@ -2216,7 +2216,7 @@ impl ScalarValue {
///
/// Errors if `self` is
/// - a decimal that fails be converted to a decimal array of size
-/// - a `Fixedsizelist` that fails to be concatenated into an array of size
+/// - a `FixedsizeList` that fails to be concatenated into an array of size
/// - a `List` that fails to be concatenated into an array of size
/// - a `Dictionary` that fails be converted to a dictionary array of size
pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
@@ -2925,7 +2925,7 @@ impl ScalarValue {
/// preferred over this function if at all possible as they can be
/// vectorized and are generally much faster.
///
-/// This function has a few narrow usescases such as hash table key
+/// This function has a few narrow use cases such as hash table key
/// comparisons where comparing a single row at a time is necessary.
///
/// # Errors
@@ -4465,7 +4465,7 @@ mod tests {
Ok(())
}

-// Verifies that ScalarValue has the same behavior with compute kernal when it overflows.
+// Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
where
T: ArrowNumericType,
@@ -6150,9 +6150,9 @@ mod tests {
&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
);

-let newscalar = ScalarValue::try_from_array(&array, 0).unwrap();
+let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
assert_eq!(
-newscalar.data_type(),
+new_scalar.data_type(),
DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
);
}
4 changes: 2 additions & 2 deletions datafusion/common/src/tree_node.rs
@@ -995,11 +995,11 @@ impl<
/// construct a temporary container to be able to call `apply_ref_elements` on a
/// collection of tree node references. But in that case the container's temporary
/// lifetime is different to the lifetime of tree nodes that we put into it.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::Case` case.
+/// Please find an example use case in `Expr::apply_children` with the `Expr::Case` case.
///
/// Most of the cases we don't need to create a temporary container with
/// `TreeNodeRefContainer`, but we can just call `TreeNodeContainer::apply_elements`.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::GroupingSet`
+/// Please find an example use case in `Expr::apply_children` with the `Expr::GroupingSet`
/// case.
pub trait TreeNodeRefContainer<'a, T: 'a>: Sized {
/// Applies `f` to all elements of the container.
4 changes: 2 additions & 2 deletions datafusion/common/src/utils/memory.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

-//! This module provides a function to estimate the memory size of a HashTable prior to alloaction
+//! This module provides a function to estimate the memory size of a HashTable prior to allocation
use crate::{DataFusionError, Result};
use std::mem::size_of;
@@ -79,7 +79,7 @@ pub fn estimate_memory_size<T>(num_elements: usize, fixed_size: usize) -> Result
// For the majority of cases hashbrown overestimates the bucket quantity
// to keep ~1/8 of them empty. We take this factor into account by
// multiplying the number of elements with a fixed ratio of 8/7 (~1.14).
-// This formula leads to overallocation for small tables (< 8 elements)
+// This formula leads to over-allocation for small tables (< 8 elements)
// but should be fine overall.
num_elements
.checked_mul(8)
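The 8/7 factor in this comment compensates for hashbrown keeping roughly 1/8 of buckets empty: the element count is scaled up before rounding to the next power of two. A sketch of just that arithmetic; the real `estimate_memory_size` additionally uses checked arithmetic and folds in per-bucket fixed costs:

```rust
/// Estimate the bucket count of a hashbrown table holding
/// `num_elements`: scale by 8/7, then round up to a power of two
fn estimated_buckets(num_elements: usize) -> usize {
    (num_elements * 8 / 7).next_power_of_two()
}

fn main() {
    // 100 elements -> 114 after scaling -> 128 buckets
    assert_eq!(estimated_buckets(100), 128);
    // Small tables round up disproportionately, hence the note about
    // over-allocation for fewer than 8 elements
    assert_eq!(estimated_buckets(5), 8);
}
```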
6 changes: 3 additions & 3 deletions datafusion/common/src/utils/proxy.rs
@@ -92,12 +92,12 @@ impl<T> VecAllocExt for Vec<T> {
type T = T;

fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) {
-let prev_capacty = self.capacity();
+let prev_capacity = self.capacity();
self.push(x);
let new_capacity = self.capacity();
-if new_capacity > prev_capacty {
+if new_capacity > prev_capacity {
// capacity changed, so we allocated more
-let bump_size = (new_capacity - prev_capacty) * size_of::<T>();
+let bump_size = (new_capacity - prev_capacity) * size_of::<T>();
// Note multiplication should never overflow because `push` would
// have panic'd first, but the checked_add could potentially
// overflow since accounting could be tracking additional values, and
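The variable renamed in this hunk belongs to a simple accounting idiom: charge an external byte counter only when `push` actually grows the `Vec`'s allocation. A self-contained sketch with a usage check (the capacity deltas telescope to the final capacity):

```rust
use std::mem::size_of;

/// Standalone version of the pattern in `VecAllocExt::push_accounted`
fn push_accounted<T>(vec: &mut Vec<T>, x: T, accounting: &mut usize) {
    let prev_capacity = vec.capacity();
    vec.push(x);
    let new_capacity = vec.capacity();
    if new_capacity > prev_capacity {
        // Capacity grew, so new memory was allocated
        *accounting += (new_capacity - prev_capacity) * size_of::<T>();
    }
}

fn main() {
    let mut v: Vec<u64> = Vec::new();
    let mut allocated = 0usize;
    for i in 0..100 {
        push_accounted(&mut v, i, &mut allocated);
    }
    // All growth has been charged to `allocated`
    assert_eq!(allocated, v.capacity() * size_of::<u64>());
}
```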
2 changes: 1 addition & 1 deletion datafusion/core/benches/physical_plan.rs
@@ -38,7 +38,7 @@ use datafusion::physical_plan::{
use datafusion::prelude::SessionContext;
use datafusion_physical_expr_common::sort_expr::LexOrdering;

-// Initialise the operator using the provided record batches and the sort key
+// Initialize the operator using the provided record batches and the sort key
// as inputs. All record batches must have the same schema.
fn sort_preserving_merge_operator(
session_ctx: Arc<SessionContext>,
4 changes: 2 additions & 2 deletions datafusion/core/src/dataframe/mod.rs
@@ -3279,7 +3279,7 @@ mod tests {
&df_results
);

-// check that col with the same name ovwewritten
+// check that col with the same name overwritten
let df_results_overwrite = df
.clone()
.with_column("c1", col("c2") + col("c3"))?
@@ -3302,7 +3302,7 @@
&df_results_overwrite
);

-// check that col with the same name ovwewritten using same name as reference
+// check that col with the same name overwritten using same name as reference
let df_results_overwrite_self = df
.clone()
.with_column("c2", col("c2") + lit(1))?
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/default_table_source.rs
@@ -67,7 +67,7 @@ impl TableSource for DefaultTableSource {
}

/// Tests whether the table provider can make use of any or all filter expressions
-/// to optimise data retrieval.
+/// to optimize data retrieval.
fn supports_filters_pushdown(
&self,
filter: &[&Expr],
@@ -781,7 +781,7 @@ mod test {
assert_partitioned_files(expected, actual);
}

-/// Asserts that the two groups of `ParititonedFile` are the same
+/// Asserts that the two groups of [`PartitionedFile`] are the same
/// (PartitionedFile doesn't implement PartialEq)
fn assert_partitioned_files(
expected: Option<Vec<Vec<PartitionedFile>>>,
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/physical_plan/json.rs
@@ -870,7 +870,7 @@ mod tests {
)]
#[cfg(feature = "compression")]
#[tokio::test]
-async fn test_json_with_repartitioing(
+async fn test_json_with_repartitioning(
file_compression_type: FileCompressionType,
) -> Result<()> {
let config = SessionConfig::new()
4 changes: 2 additions & 2 deletions datafusion/core/src/datasource/physical_plan/parquet/mod.rs
@@ -333,7 +333,7 @@ impl ParquetExecBuilder {

/// Set the filter predicate when reading.
///
/// See the "Predicate Pushdown" section of the [`ParquetExec`] documenation
/// See the "Predicate Pushdown" section of the [`ParquetExec`] documentation
/// for more details.
pub fn with_predicate(mut self, predicate: Arc<dyn PhysicalExpr>) -> Self {
self.predicate = Some(predicate);
@@ -611,7 +611,7 @@ impl ParquetExec {
}

/// If enabled, the reader will read the page index
-/// This is used to optimise filter pushdown
+/// This is used to optimize filter pushdown
/// via `RowSelector` and `RowFilter` by
/// eliminating unnecessary IO and decoding
pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self {
@@ -422,7 +422,7 @@ fn would_column_prevent_pushdown(
checker.prevents_pushdown()
}

-/// Recurses through expr as a trea, finds all `column`s, and checks if any of them would prevent
+/// Recurses through expr as a tree, finds all `column`s, and checks if any of them would prevent
/// this expression from being predicate pushed down. If any of them would, this returns false.
/// Otherwise, true.
pub fn can_expr_be_pushed_down_with_schemas(
@@ -692,7 +692,7 @@ mod test {

let mut parquet_reader = parquet_reader_builder.build().expect("building reader");

-// Parquet file is small, we only need 1 recordbatch
+// Parquet file is small, we only need 1 record batch
let first_rb = parquet_reader
.next()
.expect("expected record batch")