Skip to content

Commit

Permalink
Move handlign of NULL literals in where clause to type coercion pass (a…
Browse files Browse the repository at this point in the history
…pache#11491)

* Revert "Support `NULL` literals in where clause  (apache#11266)"

This reverts commit fa01917.

* Followup Support NULL literals in where clause

* misc err change

* adopt comparison_coercion

* Fix comments

* Fix comments
  • Loading branch information
xinlifoobar authored and wiedld committed Jul 31, 2024
1 parent 2445159 commit 03daf38
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 31 deletions.
12 changes: 11 additions & 1 deletion datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl AnalyzerRule for TypeCoercion {
/// Assumes that children have already been optimized
fn analyze_internal(
external_schema: &DFSchema,
plan: LogicalPlan,
mut plan: LogicalPlan,
) -> Result<Transformed<LogicalPlan>> {
// get schema representing all available input fields. This is used for data type
// resolution only, so order does not matter here
Expand All @@ -103,6 +103,16 @@ fn analyze_internal(
// select t2.c2 from t1 where t1.c1 in (select t2.c1 from t2 where t2.c2=t1.c3)
schema.merge(external_schema);

if let LogicalPlan::Filter(filter) = &mut plan {
if let Ok(new_predicate) = filter
.predicate
.clone()
.cast_to(&DataType::Boolean, filter.input.schema())
{
filter.predicate = new_predicate;
}
}

let mut expr_rewrite = TypeCoercionRewriter::new(&schema);

let name_preserver = NamePreserver::new(&plan);
Expand Down
39 changes: 9 additions & 30 deletions datafusion/physical-plan/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ use crate::{
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
DisplayFormatType, ExecutionPlan,
};

use arrow::compute::filter_record_batch;
use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use arrow_array::{Array, BooleanArray};
use datafusion_common::cast::{as_boolean_array, as_null_array};
use datafusion_common::cast::as_boolean_array;
use datafusion_common::stats::Precision;
use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
Expand Down Expand Up @@ -81,19 +81,6 @@ impl FilterExec {
cache,
})
}
DataType::Null => {
let default_selectivity = 0;
let cache =
Self::compute_properties(&input, &predicate, default_selectivity)?;

Ok(Self {
predicate,
input: Arc::clone(&input),
metrics: ExecutionPlanMetricsSet::new(),
default_selectivity,
cache,
})
}
other => {
plan_err!("Filter predicate must return BOOLEAN values, got {other:?}")
}
Expand Down Expand Up @@ -367,23 +354,15 @@ pub(crate) fn batch_filter(
.evaluate(batch)
.and_then(|v| v.into_array(batch.num_rows()))
.and_then(|array| {
let filter_array = match as_boolean_array(&array) {
Ok(boolean_array) => Ok(boolean_array.to_owned()),
Ok(match as_boolean_array(&array) {
// apply filter array to record batch
Ok(filter_array) => filter_record_batch(batch, filter_array)?,
Err(_) => {
let Ok(null_array) = as_null_array(&array) else {
return internal_err!(
"Cannot create filter_array from non-boolean predicates"
);
};

// if the predicate is null, then the result is also null
Ok::<BooleanArray, DataFusionError>(BooleanArray::new_null(
null_array.len(),
))
return internal_err!(
"Cannot create filter_array from non-boolean predicates"
);
}
}?;

Ok(filter_record_batch(batch, &filter_array)?)
})
})
}

Expand Down

0 comments on commit 03daf38

Please sign in to comment.