Skip to content

Commit

Permalink
Merge pull request #3 from alamb/alamb/unhandled_hook
Browse files Browse the repository at this point in the history
Improve documentation and add default to ConstantUnhandledPredicatehook
  • Loading branch information
adriangb authored Oct 11, 2024
2 parents d249584 + e8dba08 commit e823c88
Showing 1 changed file with 22 additions and 16 deletions.
38 changes: 22 additions & 16 deletions datafusion/core/src/physical_optimizer/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ pub trait PruningStatistics {
/// [`Snowflake SIGMOD Paper`]: https://dl.acm.org/doi/10.1145/2882903.2903741
/// [small materialized aggregates]: https://www.vldb.org/conf/1998/p476.pdf
/// [zone maps]: https://dl.acm.org/doi/10.1007/978-3-642-03730-6_10
///[data skipping]: https://dl.acm.org/doi/10.1145/2588555.2610515
/// [data skipping]: https://dl.acm.org/doi/10.1145/2588555.2610515
#[derive(Debug, Clone)]
pub struct PruningPredicate {
/// The input schema against which the predicate will be evaluated
Expand All @@ -478,19 +478,30 @@ pub struct PruningPredicate {
literal_guarantees: Vec<LiteralGuarantee>,
}

/// Hook to handle predicates that DataFusion can not handle, e.g. certain complex expressions
/// or predicates that reference columns that are not in the schema.
/// Rewrites predicates that [`PredicateRewriter`] can not handle, e.g. certain
/// complex expressions or predicates that reference columns that are not in the
/// schema.
pub trait UnhandledPredicateHook {
/// Called when a predicate can not be handled by DataFusion's transformation rules
/// or is referencing a column that is not in the schema.
/// Called when a predicate can not be rewritten in terms of statistics or
/// references a column that is not in the schema.
fn handle(&self, expr: &Arc<dyn PhysicalExpr>) -> Arc<dyn PhysicalExpr>;
}

/// The default handling for unhandled predicates is to return a constant `true`
/// (meaning don't prune the container)
#[derive(Debug, Clone)]
struct ConstantUnhandledPredicateHook {
default: Arc<dyn PhysicalExpr>,
}

impl Default for ConstantUnhandledPredicateHook {
fn default() -> Self {
Self {
default: Arc::new(phys_expr::Literal::new(ScalarValue::from(true))),
}
}
}

impl ConstantUnhandledPredicateHook {
fn new(default: Arc<dyn PhysicalExpr>) -> Self {
Self { default }
Expand All @@ -503,12 +514,6 @@ impl UnhandledPredicateHook for ConstantUnhandledPredicateHook {
}
}

fn default_unhandled_hook() -> Arc<dyn UnhandledPredicateHook> {
Arc::new(ConstantUnhandledPredicateHook::new(Arc::new(
phys_expr::Literal::new(ScalarValue::Boolean(Some(true))),
)))
}

impl PruningPredicate {
/// Try to create a new instance of [`PruningPredicate`]
///
Expand All @@ -533,7 +538,7 @@ impl PruningPredicate {
/// See the struct level documentation on [`PruningPredicate`] for more
/// details.
pub fn try_new(expr: Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Result<Self> {
let unhandled_hook = default_unhandled_hook();
let unhandled_hook = Arc::new(ConstantUnhandledPredicateHook::default()) as _;

// build predicate expression once
let mut required_columns = RequiredColumns::new();
Expand Down Expand Up @@ -1348,16 +1353,17 @@ fn build_is_null_column_expr(
/// The maximum number of entries in an `InList` that might be rewritten into
/// an OR chain
const MAX_LIST_VALUE_SIZE_REWRITE: usize = 20;
/// Rewrite a predicate expression to a pruning predicate expression.
/// See documentation for `PruningPredicate` for more information.

/// Rewrite a predicate expression in terms of statistics (min/max/null_counts)
/// for use as a [`PruningPredicate`].
pub struct PredicateRewriter {
unhandled_hook: Arc<dyn UnhandledPredicateHook>,
}

impl Default for PredicateRewriter {
fn default() -> Self {
Self {
unhandled_hook: default_unhandled_hook(),
unhandled_hook: Arc::new(ConstantUnhandledPredicateHook::default()),
}
}
}
Expand Down Expand Up @@ -4049,7 +4055,7 @@ mod tests {
required_columns: &mut RequiredColumns,
) -> Arc<dyn PhysicalExpr> {
let expr = logical2physical(expr, schema);
let unhandled_hook = default_unhandled_hook();
let unhandled_hook = Arc::new(ConstantUnhandledPredicateHook::default()) as _;
build_predicate_expression(&expr, schema, required_columns, &unhandled_hook)
}
}

0 comments on commit e823c88

Please sign in to comment.