diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index bcb867f6e7fa..35ec7d074d5f 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -20,6 +20,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use arrow::datatypes::DataType; use indexmap::IndexSet; use itertools::Itertools; @@ -875,14 +876,37 @@ impl OptimizerRule for PushDownFilter { let predicates = split_conjunction_owned(filter.predicate.clone()); let mut non_unnest_predicates = vec![]; let mut unnest_predicates = vec![]; + let mut unnest_struct_columns = vec![]; + + for idx in &unnest.struct_type_columns { + let (sub_qualifier, field) = + unnest.input.schema().qualified_field(*idx); + let field_name = field.name().clone(); + + if let DataType::Struct(children) = field.data_type() { + for child in children { + let child_name = child.name().clone(); + unnest_struct_columns.push(Column::new( + sub_qualifier.cloned(), + format!("{field_name}.{child_name}"), + )); + } + } + } + for predicate in predicates { // collect all the Expr::Column in predicate recursively let mut accum: HashSet = HashSet::new(); expr_to_columns(&predicate, &mut accum)?; - if unnest.list_type_columns.iter().any(|(_, unnest_list)| { - accum.contains(&unnest_list.output_column) - }) { + let contains_list_columns = + unnest.list_type_columns.iter().any(|(_, unnest_list)| { + accum.contains(&unnest_list.output_column) + }); + let contains_struct_columns = + unnest_struct_columns.iter().any(|c| accum.contains(c)); + + if contains_list_columns || contains_struct_columns { unnest_predicates.push(predicate); } else { non_unnest_predicates.push(predicate); diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index f6d71cad60d4..6e2972d96a6f 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -128,12 +128,31 @@ physical_plan 06)----------ProjectionExec: expr=[column1@0 as column1, column2@1 as __unnest_placeholder(d.column2)] 07)------------DataSourceExec: partitions=1, partition_sizes=[1] +statement ok +drop table d; +statement ok +CREATE TABLE d AS VALUES (named_struct('a', 1, 'b', 2)), (named_struct('a', 3, 'b', 4)), (named_struct('a', 5, 'b', 6)); + +query II +select * from (select unnest(column1) from d) where "__unnest_placeholder(d.column1).b" > 5; +---- +5 6 + +query TT +explain select * from (select unnest(column1) from d) where "__unnest_placeholder(d.column1).b" > 5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: __unnest_placeholder(d.column1).b@1 > 5 +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +04)------UnnestExec +05)--------ProjectionExec: expr=[column1@0 as __unnest_placeholder(d.column1)] +06)----------DataSourceExec: partitions=1, partition_sizes=[1] statement ok drop table d; - # Test push down filter with limit for parquet statement ok set datafusion.execution.parquet.pushdown_filters = true; diff --git a/docs/source/user-guide/sql/special_functions.md b/docs/source/user-guide/sql/special_functions.md index 7c9efbb66218..4f2a39f642b0 100644 --- a/docs/source/user-guide/sql/special_functions.md +++ b/docs/source/user-guide/sql/special_functions.md @@ -69,6 +69,7 @@ Expands an array or map into rows. ### `unnest (struct)` Expand a struct fields into individual columns. +Each field of the struct will be prefixed with `__unnest_placeholder` and could be accessed via `"__unnest_placeholder()."`. #### Arguments @@ -91,10 +92,10 @@ Expand a struct fields into individual columns. +---------------------------+ > select unnest(struct_column) from foov; -+------------------------------------------+------------------------------------------+ -| unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b | -+------------------------------------------+------------------------------------------+ -| 5 | a string | -| 6 | another string | -+------------------------------------------+------------------------------------------+ ++--------------------------------------------+--------------------------------------------+ +| __unnest_placeholder(foov.struct_column).a | __unnest_placeholder(foov.struct_column).b | ++--------------------------------------------+--------------------------------------------+ +| 5 | a string | +| 6 | another string | ++--------------------------------------------+--------------------------------------------+ ```