-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Closed
Labels
bug: Something isn't working
Description
Describe the bug
The following filter expression generates an InternalError("type mismatch") for a column of type List[non-nullable Bool] (i.e., the nullability of the inner list field is false):
.filter(array_distinct(col(column_name)).eq(make_array(vec![lit(false)])))
To Reproduce
This test fails as noted in the comments. Note that run_test returns exactly what collect() returns: every earlier step is unwrapped rather than propagated, so no short-circuiting happens and the unexpected error can only come from collect().
use std::sync::Arc;
use arrow::array::{ArrayRef, BooleanArray, ListArray, RecordBatch};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::catalog::MemTable;
use datafusion::error::DataFusionError;
use datafusion::logical_expr::{col, lit};
use datafusion::prelude::{SessionContext, array_distinct, make_array};
/// Runs the reproduction query against a one-column in-memory table.
///
/// The table `test_table` holds a single record batch whose only column,
/// `bool_column`, is a list-of-boolean array with five rows. `inner_nullable`
/// sets the nullability of the list's `item` field and `outer_nullable` sets
/// the nullability of the column itself.
///
/// The query keeps the rows where `array_distinct(bool_column)` equals
/// `make_array(false)`.
///
/// Every setup step panics on failure, so the returned `Result` is exactly the
/// outcome of the final `collect()` call.
async fn run_test(
    inner_nullable: bool,
    outer_nullable: bool,
) -> Result<Vec<RecordBatch>, DataFusionError> {
    let column_name = "bool_column";

    // Eight flat booleans split by the offsets into five lists.
    let item_field = Arc::new(Field::new("item", DataType::Boolean, inner_nullable));
    let flat_values =
        BooleanArray::from(vec![true, false, true, true, false, false, true, false]);
    let list_offsets = OffsetBuffer::new(vec![0i32, 1, 2, 4, 6, 8].into());
    let list_column: ArrayRef = Arc::new(
        ListArray::try_new(item_field, list_offsets, Arc::new(flat_values), None)
            .expect("failed to create a bool list array"),
    );

    // The column's type is taken from the array so the inner nullability carries over.
    let column_field = Field::new(
        column_name,
        list_column.data_type().clone(),
        outer_nullable,
    );
    let schema = Arc::new(Schema::new(vec![column_field]));

    // One partition holding one batch, exposed to the session as `test_table`.
    let batch = RecordBatch::try_new_with_options(
        Arc::clone(&schema),
        vec![list_column],
        &Default::default(),
    )
    .expect("failed to create the record batch");
    let table =
        MemTable::try_new(schema, vec![vec![batch]]).expect("failed to create mem table");

    let ctx = SessionContext::new();
    ctx.register_table("test_table", Arc::new(table))
        .expect("failed to register table");

    // Predicate under test: the distinct elements of each list equal `[false]`.
    let distinct_values = array_distinct(col(column_name));
    let single_false = make_array(vec![lit(false)]);
    let predicate = distinct_values.eq(single_false);

    // Planning steps are unwrapped; only the execution result is handed back.
    let table_frame = ctx.table("test_table").await.unwrap();
    let filtered = table_frame.filter(predicate).unwrap();
    filtered.collect().await
}
/// Runs the reproduction for every combination of inner and outer nullability.
///
/// The arguments to `run_test` are `(inner_nullable, outer_nullable)`. As
/// reported, only the two cases with a non-nullable list item fail.
#[tokio::test]
async fn internal_error_repro() {
// Nullable list items: reported to pass for either outer nullability.
assert!(run_test(true, true).await.is_ok());
assert!(run_test(true, false).await.is_ok());
// Non-nullable list items: `dbg!` prints the returned `Result` before the assertion checks it.
assert!(dbg!(run_test(false, true).await).is_ok()); // FAILS: InternalError("type mismatch")
assert!(dbg!(run_test(false, false).await).is_ok()); // FAILS: InternalError("type mismatch")
}
Expected behavior
My expectation would be that this filter expression should work regardless of the nullability.
Additional context
No response
Metadata
Metadata
Assignees
Labels
bug: Something isn't working