Skip to content

Commit 35b2e35

Browse files
authored
fix: array_distinct inner nullability causing type mismatch (#18104)
## Which issue does this PR close? - Closes #17416. ## Rationale for this change `array_distinct`'s inner return type is always `nullable`, however `general_array_distinct` maintain input nullability, causing type mismatch error. I believe the same error happens for `array_union` and `array_intersect` (in `set_ops.rs`). I can include the fix for those in this PR or maybe another separated PR. ## What changes are included in this PR? - Match return type nullability for `array_distinct`. ## Are these changes tested? Yes. I tried to add unit tests checking return types (similar to #15901), but it wasn't clear to me whether the added tests could verify the issue #17416. So I switched to the integration test. - Added test for `List` with inner `nullability = true / false`. - I did not added tests for `LargeList`, I don't think it needed because the code path for the return type is identical to `List`. ## Are there any user-facing changes? I don't think so.
1 parent f198fc8 commit 35b2e35

File tree

1 file changed

+53
-10
lines changed

1 file changed

+53
-10
lines changed

datafusion/functions-nested/src/set_ops.rs

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ use arrow::datatypes::{DataType, Field, FieldRef};
2929
use arrow::row::{RowConverter, SortField};
3030
use datafusion_common::cast::{as_large_list_array, as_list_array};
3131
use datafusion_common::utils::ListCoercion;
32-
use datafusion_common::{
33-
exec_err, internal_err, plan_err, utils::take_function_args, Result,
34-
};
32+
use datafusion_common::{exec_err, internal_err, utils::take_function_args, Result};
3533
use datafusion_expr::{
3634
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3735
};
@@ -289,13 +287,7 @@ impl ScalarUDFImpl for ArrayDistinct {
289287
}
290288

291289
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
292-
match &arg_types[0] {
293-
List(field) => Ok(DataType::new_list(field.data_type().clone(), true)),
294-
LargeList(field) => {
295-
Ok(DataType::new_large_list(field.data_type().clone(), true))
296-
}
297-
arg_type => plan_err!("{} does not support type {arg_type}", self.name()),
298-
}
290+
Ok(arg_types[0].clone())
299291
}
300292

301293
fn invoke_with_args(
@@ -563,3 +555,54 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
563555
array.nulls().cloned(),
564556
)?))
565557
}
558+
559+
#[cfg(test)]
560+
mod tests {
561+
use std::sync::Arc;
562+
563+
use arrow::{
564+
array::{Int32Array, ListArray},
565+
buffer::OffsetBuffer,
566+
datatypes::{DataType, Field},
567+
};
568+
use datafusion_common::{config::ConfigOptions, DataFusionError};
569+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
570+
571+
use crate::set_ops::array_distinct_udf;
572+
573+
#[test]
574+
fn test_array_distinct_inner_nullability_result_type_match_return_type(
575+
) -> Result<(), DataFusionError> {
576+
let udf = array_distinct_udf();
577+
578+
for inner_nullable in [true, false] {
579+
let inner_field = Field::new_list_field(DataType::Int32, inner_nullable);
580+
let input_field =
581+
Field::new_list("input", Arc::new(inner_field.clone()), true);
582+
583+
// [[1, 1, 2]]
584+
let input_array = ListArray::new(
585+
inner_field.into(),
586+
OffsetBuffer::new(vec![0, 3].into()),
587+
Arc::new(Int32Array::new(vec![1, 1, 2].into(), None)),
588+
None,
589+
);
590+
591+
let input_array = ColumnarValue::Array(Arc::new(input_array));
592+
593+
let result = udf.invoke_with_args(ScalarFunctionArgs {
594+
args: vec![input_array],
595+
arg_fields: vec![input_field.clone().into()],
596+
number_rows: 1,
597+
return_field: input_field.clone().into(),
598+
config_options: Arc::new(ConfigOptions::default()),
599+
})?;
600+
601+
assert_eq!(
602+
result.data_type(),
603+
udf.return_type(&[input_field.data_type().clone()])?
604+
);
605+
}
606+
Ok(())
607+
}
608+
}

0 commit comments

Comments
 (0)