diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 0fe779d5c1a2..3d79a763e9d8 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder; use crate::{ArrayRef, FixedSizeListArray}; use arrow_buffer::NullBufferBuilder; use arrow_data::ArrayData; -use arrow_schema::{DataType, Field}; +use arrow_schema::{DataType, Field, FieldRef}; use std::any::Any; use std::sync::Arc; @@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder { null_buffer_builder: NullBufferBuilder, values_builder: T, list_len: i32, + field: Option, } impl FixedSizeListBuilder { @@ -89,6 +90,20 @@ impl FixedSizeListBuilder { null_buffer_builder: NullBufferBuilder::new(capacity), values_builder, list_len: value_length, + field: None, + } + } + + /// Override the field passed to [`ArrayData::builder`] + /// + /// By default a nullable field is created with the name `item` + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `T` + pub fn with_field(self, field: impl Into) -> Self { + Self { + field: Some(field.into()), + ..self } } } @@ -166,13 +181,40 @@ where ); let nulls = self.null_buffer_builder.finish(); - let array_data = ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", values_data.data_type().clone(), true)), - self.list_len, - )) - .len(len) - .add_child_data(values_data) - .nulls(nulls); + + let field = match &self.field { + Some(f) => { + let size = self.value_length(); + assert_eq!( + f.data_type(), + values_data.data_type(), + "DataType of field ({}) should be the same as the values_builder DataType ({})", + f.data_type(), + values_data.data_type() + ); + + if let Some(a) = values_arr.logical_nulls() { + let nulls_valid = f.is_nullable() + || nulls + .as_ref() + .map(|n| n.expand(size as _).contains(&a)) + .unwrap_or_default(); + + assert!( + nulls_valid, + "Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", + f.name() + ); + } + f.clone() + } + None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), + }; + + let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) + .len(len) + .add_child_data(values_data) + .nulls(nulls); let array_data = unsafe { array_data.build_unchecked() }; @@ -194,13 +236,39 @@ where ); let nulls = self.null_buffer_builder.finish_cloned(); - let array_data = ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", values_data.data_type().clone(), true)), - self.list_len, - )) - .len(len) - .add_child_data(values_data) - .nulls(nulls); + + let field = match &self.field { + Some(f) => { + let size = self.value_length(); + assert_eq!( + f.data_type(), + values_data.data_type(), + "DataType of field ({}) should be the same as the values_builder DataType ({})", + f.data_type(), + values_data.data_type() + ); + if let Some(a) = values_arr.logical_nulls() { + let nulls_valid = f.is_nullable() + || nulls + .as_ref() + .map(|n| n.expand(size as _).contains(&a)) + .unwrap_or_default(); + + assert!( + nulls_valid, + "Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", + f.name() + ); + } + f.clone() + } + None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), + }; + + let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) + .len(len) + .add_child_data(values_data) + .nulls(nulls); let array_data = unsafe { array_data.build_unchecked() }; @@ -216,28 +284,54 @@ mod tests { use crate::Array; use crate::Int32Array; - #[test] - fn test_fixed_size_list_array_builder() { + fn make_list_builder( + include_null_element: bool, + include_null_in_values: bool, + ) -> FixedSizeListBuilder> { let values_builder = Int32Builder::new(); let mut builder = FixedSizeListBuilder::new(values_builder, 3); - // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] builder.values().append_value(0); builder.values().append_value(1); builder.values().append_value(2); builder.append(true); - builder.values().append_null(); - builder.values().append_null(); - builder.values().append_null(); - builder.append(false); + + builder.values().append_value(2); builder.values().append_value(3); - builder.values().append_null(); - builder.values().append_value(5); - builder.append(true); - builder.values().append_value(6); - builder.values().append_value(7); - builder.values().append_null(); + builder.values().append_value(4); builder.append(true); + + if include_null_element { + builder.values().append_null(); + builder.values().append_null(); + builder.values().append_null(); + builder.append(false); + } else { + builder.values().append_value(2); + builder.values().append_value(3); + builder.values().append_value(4); + builder.append(true); + } + + if include_null_in_values { + builder.values().append_value(3); + builder.values().append_null(); + builder.values().append_value(5); + builder.append(true); + } else { + builder.values().append_value(3); + builder.values().append_value(4); + builder.values().append_value(5); + builder.append(true); + } + + builder + } + + #[test] + fn test_fixed_size_list_array_builder() { + let mut builder = make_list_builder(true, true); + let list_array = builder.finish(); assert_eq!(DataType::Int32, list_array.value_type()); @@ -248,9 +342,48 @@ mod tests { } #[test] - fn test_fixed_size_list_array_builder_finish_cloned() { + fn test_fixed_size_list_array_builder_with_field() { + let builder = make_list_builder(false, false); + let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + + #[test] + fn test_fixed_size_list_array_builder_with_field_and_null() { + let builder = make_list_builder(true, false); + let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + + #[test] + #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] + fn test_fixed_size_list_array_builder_with_field_null_panic() { + let builder = make_list_builder(true, true); + let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); + + builder.finish(); + } + + #[test] + #[should_panic( + expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" + )] + fn test_fixed_size_list_array_builder_with_field_type_panic() { let values_builder = Int32Builder::new(); - let mut builder = FixedSizeListBuilder::new(values_builder, 3); + let builder = FixedSizeListBuilder::new(values_builder, 3); + let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true)); // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] builder.values().append_value(0); @@ -262,13 +395,68 @@ mod tests { builder.values().append_null(); builder.append(false); builder.values().append_value(3); - builder.values().append_null(); + builder.values().append_value(4); builder.values().append_value(5); builder.append(true); + + builder.finish(); + } + + #[test] + fn test_fixed_size_list_array_builder_cloned_with_field() { + let builder = make_list_builder(true, true); + let builder = builder.with_field(Field::new("list_element", DataType::Int32, true)); + + let list_array = builder.finish_cloned(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + + #[test] + #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] + fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() { + let builder = make_list_builder(true, true); + let builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); + + builder.finish_cloned(); + } + + #[test] + fn test_fixed_size_list_array_builder_cloned_with_field_and_null() { + let builder = make_list_builder(true, false); + let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + + #[test] + #[should_panic( + expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" + )] + fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() { + let builder = make_list_builder(false, false); + let builder = builder.with_field(Field::new("list_item", DataType::Int64, true)); + + builder.finish_cloned(); + } + + #[test] + fn test_fixed_size_list_array_builder_finish_cloned() { + let mut builder = make_list_builder(true, true); + let mut list_array = builder.finish_cloned(); assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(3, list_array.len()); + assert_eq!(4, list_array.len()); assert_eq!(1, list_array.null_count()); assert_eq!(3, list_array.value_length()); @@ -283,12 +471,40 @@ mod tests { list_array = builder.finish(); assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(5, list_array.len()); + assert_eq!(6, list_array.len()); assert_eq!(2, list_array.null_count()); assert_eq!(6, list_array.value_offset(2)); assert_eq!(3, list_array.value_length()); } + #[test] + fn test_fixed_size_list_array_builder_with_field_empty() { + let values_builder = Int32Array::builder(0); + let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new( + "list_item", + DataType::Int32, + false, + )); + assert!(builder.is_empty()); + let arr = builder.finish(); + assert_eq!(0, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_fixed_size_list_array_builder_cloned_with_field_empty() { + let values_builder = Int32Array::builder(0); + let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new( + "list_item", + DataType::Int32, + false, + )); + assert!(builder.is_empty()); + let arr = builder.finish_cloned(); + assert_eq!(0, arr.len()); + assert_eq!(0, builder.len()); + } + #[test] fn test_fixed_size_list_array_builder_empty() { let values_builder = Int32Array::builder(5);