From 6b1f3cab6af20d4626e4be00fabbb7cb47af14b3 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 8 Jan 2025 13:03:44 +0000 Subject: [PATCH] Struct `field` renamed to `maybe_null_field` (#1846) --- bench-vortex/src/clickbench.rs | 2 +- bench-vortex/src/tpch/mod.rs | 2 +- bench-vortex/src/vortex_utils.rs | 4 +- vortex-array/src/array/chunked/canonical.rs | 10 +- vortex-array/src/array/chunked/variants.rs | 8 +- vortex-array/src/array/constant/variants.rs | 2 +- vortex-array/src/array/sparse/variants.rs | 8 +- vortex-array/src/array/struct_/mod.rs | 12 +-- vortex-array/src/variants.rs | 17 +++- vortex-datafusion/src/memory/statistics.rs | 2 +- .../src/persistent/statistics.rs | 8 +- vortex-dtype/src/serde/flatbuffers/mod.rs | 5 +- vortex-expr/src/column.rs | 21 ++-- vortex-expr/src/pack.rs | 6 +- vortex-file/src/read/layouts/columnar.rs | 16 +-- vortex-file/src/read/metadata.rs | 16 +-- vortex-file/src/tests.rs | 97 +++++++++++++++---- vortex-layout/src/layouts/struct_/writer.rs | 2 +- vortex-sampling-compressor/tests/smoketest.rs | 10 +- vortex-scalar/src/display.rs | 44 +++++---- vortex-scalar/src/struct_.rs | 88 +++++++++++++++-- 21 files changed, 271 insertions(+), 109 deletions(-) diff --git a/bench-vortex/src/clickbench.rs b/bench-vortex/src/clickbench.rs index ad73f0056e..6fe4d18898 100644 --- a/bench-vortex/src/clickbench.rs +++ b/bench-vortex/src/clickbench.rs @@ -188,7 +188,7 @@ pub async fn register_vortex_files( for (field_name, field_type) in names.zip(types) { let val = arrays_map.entry(field_name.clone()).or_default(); - val.push(st.field_by_name(field_name.as_ref()).unwrap()); + val.push(st.maybe_null_field_by_name(field_name.as_ref()).unwrap()); types_map.insert(field_name.clone(), field_type.clone()); } diff --git a/bench-vortex/src/tpch/mod.rs b/bench-vortex/src/tpch/mod.rs index 3dabb7bb97..ff2f2128a8 100644 --- a/bench-vortex/src/tpch/mod.rs +++ b/bench-vortex/src/tpch/mod.rs @@ -235,7 +235,7 @@ async fn register_vortex_file( for (field_name, field_type) in names.zip(types) { let val = arrays_map.entry(field_name.clone()).or_default(); - val.push(st.field_by_name(field_name).unwrap()); + val.push(st.maybe_null_field_by_name(field_name).unwrap()); types_map.insert(field_name.clone(), field_type.clone()); } diff --git a/bench-vortex/src/vortex_utils.rs b/bench-vortex/src/vortex_utils.rs index 6f39f04cd1..1421175628 100644 --- a/bench-vortex/src/vortex_utils.rs +++ b/bench-vortex/src/vortex_utils.rs @@ -24,7 +24,9 @@ pub async fn vortex_chunk_sizes(path: PathBuf) -> VortexResult>>()?; let field_array = ChunkedArray::try_new(field_chunks, field_dtype.clone())?; @@ -347,11 +347,15 @@ mod tests { .into_array(); let canonical_struct = chunked.into_struct().unwrap(); let canonical_varbin = canonical_struct - .field(0) + .maybe_null_field_by_idx(0) + .unwrap() + .into_varbinview() + .unwrap(); + let original_varbin = struct_array + .maybe_null_field_by_idx(0) .unwrap() .into_varbinview() .unwrap(); - let original_varbin = struct_array.field(0).unwrap().into_varbinview().unwrap(); let orig_values = original_varbin .with_iterator(|it| it.map(|a| a.map(|v| v.to_vec())).collect::>()) .unwrap(); diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index ddf8620721..8cdb18435c 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -61,10 +61,14 @@ impl Utf8ArrayTrait for ChunkedArray {} impl BinaryArrayTrait for ChunkedArray {} impl StructArrayTrait for ChunkedArray { - fn field(&self, idx: usize) -> Option { + fn maybe_null_field_by_idx(&self, idx: usize) -> Option { let mut chunks = Vec::with_capacity(self.nchunks()); for chunk in self.chunks() { - chunks.push(chunk.as_struct_array().and_then(|s| s.field(idx))?); + chunks.push( + chunk + .as_struct_array() + .and_then(|s| s.maybe_null_field_by_idx(idx))?, + ); } let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 6212cfc5b2..24d683f124 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -90,7 +90,7 @@ impl Utf8ArrayTrait for ConstantArray {} impl BinaryArrayTrait for ConstantArray {} impl StructArrayTrait for ConstantArray { - fn field(&self, idx: usize) -> Option { + fn maybe_null_field_by_idx(&self, idx: usize) -> Option { self.scalar() .as_struct() .field_by_idx(idx) diff --git a/vortex-array/src/array/sparse/variants.rs b/vortex-array/src/array/sparse/variants.rs index 64c032e865..c4efcbc66a 100644 --- a/vortex-array/src/array/sparse/variants.rs +++ b/vortex-array/src/array/sparse/variants.rs @@ -62,10 +62,14 @@ impl Utf8ArrayTrait for SparseArray {} impl BinaryArrayTrait for SparseArray {} impl StructArrayTrait for SparseArray { - fn field(&self, idx: usize) -> Option { + fn maybe_null_field_by_idx(&self, idx: usize) -> Option { let new_patches = self .patches() - .map_values_opt(|values| values.as_struct_array().and_then(|s| s.field(idx))) + .map_values_opt(|values| { + values + .as_struct_array() + .and_then(|s| s.maybe_null_field_by_idx(idx)) + }) .vortex_expect("field array length should equal struct array length")?; let scalar = StructScalar::try_from(&self.fill_scalar()) .ok()? diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index de7295d498..48bc1a448e 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -40,7 +40,7 @@ impl StructArray { pub fn children(&self) -> impl Iterator + '_ { (0..self.nfields()).map(move |idx| { - self.field(idx).unwrap_or_else(|| { + self.maybe_null_field_by_idx(idx).unwrap_or_else(|| { vortex_panic!("Field {} not found, nfields: {}", idx, self.nfields()) }) }) @@ -126,7 +126,7 @@ impl StructArray { names.push(self.names()[idx].clone()); children.push( - self.field(idx) + self.maybe_null_field_by_idx(idx) .ok_or_else(|| vortex_err!(OutOfBounds: idx, 0, self.dtypes().len()))?, ); } @@ -149,7 +149,7 @@ impl VariantsVTable for StructEncoding { } impl StructArrayTrait for StructArray { - fn field(&self, idx: usize) -> Option { + fn maybe_null_field_by_idx(&self, idx: usize) -> Option { self.dtypes().get(idx).map(|dtype| { self.as_ref() .child(idx, dtype, self.len()) @@ -183,7 +183,7 @@ impl VisitorVTable for StructEncoding { fn accept(&self, array: &StructArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { for (idx, name) in array.names().iter().enumerate() { let child = array - .field(idx) + .maybe_null_field_by_idx(idx) .ok_or_else(|| vortex_err!(OutOfBounds: idx, 0, array.nfields()))?; visitor.visit_child(name.as_ref(), &child)?; } @@ -247,13 +247,13 @@ mod test { assert_eq!(struct_b.len(), 5); - let bools = BoolArray::try_from(struct_b.field(0).unwrap()).unwrap(); + let bools = BoolArray::try_from(struct_b.maybe_null_field_by_idx(0).unwrap()).unwrap(); assert_eq!( bools.boolean_buffer().iter().collect::>(), vec![true, true, true, false, false] ); - let prims = PrimitiveArray::try_from(struct_b.field(1).unwrap()).unwrap(); + let prims = PrimitiveArray::try_from(struct_b.maybe_null_field_by_idx(1).unwrap()).unwrap(); assert_eq!(prims.as_slice::(), [0i64, 1, 2, 3, 4]); } } diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index 1ba939c2c6..756aedf6c7 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -200,17 +200,24 @@ pub trait StructArrayTrait: ArrayTrait { self.names().len() } - /// Return a field's array by index - fn field(&self, idx: usize) -> Option; + /// Return a field's array by index, ignoring struct nullability + fn maybe_null_field_by_idx(&self, idx: usize) -> Option; - /// Return a field's array by name - fn field_by_name(&self, name: &str) -> Option { + /// Return a field's array by name, ignoring struct nullability + fn maybe_null_field_by_name(&self, name: &str) -> Option { let field_idx = self .names() .iter() .position(|field_name| field_name.as_ref() == name); - field_idx.and_then(|field_idx| self.field(field_idx)) + field_idx.and_then(|field_idx| self.maybe_null_field_by_idx(field_idx)) + } + + fn maybe_null_field(&self, field: &Field) -> Option { + match field { + Field::Index(idx) => self.maybe_null_field_by_idx(*idx), + Field::Name(name) => self.maybe_null_field_by_name(name.as_ref()), + } } fn project(&self, projection: &[Field]) -> VortexResult; diff --git a/vortex-datafusion/src/memory/statistics.rs b/vortex-datafusion/src/memory/statistics.rs index c2c262a2e1..e375bb4810 100644 --- a/vortex-datafusion/src/memory/statistics.rs +++ b/vortex-datafusion/src/memory/statistics.rs @@ -13,7 +13,7 @@ pub fn chunked_array_df_stats(array: &ChunkedArray, projection: &[usize]) -> DFR .iter() .map(|i| { array - .field(*i) + .maybe_null_field_by_idx(*i) .ok_or_else(|| vortex_err!("Projection references unknown field {i}")) }) .map_ok(|arr| { diff --git a/vortex-datafusion/src/persistent/statistics.rs b/vortex-datafusion/src/persistent/statistics.rs index 3113a755be..6ab6c339fe 100644 --- a/vortex-datafusion/src/persistent/statistics.rs +++ b/vortex-datafusion/src/persistent/statistics.rs @@ -13,7 +13,7 @@ use vortex_error::VortexResult; pub fn array_to_col_statistics(array: &StructArray) -> VortexResult { let mut stats = ColumnStatistics::new_unknown(); - if let Some(null_count_array) = array.field_by_name(Stat::NullCount.name()) { + if let Some(null_count_array) = array.maybe_null_field_by_name(Stat::NullCount.name()) { let array = null_count_array.into_arrow()?; let array = array.as_primitive::(); @@ -21,7 +21,7 @@ pub fn array_to_col_statistics(array: &StructArray) -> VortexResult VortexResult VortexResult VortexResult> { - match array.field_by_name(Stat::UncompressedSizeInBytes.name()) { + match array.maybe_null_field_by_name(Stat::UncompressedSizeInBytes.name()) { None => Ok(None), Some(array) => { let array = array.into_arrow()?; diff --git a/vortex-dtype/src/serde/flatbuffers/mod.rs b/vortex-dtype/src/serde/flatbuffers/mod.rs index 4c36d22665..7f39148a00 100644 --- a/vortex-dtype/src/serde/flatbuffers/mod.rs +++ b/vortex-dtype/src/serde/flatbuffers/mod.rs @@ -64,8 +64,7 @@ impl TryFrom> for DType { .ok_or_else(|| vortex_err!("failed to parse struct names from flatbuffer"))? .iter() .map(|n| (*n).into()) - .collect_vec() - .into(); + .collect_vec(); let dtypes: Vec = fb_struct .dtypes() .ok_or_else(|| vortex_err!("failed to parse struct dtypes from flatbuffer"))? @@ -73,7 +72,7 @@ impl TryFrom> for DType { .map(Self::try_from) .collect::>>()?; Ok(Self::Struct( - StructDType::new(names, dtypes), + StructDType::new(names.into(), dtypes), fb_struct.nullable().into(), )) } diff --git a/vortex-expr/src/column.rs b/vortex-expr/src/column.rs index cf29fe8ca4..1260ca9560 100644 --- a/vortex-expr/src/column.rs +++ b/vortex-expr/src/column.rs @@ -2,9 +2,7 @@ use std::any::Any; use std::fmt::Display; use std::sync::Arc; -use vortex_array::array::StructArray; -use vortex_array::variants::StructArrayTrait; -use vortex_array::ArrayData; +use vortex_array::{ArrayDType, ArrayData}; use vortex_dtype::Field; use vortex_error::{vortex_err, VortexResult}; @@ -61,13 +59,16 @@ impl VortexExpr for Column { } fn evaluate(&self, batch: &ArrayData) -> VortexResult { - let s = StructArray::try_from(batch.clone())?; - - match &self.field { - Field::Name(n) => s.field_by_name(n), - Field::Index(i) => s.field(*i), - } - .ok_or_else(|| vortex_err!("Array doesn't contain child array {}", self.field)) + batch + .as_struct_array() + .ok_or_else(|| { + vortex_err!( + "Array must be a struct array, however it was a {}", + batch.dtype() + ) + })? + .maybe_null_field(&self.field) + .ok_or_else(|| vortex_err!("Array doesn't contain child array {}", self.field)) } fn children(&self) -> Vec<&ExprRef> { diff --git a/vortex-expr/src/pack.rs b/vortex-expr/src/pack.rs index 805b1dc433..f03da39dbd 100644 --- a/vortex-expr/src/pack.rs +++ b/vortex-expr/src/pack.rs @@ -30,7 +30,7 @@ use crate::{ExprRef, VortexExpr}; /// let x_copy = packed /// .as_struct_array() /// .unwrap() -/// .field_by_name("x copy") +/// .maybe_null_field_by_name("x copy") /// .unwrap(); /// assert_eq!(scalar_at(&x_copy, 0).unwrap(), Scalar::from(100)); /// assert_eq!(scalar_at(&x_copy, 1).unwrap(), Scalar::from(110)); @@ -140,14 +140,14 @@ mod tests { let mut array = array .as_struct_array() .ok_or_else(|| vortex_err!("expected a struct"))? - .field_by_name(field) + .maybe_null_field_by_name(field) .ok_or_else(|| vortex_err!("expected field to exist: {}", field))?; for field in field_path { array = array .as_struct_array() .ok_or_else(|| vortex_err!("expected a struct"))? - .field_by_name(field) + .maybe_null_field_by_name(field) .ok_or_else(|| vortex_err!("expected field to exist: {}", field))?; } Ok(array.into_primitive().unwrap()) diff --git a/vortex-file/src/read/layouts/columnar.rs b/vortex-file/src/read/layouts/columnar.rs index 47f002d0c5..ec8e05e9da 100644 --- a/vortex-file/src/read/layouts/columnar.rs +++ b/vortex-file/src/read/layouts/columnar.rs @@ -522,7 +522,7 @@ mod tests { .unwrap() .as_struct_array() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -531,7 +531,7 @@ mod tests { .unwrap() .as_struct_array() .unwrap() - .field(1) + .maybe_null_field_by_idx(1) .unwrap() .into_varbinview() .unwrap(); @@ -560,7 +560,7 @@ mod tests { .unwrap() .as_struct_array() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -569,7 +569,7 @@ mod tests { .unwrap() .as_struct_array() .unwrap() - .field(1) + .maybe_null_field_by_idx(1) .unwrap() .into_varbinview() .unwrap(); @@ -608,14 +608,14 @@ mod tests { let prim_arr_chunk0 = arr[0] .as_struct_array() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); let str_arr_chunk0 = arr[0] .as_struct_array() .unwrap() - .field(1) + .maybe_null_field_by_idx(1) .unwrap() .into_varbinview() .unwrap(); @@ -635,14 +635,14 @@ mod tests { let prim_arr_chunk1 = arr[1] .as_struct_array() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); let str_arr_chunk1 = arr[1] .as_struct_array() .unwrap() - .field(1) + .maybe_null_field_by_idx(1) .unwrap() .into_varbinview() .unwrap(); diff --git a/vortex-file/src/read/metadata.rs b/vortex-file/src/read/metadata.rs index aa06558560..cff42f3690 100644 --- a/vortex-file/src/read/metadata.rs +++ b/vortex-file/src/read/metadata.rs @@ -136,7 +136,7 @@ mod test { .as_struct_array() .unwrap(); - let min = name_metadata_table.field_by_name("min").unwrap(); + let min = name_metadata_table.maybe_null_field_by_name("min").unwrap(); let chunk1_min = scalar_at(&min, 0).unwrap(); let chunk2_min = scalar_at(&min, 1).unwrap(); assert_eq!( @@ -148,7 +148,7 @@ mod test { Some(BufferString::from("Khalil")) ); - let max = name_metadata_table.field_by_name("max").unwrap(); + let max = name_metadata_table.maybe_null_field_by_name("max").unwrap(); let chunk1_max = scalar_at(&max, 0).unwrap(); let chunk2_max = scalar_at(&max, 1).unwrap(); assert_eq!( @@ -160,7 +160,9 @@ mod test { Some(BufferString::from("Pharrell")) ); - let null_count = name_metadata_table.field_by_name("null_count").unwrap(); + let null_count = name_metadata_table + .maybe_null_field_by_name("null_count") + .unwrap(); let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); assert_eq!( @@ -178,19 +180,21 @@ mod test { .as_struct_array() .unwrap(); - let min = age_metadata_table.field_by_name("min").unwrap(); + let min = age_metadata_table.maybe_null_field_by_name("min").unwrap(); let chunk1_min = scalar_at(&min, 0).unwrap(); let chunk2_min = scalar_at(&min, 1).unwrap(); assert_eq!(chunk1_min.as_primitive().typed_value::(), Some(25)); assert_eq!(chunk2_min.as_primitive().typed_value::(), Some(18)); - let max = age_metadata_table.field_by_name("max").unwrap(); + let max = age_metadata_table.maybe_null_field_by_name("max").unwrap(); let chunk1_max = scalar_at(&max, 0).unwrap(); let chunk2_max = scalar_at(&max, 1).unwrap(); assert_eq!(chunk1_max.as_primitive().typed_value::(), Some(31)); assert_eq!(chunk2_max.as_primitive().typed_value::(), Some(57)); - let null_count = age_metadata_table.field_by_name("null_count").unwrap(); + let null_count = age_metadata_table + .maybe_null_field_by_name("null_count") + .unwrap(); let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); assert_eq!( diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 1a8da9a459..45e5906a59 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -211,7 +211,7 @@ async fn test_read_projection() { let actual = array .into_struct() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_varbinview() .unwrap() @@ -243,7 +243,7 @@ async fn test_read_projection() { let actual = array .into_struct() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_varbinview() .unwrap() @@ -275,7 +275,7 @@ async fn test_read_projection() { let primitive_array = array .into_struct() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -303,7 +303,7 @@ async fn test_read_projection() { let primitive_array = array .into_struct() .unwrap() - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -345,7 +345,10 @@ async fn unequal_batches() { item_count += array.len(); batch_count += 1; - let numbers = array.as_struct_array().unwrap().field_by_name("numbers"); + let numbers = array + .as_struct_array() + .unwrap() + .maybe_null_field_by_name("numbers"); if let Some(numbers) = numbers { let numbers = numbers.into_primitive().unwrap(); @@ -437,7 +440,11 @@ async fn filter_string() { let result = stream.try_collect::>().await.unwrap(); assert_eq!(result.len(), 1); - let names = result[0].as_struct_array().unwrap().field(0).unwrap(); + let names = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(0) + .unwrap(); assert_eq!( names .into_varbinview() @@ -449,7 +456,11 @@ async fn filter_string() { .unwrap(), vec!["Joseph".to_string()] ); - let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); + let ages = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(1) + .unwrap(); assert_eq!(ages.into_primitive().unwrap().as_slice::(), vec![25]); } @@ -492,7 +503,11 @@ async fn filter_or() { result.push(array.unwrap()); } assert_eq!(result.len(), 1); - let names = result[0].as_struct_array().unwrap().field(0).unwrap(); + let names = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(0) + .unwrap(); assert_eq!( names .into_varbinview() @@ -504,7 +519,11 @@ async fn filter_or() { .unwrap(), vec!["Joseph".to_string(), "Angela".to_string()] ); - let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); + let ages = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(1) + .unwrap(); assert_eq!( ages.into_primitive() .unwrap() @@ -549,7 +568,11 @@ async fn filter_and() { result.push(array.unwrap()); } assert_eq!(result.len(), 1); - let names = result[0].as_struct_array().unwrap().field(0).unwrap(); + let names = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(0) + .unwrap(); assert_eq!( names .into_varbinview() @@ -560,7 +583,11 @@ async fn filter_and() { .unwrap(), vec![Some("Joseph".to_string()), None] ); - let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); + let ages = result[0] + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(1) + .unwrap(); assert_eq!( ages.into_primitive().unwrap().as_slice::(), vec![25, 31] @@ -617,7 +644,7 @@ async fn test_with_indices_simple() { .into_struct() .unwrap(); let actual_kept_numbers_array = actual_kept_array - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -640,7 +667,11 @@ async fn test_with_indices_simple() { .unwrap() .into_struct() .unwrap(); - let actual_numbers_array = actual_array.field(0).unwrap().into_primitive().unwrap(); + let actual_numbers_array = actual_array + .maybe_null_field_by_idx(0) + .unwrap() + .into_primitive() + .unwrap(); let actual_numbers = actual_numbers_array.as_slice::(); assert_eq!(expected_numbers, actual_numbers); @@ -684,7 +715,7 @@ async fn test_with_indices_on_two_columns() { .unwrap(); let strings_actual = array - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_varbinview() .unwrap() @@ -701,7 +732,11 @@ async fn test_with_indices_on_two_columns() { .collect::>() ); - let numbers_actual_array = array.field(1).unwrap().into_primitive().unwrap(); + let numbers_actual_array = array + .maybe_null_field_by_idx(1) + .unwrap() + .into_primitive() + .unwrap(); let numbers_actual = numbers_actual_array.as_slice::(); assert_eq!( numbers_actual, @@ -775,7 +810,7 @@ async fn test_with_indices_and_with_row_filter_simple() { .into_struct() .unwrap(); let actual_kept_numbers_array = actual_kept_array - .field(0) + .maybe_null_field_by_idx(0) .unwrap() .into_primitive() .unwrap(); @@ -806,7 +841,11 @@ async fn test_with_indices_and_with_row_filter_simple() { .unwrap() .into_struct() .unwrap(); - let actual_numbers_array = actual_array.field(0).unwrap().into_primitive().unwrap(); + let actual_numbers_array = actual_array + .maybe_null_field_by_idx(0) + .unwrap() + .into_primitive() + .unwrap(); let actual_numbers = actual_numbers_array.as_slice::(); assert_eq!( @@ -872,7 +911,11 @@ async fn filter_string_chunked() { .unwrap(); assert_eq!(actual_array.len(), 1); - let names = actual_array.as_struct_array().unwrap().field(0).unwrap(); + let names = actual_array + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(0) + .unwrap(); assert_eq!( names .into_varbinview() @@ -884,7 +927,11 @@ async fn filter_string_chunked() { .unwrap(), vec!["Joseph".to_string()] ); - let ages = actual_array.as_struct_array().unwrap().field(1).unwrap(); + let ages = actual_array + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(1) + .unwrap(); assert_eq!(ages.into_primitive().unwrap().as_slice::(), vec![25]); } @@ -960,7 +1007,11 @@ async fn test_pruning_with_or() { .unwrap(); assert_eq!(actual_array.len(), 10); - let letters = actual_array.as_struct_array().unwrap().field(0).unwrap(); + let letters = actual_array + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(0) + .unwrap(); assert_eq!( letters .into_varbinview() @@ -982,7 +1033,11 @@ async fn test_pruning_with_or() { Some("P".to_string()) ] ); - let numbers = actual_array.as_struct_array().unwrap().field(1).unwrap(); + let numbers = actual_array + .as_struct_array() + .unwrap() + .maybe_null_field_by_idx(1) + .unwrap(); assert_eq!( (0..numbers.len()) .map(|index| -> Option { diff --git a/vortex-layout/src/layouts/struct_/writer.rs b/vortex-layout/src/layouts/struct_/writer.rs index 99fd0e8a0a..0440b1d698 100644 --- a/vortex-layout/src/layouts/struct_/writer.rs +++ b/vortex-layout/src/layouts/struct_/writer.rs @@ -67,7 +67,7 @@ impl LayoutWriter for StructLayoutWriter { let column = chunk .as_struct_array() .vortex_expect("batch is a struct array") - .field(i) + .maybe_null_field_by_idx(i) .vortex_expect("bounds already checked"); self.column_strategies[i].push_chunk(segments, column)?; } diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs index bfe9c1fbb7..9055659b76 100644 --- a/vortex-sampling-compressor/tests/smoketest.rs +++ b/vortex-sampling-compressor/tests/smoketest.rs @@ -117,7 +117,7 @@ mod tests { let struct_array: StructArray = compressed.try_into().unwrap(); let prim_col: ChunkedArray = struct_array - .field_by_name("prim_col") + .maybe_null_field_by_name("prim_col") .unwrap() .try_into() .unwrap(); @@ -131,7 +131,7 @@ mod tests { } let bool_col: ChunkedArray = struct_array - .field_by_name("bool_col") + .maybe_null_field_by_name("bool_col") .unwrap() .try_into() .unwrap(); @@ -144,7 +144,7 @@ mod tests { } let varbin_col: ChunkedArray = struct_array - .field_by_name("varbin_col") + .maybe_null_field_by_name("varbin_col") .unwrap() .try_into() .unwrap(); @@ -160,7 +160,7 @@ mod tests { } let binary_col: ChunkedArray = struct_array - .field_by_name("binary_col") + .maybe_null_field_by_name("binary_col") .unwrap() .try_into() .unwrap(); @@ -173,7 +173,7 @@ mod tests { } let timestamp_col: ChunkedArray = struct_array - .field_by_name("timestamp_col") + .maybe_null_field_by_name("timestamp_col") .unwrap() .try_into() .unwrap(); diff --git a/vortex-scalar/src/display.rs b/vortex-scalar/src/display.rs index 0b15b9e0f7..b4044d2d20 100644 --- a/vortex-scalar/src/display.rs +++ b/vortex-scalar/src/display.rs @@ -169,7 +169,7 @@ mod tests { #[test] fn display_empty_struct() { fn dtype() -> DType { - DType::Struct(StructDType::new(Arc::new([]), vec![]), Nullable) + DType::Struct(StructDType::new([].into(), vec![]), Nullable) } assert_eq!(format!("{}", Scalar::null(dtype())), "null"); @@ -182,7 +182,7 @@ mod tests { fn dtype() -> DType { DType::Struct( StructDType::new( - Arc::new([Arc::from("foo")]), + [Arc::from("foo")].into(), vec![DType::Primitive(PType::U32, Nullable)], ), Nullable, @@ -207,35 +207,43 @@ mod tests { #[test] fn display_two_field_struct() { - fn dtype() -> DType { - DType::Struct( - StructDType::new( - Arc::new([Arc::from("foo"), Arc::from("bar")]), - vec![ - DType::Bool(Nullable), - DType::Primitive(PType::U32, Nullable), - ], - ), - Nullable, - ) - } + // fn dtype() -> (DType, DType, DType) { + let f1 = DType::Bool(Nullable); + let f2 = DType::Primitive(PType::U32, Nullable); + let dtype = DType::Struct( + StructDType::new( + [Arc::from("foo"), Arc::from("bar")].into(), + vec![f1.clone(), f2.clone()], + ), + Nullable, + ); + // } - assert_eq!(format!("{}", Scalar::null(dtype())), "null"); + assert_eq!(format!("{}", Scalar::null(dtype.clone())), "null"); assert_eq!( - format!("{}", Scalar::struct_(dtype(), vec![])), + format!( + "{}", + Scalar::struct_( + dtype.clone(), + vec![Scalar::null(f1), Scalar::null(f2.clone())] + ) + ), "{foo:null,bar:null}" ); assert_eq!( - format!("{}", Scalar::struct_(dtype(), vec![Scalar::from(true)])), + format!( + "{}", + Scalar::struct_(dtype.clone(), vec![Scalar::from(true), Scalar::null(f2)]) + ), "{foo:true,bar:null}" ); assert_eq!( format!( "{}", - Scalar::struct_(dtype(), vec![Scalar::from(true), Scalar::from(32_u32)]) + Scalar::struct_(dtype, vec![Scalar::from(true), Scalar::from(32_u32)]) ), "{foo:true,bar:32_u32}" ); diff --git a/vortex-scalar/src/struct_.rs b/vortex-scalar/src/struct_.rs index 0806c20cdb..a779964519 100644 --- a/vortex-scalar/src/struct_.rs +++ b/vortex-scalar/src/struct_.rs @@ -44,20 +44,28 @@ impl<'a> StructScalar<'a> { } pub fn field_by_idx(&self, idx: usize) -> Option { - let DType::Struct(st, _) = self.dtype() else { + let DType::Struct(st, nullability) = self.dtype() else { unreachable!() }; + let field_dtype = st.dtypes().get(idx)?.with_nullability(*nullability); + self.fields - .as_ref() - .and_then(|fields| fields.get(idx)) - .map(|field| Scalar { - dtype: st.dtypes()[idx].clone(), - value: field.clone(), + .map(|fields| { + let field = fields.get(idx); + let field = field + .vortex_expect("Scalar dtype requires field exist") + .clone(); + debug_assert!(field.is_instance_of(&field_dtype)); + Some(Scalar { + dtype: field_dtype.clone(), + value: field, + }) }) + .unwrap_or_else(|| Some(Scalar::null(field_dtype))) } - pub fn field(&self, name: &str) -> Option { + pub fn field_by_name(&self, name: &str) -> Option { let DType::Struct(st, _) = self.dtype() else { unreachable!() }; @@ -167,3 +175,69 @@ impl<'a> TryFrom<&'a Scalar> for StructScalar<'a> { Self::try_new(value.dtype(), &value.value) } } + +#[cfg(test)] +mod tests { + use vortex_dtype::PType::I32; + use vortex_dtype::{DType, Nullability, StructDType}; + + use super::*; + + fn setup_types() -> (DType, DType, DType) { + let f0_dt = DType::Primitive(I32, Nullability::NonNullable); + let f1_dt = DType::Utf8(Nullability::NonNullable); + let f0_dt_null = f0_dt.with_nullability(Nullability::Nullable); + let f1_dt_null = f1_dt.with_nullability(Nullability::Nullable); + + let dtype = DType::Struct( + StructDType::new(vec!["a".into(), "b".into()].into(), vec![f0_dt, f1_dt]), + Nullability::Nullable, + ); + + (f0_dt_null, f1_dt_null, dtype) + } + + #[test] + fn test_struct_scalar_null() { + let (f0_dt_null, f1_dt_null, dtype) = setup_types(); + + let scalar = Scalar::null(dtype); + + let scalar_f0 = scalar.as_struct().field_by_idx(0); + assert!(scalar_f0.is_some()); + let scalar_f0 = scalar_f0.unwrap(); + assert_eq!(scalar_f0, Scalar::null(f0_dt_null.clone())); + assert_eq!(scalar_f0.dtype(), &f0_dt_null); + + let scalar_f1 = scalar.as_struct().field_by_idx(1); + assert!(scalar_f1.is_some()); + let scalar_f1 = scalar_f1.unwrap(); + assert_eq!(scalar_f1, Scalar::null(f1_dt_null.clone())); + assert_eq!(scalar_f1.dtype(), &f1_dt_null); + } + + #[test] + fn test_struct_scalar_non_null() { + let (f0_dt_null, f1_dt_null, dtype) = setup_types(); + + let f0_val = Scalar::primitive::(1, Nullability::NonNullable); + let f1_val = Scalar::utf8("hello", Nullability::NonNullable); + + let f0_val_null = Scalar::primitive::(1, Nullability::Nullable); + let f1_val_null = Scalar::utf8("hello", Nullability::Nullable); + + let scalar = Scalar::struct_(dtype, vec![f0_val, f1_val]); + + let scalar_f0 = scalar.as_struct().field_by_idx(0); + assert!(scalar_f0.is_some()); + let scalar_f0 = scalar_f0.unwrap(); + assert_eq!(scalar_f0, f0_val_null); + assert_eq!(scalar_f0.dtype(), &f0_dt_null); + + let scalar_f1 = scalar.as_struct().field_by_idx(1); + assert!(scalar_f1.is_some()); + let scalar_f1 = scalar_f1.unwrap(); + assert_eq!(scalar_f1, f1_val_null); + assert_eq!(scalar_f1.dtype(), &f1_dt_null); + } +}