Skip to content

Commit

Permalink
Struct field renamed to maybe_null_field (#1846)
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs authored Jan 8, 2025
1 parent 69861fa commit 6b1f3ca
Show file tree
Hide file tree
Showing 21 changed files with 271 additions and 109 deletions.
2 changes: 1 addition & 1 deletion bench-vortex/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ pub async fn register_vortex_files(

for (field_name, field_type) in names.zip(types) {
let val = arrays_map.entry(field_name.clone()).or_default();
val.push(st.field_by_name(field_name.as_ref()).unwrap());
val.push(st.maybe_null_field_by_name(field_name.as_ref()).unwrap());

types_map.insert(field_name.clone(), field_type.clone());
}
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/tpch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ async fn register_vortex_file(

for (field_name, field_type) in names.zip(types) {
let val = arrays_map.entry(field_name.clone()).or_default();
val.push(st.field_by_name(field_name).unwrap());
val.push(st.maybe_null_field_by_name(field_name).unwrap());

types_map.insert(field_name.clone(), field_type.clone());
}
Expand Down
4 changes: 3 additions & 1 deletion bench-vortex/src/vortex_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ pub async fn vortex_chunk_sizes(path: PathBuf) -> VortexResult<CompressionRunSta
let chunked_array = ChunkedArray::try_from(vortex).unwrap();
for chunk in chunked_array.chunks() {
let struct_arr = StructArray::try_from(chunk).unwrap();
for (i, f) in (0..struct_arr.nfields()).map(|i| (i, struct_arr.field(i).unwrap())) {
for (i, f) in
(0..struct_arr.nfields()).map(|i| (i, struct_arr.maybe_null_field_by_idx(i).unwrap()))
{
compressed_sizes[i] += f.nbytes() as u64;
}
}
Expand Down
10 changes: 7 additions & 3 deletions vortex-array/src/array/chunked/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ fn swizzle_struct_chunks(
for (field_idx, field_dtype) in struct_dtype.dtypes().iter().enumerate() {
let field_chunks = chunks.iter().map(|c| c.as_struct_array()
.vortex_expect("Chunk was not a StructArray")
.field(field_idx)
.maybe_null_field_by_idx(field_idx)
.ok_or_else(|| vortex_err!("All chunks must have same dtype; missing field at index {}, current chunk dtype: {}", field_idx, c.dtype()))
).collect::<VortexResult<Vec<_>>>()?;
let field_array = ChunkedArray::try_new(field_chunks, field_dtype.clone())?;
Expand Down Expand Up @@ -347,11 +347,15 @@ mod tests {
.into_array();
let canonical_struct = chunked.into_struct().unwrap();
let canonical_varbin = canonical_struct
.field(0)
.maybe_null_field_by_idx(0)
.unwrap()
.into_varbinview()
.unwrap();
let original_varbin = struct_array
.maybe_null_field_by_idx(0)
.unwrap()
.into_varbinview()
.unwrap();
let original_varbin = struct_array.field(0).unwrap().into_varbinview().unwrap();
let orig_values = original_varbin
.with_iterator(|it| it.map(|a| a.map(|v| v.to_vec())).collect::<Vec<_>>())
.unwrap();
Expand Down
8 changes: 6 additions & 2 deletions vortex-array/src/array/chunked/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ impl Utf8ArrayTrait for ChunkedArray {}
impl BinaryArrayTrait for ChunkedArray {}

impl StructArrayTrait for ChunkedArray {
fn field(&self, idx: usize) -> Option<ArrayData> {
fn maybe_null_field_by_idx(&self, idx: usize) -> Option<ArrayData> {
let mut chunks = Vec::with_capacity(self.nchunks());
for chunk in self.chunks() {
chunks.push(chunk.as_struct_array().and_then(|s| s.field(idx))?);
chunks.push(
chunk
.as_struct_array()
.and_then(|s| s.maybe_null_field_by_idx(idx))?,
);
}

let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?;
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/constant/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl Utf8ArrayTrait for ConstantArray {}
impl BinaryArrayTrait for ConstantArray {}

impl StructArrayTrait for ConstantArray {
fn field(&self, idx: usize) -> Option<ArrayData> {
fn maybe_null_field_by_idx(&self, idx: usize) -> Option<ArrayData> {
self.scalar()
.as_struct()
.field_by_idx(idx)
Expand Down
8 changes: 6 additions & 2 deletions vortex-array/src/array/sparse/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,14 @@ impl Utf8ArrayTrait for SparseArray {}
impl BinaryArrayTrait for SparseArray {}

impl StructArrayTrait for SparseArray {
fn field(&self, idx: usize) -> Option<ArrayData> {
fn maybe_null_field_by_idx(&self, idx: usize) -> Option<ArrayData> {
let new_patches = self
.patches()
.map_values_opt(|values| values.as_struct_array().and_then(|s| s.field(idx)))
.map_values_opt(|values| {
values
.as_struct_array()
.and_then(|s| s.maybe_null_field_by_idx(idx))
})
.vortex_expect("field array length should equal struct array length")?;
let scalar = StructScalar::try_from(&self.fill_scalar())
.ok()?
Expand Down
12 changes: 6 additions & 6 deletions vortex-array/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ impl StructArray {

pub fn children(&self) -> impl Iterator<Item = ArrayData> + '_ {
(0..self.nfields()).map(move |idx| {
self.field(idx).unwrap_or_else(|| {
self.maybe_null_field_by_idx(idx).unwrap_or_else(|| {
vortex_panic!("Field {} not found, nfields: {}", idx, self.nfields())
})
})
Expand Down Expand Up @@ -126,7 +126,7 @@ impl StructArray {

names.push(self.names()[idx].clone());
children.push(
self.field(idx)
self.maybe_null_field_by_idx(idx)
.ok_or_else(|| vortex_err!(OutOfBounds: idx, 0, self.dtypes().len()))?,
);
}
Expand All @@ -149,7 +149,7 @@ impl VariantsVTable<StructArray> for StructEncoding {
}

impl StructArrayTrait for StructArray {
fn field(&self, idx: usize) -> Option<ArrayData> {
fn maybe_null_field_by_idx(&self, idx: usize) -> Option<ArrayData> {
self.dtypes().get(idx).map(|dtype| {
self.as_ref()
.child(idx, dtype, self.len())
Expand Down Expand Up @@ -183,7 +183,7 @@ impl VisitorVTable<StructArray> for StructEncoding {
fn accept(&self, array: &StructArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
for (idx, name) in array.names().iter().enumerate() {
let child = array
.field(idx)
.maybe_null_field_by_idx(idx)
.ok_or_else(|| vortex_err!(OutOfBounds: idx, 0, array.nfields()))?;
visitor.visit_child(name.as_ref(), &child)?;
}
Expand Down Expand Up @@ -247,13 +247,13 @@ mod test {

assert_eq!(struct_b.len(), 5);

let bools = BoolArray::try_from(struct_b.field(0).unwrap()).unwrap();
let bools = BoolArray::try_from(struct_b.maybe_null_field_by_idx(0).unwrap()).unwrap();
assert_eq!(
bools.boolean_buffer().iter().collect::<Vec<_>>(),
vec![true, true, true, false, false]
);

let prims = PrimitiveArray::try_from(struct_b.field(1).unwrap()).unwrap();
let prims = PrimitiveArray::try_from(struct_b.maybe_null_field_by_idx(1).unwrap()).unwrap();
assert_eq!(prims.as_slice::<i64>(), [0i64, 1, 2, 3, 4]);
}
}
17 changes: 12 additions & 5 deletions vortex-array/src/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,17 +200,24 @@ pub trait StructArrayTrait: ArrayTrait {
self.names().len()
}

/// Return a field's array by index
fn field(&self, idx: usize) -> Option<ArrayData>;
/// Return a field's array by index, ignoring struct nullability
fn maybe_null_field_by_idx(&self, idx: usize) -> Option<ArrayData>;

/// Return a field's array by name
fn field_by_name(&self, name: &str) -> Option<ArrayData> {
/// Return a field's array by name, ignoring struct nullability
fn maybe_null_field_by_name(&self, name: &str) -> Option<ArrayData> {
let field_idx = self
.names()
.iter()
.position(|field_name| field_name.as_ref() == name);

field_idx.and_then(|field_idx| self.field(field_idx))
field_idx.and_then(|field_idx| self.maybe_null_field_by_idx(field_idx))
}

fn maybe_null_field(&self, field: &Field) -> Option<ArrayData> {
match field {
Field::Index(idx) => self.maybe_null_field_by_idx(*idx),
Field::Name(name) => self.maybe_null_field_by_name(name.as_ref()),
}
}

fn project(&self, projection: &[Field]) -> VortexResult<ArrayData>;
Expand Down
2 changes: 1 addition & 1 deletion vortex-datafusion/src/memory/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub fn chunked_array_df_stats(array: &ChunkedArray, projection: &[usize]) -> DFR
.iter()
.map(|i| {
array
.field(*i)
.maybe_null_field_by_idx(*i)
.ok_or_else(|| vortex_err!("Projection references unknown field {i}"))
})
.map_ok(|arr| {
Expand Down
8 changes: 4 additions & 4 deletions vortex-datafusion/src/persistent/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ use vortex_error::VortexResult;
pub fn array_to_col_statistics(array: &StructArray) -> VortexResult<ColumnStatistics> {
let mut stats = ColumnStatistics::new_unknown();

if let Some(null_count_array) = array.field_by_name(Stat::NullCount.name()) {
if let Some(null_count_array) = array.maybe_null_field_by_name(Stat::NullCount.name()) {
let array = null_count_array.into_arrow()?;
let array = array.as_primitive::<UInt64Type>();

let null_count = array.iter().map(|v| v.unwrap_or_default()).sum::<u64>();
stats.null_count = Precision::Exact(null_count as usize);
}

if let Some(max_value_array) = array.field_by_name(Stat::Max.name()) {
if let Some(max_value_array) = array.maybe_null_field_by_name(Stat::Max.name()) {
let array = max_value_array.into_arrow()?;
let mut acc = MaxAccumulator::try_new(array.data_type())?;
acc.update_batch(&[array])?;
Expand All @@ -30,7 +30,7 @@ pub fn array_to_col_statistics(array: &StructArray) -> VortexResult<ColumnStatis
stats.max_value = Precision::Exact(max_val)
}

if let Some(min_value_array) = array.field_by_name(Stat::Min.name()) {
if let Some(min_value_array) = array.maybe_null_field_by_name(Stat::Min.name()) {
let array = min_value_array.into_arrow()?;
let mut acc = MinAccumulator::try_new(array.data_type())?;
acc.update_batch(&[array])?;
Expand All @@ -43,7 +43,7 @@ pub fn array_to_col_statistics(array: &StructArray) -> VortexResult<ColumnStatis
}

pub fn uncompressed_col_size(array: &StructArray) -> VortexResult<Option<u64>> {
match array.field_by_name(Stat::UncompressedSizeInBytes.name()) {
match array.maybe_null_field_by_name(Stat::UncompressedSizeInBytes.name()) {
None => Ok(None),
Some(array) => {
let array = array.into_arrow()?;
Expand Down
5 changes: 2 additions & 3 deletions vortex-dtype/src/serde/flatbuffers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,15 @@ impl TryFrom<fb::DType<'_>> for DType {
.ok_or_else(|| vortex_err!("failed to parse struct names from flatbuffer"))?
.iter()
.map(|n| (*n).into())
.collect_vec()
.into();
.collect_vec();
let dtypes: Vec<Self> = fb_struct
.dtypes()
.ok_or_else(|| vortex_err!("failed to parse struct dtypes from flatbuffer"))?
.iter()
.map(Self::try_from)
.collect::<VortexResult<Vec<_>>>()?;
Ok(Self::Struct(
StructDType::new(names, dtypes),
StructDType::new(names.into(), dtypes),
fb_struct.nullable().into(),
))
}
Expand Down
21 changes: 11 additions & 10 deletions vortex-expr/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ use std::any::Any;
use std::fmt::Display;
use std::sync::Arc;

use vortex_array::array::StructArray;
use vortex_array::variants::StructArrayTrait;
use vortex_array::ArrayData;
use vortex_array::{ArrayDType, ArrayData};
use vortex_dtype::Field;
use vortex_error::{vortex_err, VortexResult};

Expand Down Expand Up @@ -61,13 +59,16 @@ impl VortexExpr for Column {
}

fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData> {
let s = StructArray::try_from(batch.clone())?;

match &self.field {
Field::Name(n) => s.field_by_name(n),
Field::Index(i) => s.field(*i),
}
.ok_or_else(|| vortex_err!("Array doesn't contain child array {}", self.field))
batch
.as_struct_array()
.ok_or_else(|| {
vortex_err!(
"Array must be a struct array, however it was a {}",
batch.dtype()
)
})?
.maybe_null_field(&self.field)
.ok_or_else(|| vortex_err!("Array doesn't contain child array {}", self.field))
}

fn children(&self) -> Vec<&ExprRef> {
Expand Down
6 changes: 3 additions & 3 deletions vortex-expr/src/pack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::{ExprRef, VortexExpr};
/// let x_copy = packed
/// .as_struct_array()
/// .unwrap()
/// .field_by_name("x copy")
/// .maybe_null_field_by_name("x copy")
/// .unwrap();
/// assert_eq!(scalar_at(&x_copy, 0).unwrap(), Scalar::from(100));
/// assert_eq!(scalar_at(&x_copy, 1).unwrap(), Scalar::from(110));
Expand Down Expand Up @@ -140,14 +140,14 @@ mod tests {
let mut array = array
.as_struct_array()
.ok_or_else(|| vortex_err!("expected a struct"))?
.field_by_name(field)
.maybe_null_field_by_name(field)
.ok_or_else(|| vortex_err!("expected field to exist: {}", field))?;

for field in field_path {
array = array
.as_struct_array()
.ok_or_else(|| vortex_err!("expected a struct"))?
.field_by_name(field)
.maybe_null_field_by_name(field)
.ok_or_else(|| vortex_err!("expected field to exist: {}", field))?;
}
Ok(array.into_primitive().unwrap())
Expand Down
16 changes: 8 additions & 8 deletions vortex-file/src/read/layouts/columnar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ mod tests {
.unwrap()
.as_struct_array()
.unwrap()
.field(0)
.maybe_null_field_by_idx(0)
.unwrap()
.into_primitive()
.unwrap();
Expand All @@ -531,7 +531,7 @@ mod tests {
.unwrap()
.as_struct_array()
.unwrap()
.field(1)
.maybe_null_field_by_idx(1)
.unwrap()
.into_varbinview()
.unwrap();
Expand Down Expand Up @@ -560,7 +560,7 @@ mod tests {
.unwrap()
.as_struct_array()
.unwrap()
.field(0)
.maybe_null_field_by_idx(0)
.unwrap()
.into_primitive()
.unwrap();
Expand All @@ -569,7 +569,7 @@ mod tests {
.unwrap()
.as_struct_array()
.unwrap()
.field(1)
.maybe_null_field_by_idx(1)
.unwrap()
.into_varbinview()
.unwrap();
Expand Down Expand Up @@ -608,14 +608,14 @@ mod tests {
let prim_arr_chunk0 = arr[0]
.as_struct_array()
.unwrap()
.field(0)
.maybe_null_field_by_idx(0)
.unwrap()
.into_primitive()
.unwrap();
let str_arr_chunk0 = arr[0]
.as_struct_array()
.unwrap()
.field(1)
.maybe_null_field_by_idx(1)
.unwrap()
.into_varbinview()
.unwrap();
Expand All @@ -635,14 +635,14 @@ mod tests {
let prim_arr_chunk1 = arr[1]
.as_struct_array()
.unwrap()
.field(0)
.maybe_null_field_by_idx(0)
.unwrap()
.into_primitive()
.unwrap();
let str_arr_chunk1 = arr[1]
.as_struct_array()
.unwrap()
.field(1)
.maybe_null_field_by_idx(1)
.unwrap()
.into_varbinview()
.unwrap();
Expand Down
Loading

0 comments on commit 6b1f3ca

Please sign in to comment.