diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs index 2003ddf82597..a8552b3d27e6 100644 --- a/parquet/src/arrow/arrow_reader/statistics.rs +++ b/parquet/src/arrow/arrow_reader/statistics.rs @@ -26,8 +26,8 @@ use crate::file::page_index::index::{Index, PageIndex}; use crate::file::statistics::Statistics as ParquetStatistics; use crate::schema::types::SchemaDescriptor; use arrow_array::builder::{ - BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder, StringViewBuilder, - BinaryViewBuilder, + BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder, + StringViewBuilder, }; use arrow_array::{ new_empty_array, new_null_array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, @@ -973,6 +973,41 @@ macro_rules! get_data_page_statistics { } Ok(Arc::new(builder.finish())) }, + Some(DataType::Utf8View) => { + let mut builder = StringViewBuilder::new(); + let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator); + for x in iterator { + for x in x.into_iter() { + let Some(x) = x else { + builder.append_null(); // no statistics value + continue; + }; + + let Ok(x) = std::str::from_utf8(x.data()) else { + builder.append_null(); + continue; + }; + + builder.append_value(x); + } + } + Ok(Arc::new(builder.finish())) + }, + Some(DataType::BinaryView) => { + let mut builder = BinaryViewBuilder::new(); + let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator); + for x in iterator { + for x in x.into_iter() { + let Some(x) = x else { + builder.append_null(); // no statistics value + continue; + }; + + builder.append_value(x); + } + } + Ok(Arc::new(builder.finish())) + }, _ => unimplemented!() } } @@ -1528,7 +1563,12 @@ mod test { use arrow::compute::kernels::cast_utils::Parser; use arrow::datatypes::{i256, Date32Type, Date64Type}; use arrow::util::test_util::parquet_test_data; - use arrow_array::{new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch, StringArray, StructArray, TimestampNanosecondArray, StringViewArray, BinaryViewArray}; + use arrow_array::{ + new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray, + BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch, + StringArray, StringViewArray, StructArray, TimestampNanosecondArray, + }; use arrow_schema::{DataType, Field, SchemaRef}; use bytes::Bytes; use std::path::PathBuf; @@ -2627,7 +2667,7 @@ mod test { Arc::new(array) } - fn binary_view_array<'a>(input: Vec>) -> ArrayRef { + fn binary_view_array(input: Vec>) -> ArrayRef { let array = BinaryViewArray::from(input.into_iter().collect::>>()); Arc::new(array)