diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs
index 2003ddf82597..a8552b3d27e6 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -26,8 +26,8 @@ use crate::file::page_index::index::{Index, PageIndex};
use crate::file::statistics::Statistics as ParquetStatistics;
use crate::schema::types::SchemaDescriptor;
use arrow_array::builder::{
- BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder, StringViewBuilder,
- BinaryViewBuilder,
+ BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder,
+ StringViewBuilder,
};
use arrow_array::{
new_empty_array, new_null_array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
@@ -973,6 +973,41 @@ macro_rules! get_data_page_statistics {
}
Ok(Arc::new(builder.finish()))
},
+ Some(DataType::Utf8View) => { // Utf8View: gather data page statistics with a StringViewBuilder
+ let mut builder = StringViewBuilder::new();
+ let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator);
+ for x in iterator { // each outer item is itself iterable over optional values
+ for x in x.into_iter() { // `x` shadows the outer item; here it is one optional value
+ let Some(x) = x else {
+ builder.append_null(); // no statistics value
+ continue;
+ };
+
+ let Ok(x) = std::str::from_utf8(x.data()) else {
+ builder.append_null(); // statistics bytes are not valid UTF-8
+ continue;
+ };
+
+ builder.append_value(x); // valid UTF-8: record the statistic as a string
+ }
+ }
+ Ok(Arc::new(builder.finish())) // finish the builder and return the array behind an Arc
+ },
+ Some(DataType::BinaryView) => { // BinaryView: gather data page statistics with a BinaryViewBuilder
+ let mut builder = BinaryViewBuilder::new();
+ let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator);
+ for x in iterator { // each outer item is itself iterable over optional values
+ for x in x.into_iter() { // `x` shadows the outer item; here it is one optional value
+ let Some(x) = x else {
+ builder.append_null(); // no statistics value
+ continue;
+ };
+
+ builder.append_value(x); // appended as-is; unlike the Utf8View arm, no UTF-8 check
+ }
+ }
+ Ok(Arc::new(builder.finish())) // finish the builder and return the array behind an Arc
+ },
_ => unimplemented!()
}
}
@@ -1528,7 +1563,12 @@ mod test {
use arrow::compute::kernels::cast_utils::Parser;
use arrow::datatypes::{i256, Date32Type, Date64Type};
use arrow::util::test_util::parquet_test_data;
- use arrow_array::{new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch, StringArray, StructArray, TimestampNanosecondArray, StringViewArray, BinaryViewArray};
+ use arrow_array::{
+ new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray,
+ BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array,
+ Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch,
+ StringArray, StringViewArray, StructArray, TimestampNanosecondArray,
+ };
use arrow_schema::{DataType, Field, SchemaRef};
use bytes::Bytes;
use std::path::PathBuf;
@@ -2627,7 +2667,7 @@ mod test {
Arc::new(array)
}
- fn binary_view_array<'a>(input: Vec