Skip to content

Commit

Permalink
Merge branch 'refs/heads/add-stringview-binaryview-to-stat-convertor'
Browse files Browse the repository at this point in the history
# Conflicts:
#	parquet/src/arrow/arrow_reader/statistics.rs
  • Loading branch information
Kev1n8 committed Aug 2, 2024
2 parents ccef93b + abea892 commit 0cc8deb
Showing 1 changed file with 44 additions and 4 deletions.
48 changes: 44 additions & 4 deletions parquet/src/arrow/arrow_reader/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ use crate::file::page_index::index::{Index, PageIndex};
use crate::file::statistics::Statistics as ParquetStatistics;
use crate::schema::types::SchemaDescriptor;
use arrow_array::builder::{
BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder, StringViewBuilder,
BinaryViewBuilder,
BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder,
StringViewBuilder,
};
use arrow_array::{
new_empty_array, new_null_array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
Expand Down Expand Up @@ -973,6 +973,41 @@ macro_rules! get_data_page_statistics {
}
Ok(Arc::new(builder.finish()))
},
// Utf8View: gather the byte-array data page statistics into a `StringViewBuilder`
// and return the finished array behind an `Arc`.
Some(DataType::Utf8View) => {
let mut builder = StringViewBuilder::new();
let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator);
// Each outer item is itself iterable and yields optional values
// (presumably one entry per data page -- confirm against the iterator's definition).
for x in iterator {
for x in x.into_iter() {
let Some(x) = x else {
builder.append_null(); // no statistics value
continue;
};

// Bytes that are not valid UTF-8 are recorded as null instead of returning an error.
let Ok(x) = std::str::from_utf8(x.data()) else {
builder.append_null();
continue;
};

builder.append_value(x);
}
}
Ok(Arc::new(builder.finish()))
},
// BinaryView: gather the byte-array data page statistics into a `BinaryViewBuilder`
// and return the finished array behind an `Arc`.
Some(DataType::BinaryView) => {
let mut builder = BinaryViewBuilder::new();
let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator);
// Each outer item is itself iterable and yields optional values
// (presumably one entry per data page -- confirm against the iterator's definition).
for x in iterator {
for x in x.into_iter() {
let Some(x) = x else {
builder.append_null(); // no statistics value
continue;
};

// Unlike the Utf8View arm, the value is appended as-is with no UTF-8 validation.
builder.append_value(x);
}
}
Ok(Arc::new(builder.finish()))
},
_ => unimplemented!()
}
}
Expand Down Expand Up @@ -1528,7 +1563,12 @@ mod test {
use arrow::compute::kernels::cast_utils::Parser;
use arrow::datatypes::{i256, Date32Type, Date64Type};
use arrow::util::test_util::parquet_test_data;
use arrow_array::{new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch, StringArray, StructArray, TimestampNanosecondArray, StringViewArray, BinaryViewArray};
use arrow_array::{
new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray,
BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array,
Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch,
StringArray, StringViewArray, StructArray, TimestampNanosecondArray,
};
use arrow_schema::{DataType, Field, SchemaRef};
use bytes::Bytes;
use std::path::PathBuf;
Expand Down Expand Up @@ -2627,7 +2667,7 @@ mod test {
Arc::new(array)
}

fn binary_view_array<'a>(input: Vec<Option<&[u8]>>) -> ArrayRef {
fn binary_view_array(input: Vec<Option<&[u8]>>) -> ArrayRef {
let array = BinaryViewArray::from(input.into_iter().collect::<Vec<Option<&[u8]>>>());

Arc::new(array)
Expand Down

0 comments on commit 0cc8deb

Please sign in to comment.