From 5508978a3c5c4eb65ef6410e097887a8adaba38a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 8 Oct 2024 15:18:28 -0400 Subject: [PATCH] Add ColumnChunkMetadataBuilder clear APIs (#6523) --- parquet/src/file/metadata/mod.rs | 43 +++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 5a2ccbc0241f..fa56894f239c 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -1205,15 +1205,34 @@ impl ColumnChunkMetaData { /// Converts this [`ColumnChunkMetaData`] into a [`ColumnChunkMetaDataBuilder`] pub fn into_builder(self) -> ColumnChunkMetaDataBuilder { - ColumnChunkMetaDataBuilder(self) + ColumnChunkMetaDataBuilder::from(self) } } -/// Builder for column chunk metadata. +/// Builder for [`ColumnChunkMetaData`] +/// +/// This builder is used to create a new column chunk metadata or modify an +/// existing one. +/// +/// # Example +/// ```no_run +/// # use parquet::file::metadata::{ColumnChunkMetaData, ColumnChunkMetaDataBuilder}; +/// # fn get_column_chunk_metadata() -> ColumnChunkMetaData { unimplemented!(); } +/// let column_chunk_metadata = get_column_chunk_metadata(); +/// // create a new builder from existing column chunk metadata +/// let builder = ColumnChunkMetaDataBuilder::from(column_chunk_metadata); +/// // clear the statistics: +/// let column_chunk_metadata: ColumnChunkMetaData = builder +/// .clear_statistics() +/// .build() +/// .unwrap(); +/// ``` pub struct ColumnChunkMetaDataBuilder(ColumnChunkMetaData); impl ColumnChunkMetaDataBuilder { /// Creates new column chunk metadata builder. + /// + /// See also [`ColumnChunkMetaData::builder`] fn new(column_descr: ColumnDescPtr) -> Self { Self(ColumnChunkMetaData { column_descr, @@ -1297,7 +1316,7 @@ impl ColumnChunkMetaDataBuilder { self } - /// Sets optional dictionary page ofset in bytes. + /// Sets optional dictionary page offset in bytes. 
pub fn set_dictionary_page_offset(mut self, value: Option) -> Self { self.0.dictionary_page_offset = value; self @@ -1315,12 +1334,24 @@ impl ColumnChunkMetaDataBuilder { self } + /// Clears the statistics for this column chunk. + pub fn clear_statistics(mut self) -> Self { + self.0.statistics = None; + self + } + /// Sets page encoding stats for this column chunk. pub fn set_page_encoding_stats(mut self, value: Vec) -> Self { self.0.encoding_stats = Some(value); self } + /// Clears the page encoding stats for this column chunk. + pub fn clear_page_encoding_stats(mut self) -> Self { + self.0.encoding_stats = None; + self + } + /// Sets optional bloom filter offset in bytes. pub fn set_bloom_filter_offset(mut self, value: Option) -> Self { self.0.bloom_filter_offset = value; @@ -1492,6 +1523,12 @@ impl ColumnIndexBuilder { } } +impl From<ColumnChunkMetaData> for ColumnChunkMetaDataBuilder { + fn from(value: ColumnChunkMetaData) -> Self { + ColumnChunkMetaDataBuilder(value) + } +} + /// Builder for offset index, part of the Parquet [PageIndex]. /// /// [PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md