From 3a8738316294f26262a13740835ad81a09ca6628 Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 24 Nov 2023 21:49:05 +0800 Subject: [PATCH] Resolve comments --- cpp/src/parquet/bloom_filter.cc | 9 +++++++++ cpp/src/parquet/bloom_filter.h | 4 ++-- cpp/src/parquet/printer.cc | 16 ++++++++-------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/cpp/src/parquet/bloom_filter.cc b/cpp/src/parquet/bloom_filter.cc index fb543ea086455..b1d552accc36e 100644 --- a/cpp/src/parquet/bloom_filter.cc +++ b/cpp/src/parquet/bloom_filter.cc @@ -144,6 +144,15 @@ BlockSplitBloomFilter BlockSplitBloomFilter::Deserialize( bloom_filter.Init(header_buf->data() + header_size, bloom_filter_size); return bloom_filter; } + if (bloom_filter_length && *bloom_filter_length < bloom_filter_size + header_size) { + // We know the bloom filter data size, but the length is not enough to read the + // entire bloom filter. + std::stringstream ss; + ss << "Bloom filter length (" << bloom_filter_length.value() + << ") is not enough to read the entire bloom filter (size: " + << bloom_filter_size + header_size << ")."; + throw ParquetException(ss.str()); + } // We have read a part of the bloom filter already, copy it to the target buffer // and read the remaining part from the InputStream. auto buffer = AllocateBuffer(properties.memory_pool(), bloom_filter_size); diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index c5eed0ec002e9..909563d013fed 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -310,8 +310,8 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { /// a Bloom filter from a parquet filter. /// /// @param properties The parquet reader properties. - /// @param input_stream The input stream from which to construct the Bloom filter. - /// @param bloom_filter_length The length of the Serialized Bloom filter including + /// @param input_stream The input stream from which to construct the bloom filter. + /// @param bloom_filter_length The length of the serialized bloom filter including /// header. /// @return The BlockSplitBloomFilter. static BlockSplitBloomFilter Deserialize( diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc index 46085d19e4ad1..f11397ab96ed8 100644 --- a/cpp/src/parquet/printer.cc +++ b/cpp/src/parquet/printer.cc @@ -324,19 +324,19 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected stream << "\"}"; } - if (column_chunk->GetOffsetIndexLocation()) { - auto location = column_chunk->GetOffsetIndexLocation().value(); - // Output OffsetIndex {offset, length} - stream << "\", OffsetIndex {" + if (column_chunk->GetColumnIndexLocation()) { + auto location = column_chunk->GetColumnIndexLocation().value(); + // Output ColumnIndex {offset, length} + stream << "\", ColumnIndex {" << "\"offset\": \"" << location.offset; stream << "\", \"length\": \"" << location.length; stream << "\"}"; } - if (column_chunk->GetColumnIndexLocation()) { - auto location = column_chunk->GetColumnIndexLocation().value(); - // Output ColumnIndex {offset, length} - stream << "\", ColumnIndex {" + if (column_chunk->GetOffsetIndexLocation()) { + auto location = column_chunk->GetOffsetIndexLocation().value(); + // Output OffsetIndex {offset, length} + stream << "\", OffsetIndex {" << "\"offset\": \"" << location.offset; stream << "\", \"length\": \"" << location.length; stream << "\"}";