From 58a80e5c297b5c273e56c9f9ba9b9956fbeeb075 Mon Sep 17 00:00:00 2001
From: Jimmy Lu
Date: Tue, 6 Aug 2024 20:57:09 -0700
Subject: [PATCH] Add row size estimation support for selective ARRAY and MAP column readers (#10622)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/10622

To do estimation on repeated types, we need the method to return not just the byte size but also the row count of inner data, so that we can get estimation on the average size of elements in one array or map.

Reviewed By: HuamengJiang

Differential Revision: D60477335

fbshipit-source-id: 2bec760d141106d48814ffd7ed3c24d720cc4122
---
 velox/dwio/common/SelectiveColumnReader.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/velox/dwio/common/SelectiveColumnReader.h b/velox/dwio/common/SelectiveColumnReader.h
index 96ae4b48d7da..e0d3b2ac102f 100644
--- a/velox/dwio/common/SelectiveColumnReader.h
+++ b/velox/dwio/common/SelectiveColumnReader.h
@@ -477,11 +477,15 @@ class SelectiveColumnReader {
   template
   void filterNulls(RowSet rows, bool isNull, bool extractValues);
 
-  // Temporary method for estimate in-memory row size (number of bits) of this
-  // column for Nimble. Will be removed once column statistics are added for
-  // Nimble.
-  virtual std::optional estimatedRowBitSize() const {
-    return std::nullopt;
+  // Temporary method for estimate total in-memory byte size and row count of
+  // current encoding chunk on this column for Nimble. Will be removed once
+  // column statistics are added for Nimble. Note that the estimations are
+  // based on current encoding chunk, so in multi-chunk stripe this is not
+  // accurate. Other formats should not use this.
+  virtual bool estimateMaterializedSize(
+      size_t& /*byteSize*/,
+      size_t& /*rowCount*/) const {
+    return false;
   }
 
   StringView copyStringValueIfNeed(folly::StringPiece value) {