From a2bb719f4605c16f217fc09874bd020b5ace51d9 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Wed, 18 Dec 2024 15:09:45 -0800 Subject: [PATCH] Fix column stats when inserting serialized rows Summary: We don't update column stats when inserting serialized rows, which can cause problems when extracting data from the row container as deserialized vectors, because that extraction depends on the column stats in the row container. This PR adds the column stats update for serialized row insertion. Differential Revision: D67419701 --- velox/exec/RowContainer.cpp | 24 ++++++++++++++++++++++-- velox/exec/RowContainer.h | 3 +++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index 74d49266b0fd..9e9ace14ee0f 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -520,6 +520,25 @@ void RowContainer::updateColumnStats( } } +void RowContainer::updateColumnStats(char* row, int32_t columnIndex) { + const bool nullColumn = isNullAt(row, rowColumns_[columnIndex]); + updateColumnHasNulls(columnIndex, nullColumn); + + if (rowColumnsStats_.empty()) { + // Column stats have been invalidated. 
+ return; + } + + auto& columnStats = rowColumnsStats_[columnIndex]; + if (nullColumn) { + columnStats.addNullCell(); + } else if (types_[columnIndex]->isFixedWidth()) { + columnStats.addCellSize(fixedSizeAt(columnIndex)); + } else { + columnStats.addCellSize(variableSizeAt(row, columnIndex)); + } +} + void RowContainer::store( const DecodedVector& decoded, vector_size_t rowIndex, @@ -596,7 +615,7 @@ std::unique_ptr RowContainer::prepareRead( int32_t RowContainer::variableSizeAt(const char* row, column_index_t column) const { - const auto rowColumn = rowColumns_[column]; + const auto& rowColumn = rowColumns_[column]; if (isNullAt(row, rowColumn)) { return 0; @@ -766,7 +785,7 @@ void RowContainer::storeSerializedRow( vector_size_t index, char* row) { VELOX_CHECK(!vector.isNullAt(index)); - auto serialized = vector.valueAt(index); + const auto serialized = vector.valueAt(index); size_t offset = 0; ::memcpy(row + rowColumns_[0].nullByte(), serialized.data(), flagBytes_); @@ -783,6 +802,7 @@ void RowContainer::storeSerializedRow( const auto size = storeVariableSizeAt(serialized.data() + offset, row, i); offset += size; } + updateColumnStats(row, i); } } diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 72cd08e276dd..2d3f65346717 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -1466,6 +1466,9 @@ class RowContainer { char* row, int32_t columnIndex); + // Updates the stats of 'columnIndex' from a serialized row stored at 'row'. + inline void updateColumnStats(char* row, int32_t columnIndex); + // Light weight aggregated column stats does not support row erasures. This // method is called whenever a row is erased. void invalidateColumnStats();