diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index cb7e438cba07..9f1282a138e1 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -541,6 +541,35 @@ void RowContainer::store( } } /* Batch overload: stores decoded value i into the given column of rows[i] for every i below rows.size(). The check below requires decoded to hold at least rows.size() values. */ +void RowContainer::store( + const DecodedVector& decoded, + folly::Range rows, + int32_t column) { + VELOX_CHECK_GE(decoded.size(), rows.size()); + const bool isKey = column < keyTypes_.size(); /* Take the no-nulls writer when the column is a key and keys are not nullable, or when the input reports that it cannot contain nulls. NOTE(review): this branch dispatches with VELOX_DYNAMIC_TYPE_DISPATCH while the other branch uses the _ALL variant; confirm the narrower macro covers every type a null-free non-key column can have. */ + if ((isKey && !nullableKeys_) || !decoded.mayHaveNulls()) { + VELOX_DYNAMIC_TYPE_DISPATCH( + storeNoNullsBatch, + typeKinds_[column], + decoded, + rows, + isKey, + offsets_[column]); + } else { /* Null-aware path: additionally passes the null byte and null mask recorded for this column. */ + const auto rowColumn = rowColumns_[column]; + VELOX_DYNAMIC_TYPE_DISPATCH_ALL( + storeWithNullsBatch, + typeKinds_[column], + decoded, + rows, + isKey, + rowColumn.offset(), + rowColumn.nullByte(), + rowColumn.nullMask(), + column); + } +} + std::unique_ptr RowContainer::prepareRead( const char* row, int32_t offset) { diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 722dfbdc8997..8c7f7b56805e 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -302,6 +302,13 @@ class RowContainer { char* row, int32_t columnIndex); + /// Stores the first 'rows.size()' values of 'decoded' into column + /// 'columnIndex' of 'rows', writing value i to rows[i]. 'decoded' must hold + /// at least 'rows.size()' values. 
+ void store( + const DecodedVector& decoded, + folly::Range rows, + int32_t columnIndex); + HashStringAllocator& stringAllocator() { return *stringAllocator_; } @@ -965,6 +972,32 @@ class RowContainer { } } /* Calls storeWithNulls once per entry of rows, pairing rows[i] with index i of decoded and forwarding the column offset, null byte, null mask and column number unchanged. NOTE(review): i is int32_t; confirm that comparing it with rows.size() does not mix signedness. */ + template + inline void storeWithNullsBatch( + const DecodedVector& decoded, + folly::Range rows, + bool isKey, + int32_t offset, + int32_t nullByte, + uint8_t nullMask, + int32_t column) { + for (int32_t i = 0; i < rows.size(); ++i) { + storeWithNulls( + decoded, i, isKey, rows[i], offset, nullByte, nullMask, column); + } + } + /* Calls storeNoNulls once per entry of rows, pairing rows[i] with index i of decoded; no null byte or mask is passed on this path. */ + template + inline void storeNoNullsBatch( + const DecodedVector& decoded, + folly::Range rows, + bool isKey, + int32_t offset) { + for (int32_t i = 0; i < rows.size(); ++i) { + storeNoNulls(decoded, i, isKey, rows[i], offset); + } + } + template static void extractValuesWithNulls( const char* const* rows, diff --git a/velox/exec/SortBuffer.cpp b/velox/exec/SortBuffer.cpp index 31a1ef34f542..fb2923c4c1d1 100644 --- a/velox/exec/SortBuffer.cpp +++ b/velox/exec/SortBuffer.cpp @@ -90,9 +90,10 @@ void SortBuffer::addInput(const VectorPtr& input) { for (const auto& columnProjection : columnMap_) { DecodedVector decoded( *inputRow->childAt(columnProjection.outputChannel), allRows); /* The per-row store loop below is removed in favour of one batched store call per projected column, covering the first input->size() entries of rows. */ - for (int i = 0; i < input->size(); ++i) { - data_->store(decoded, i, rows[i], columnProjection.inputChannel); - } + data_->store( + decoded, + folly::Range(rows.data(), input->size()), + columnProjection.inputChannel); } numInputRows_ += allRows.size(); } diff --git a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp index 9575fa201c4c..bb81eaa4c6aa 100644 --- a/velox/exec/benchmarks/PrefixSortBenchmark.cpp +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -84,10 +84,8 @@ class TestCase { } for (auto column = 0; column < data->childrenSize(); ++column) { DecodedVector decoded(*data->childAt(column)); /* Same replacement as in SortBuffer: the per-row loop gives way to a single batched store per column. */ - for (int i = 0; i < numRows; ++i) { - char* row = rows_[i]; - rowContainer()->store(decoded, i, row, column); - 
} + rowContainer()->store( + decoded, folly::Range(rows_.data(), numRows), column); } } diff --git a/velox/exec/tests/RowContainerTest.cpp b/velox/exec/tests/RowContainerTest.cpp index 96b8dc01be8b..26a666fe2d81 100644 --- a/velox/exec/tests/RowContainerTest.cpp +++ b/velox/exec/tests/RowContainerTest.cpp @@ -2116,3 +2116,57 @@ TEST_F(RowContainerTest, columnHasNulls) { } } } + /* Round-trip test for the batch store overload: each column of a four-column input is written with one store call and read back with extractColumn, once for an input with periodic nulls and once for an input built without null generators. */ +TEST_F(RowContainerTest, store) { + const uint64_t kNumRows = 1000; + auto rowVectorWithNulls = makeRowVector({ + makeFlatVector( + kNumRows, [](auto row) { return row % 5; }, nullEvery(6)), + makeFlatVector( + kNumRows, + [](auto row) { return fmt::format("abcdefg123_{}", row); }, + nullEvery(7)), + makeFlatVector( + kNumRows, [](auto row) { return row; }, nullEvery(8)), + makeArrayVector( + kNumRows, + [](auto i) { return i % 5; }, + [](auto i) { return i % 10; }, + nullEvery(10)), + }); + + auto rowVectorNoNulls = makeRowVector({ + makeFlatVector(kNumRows, [](auto row) { return row % 5; }), + makeFlatVector( + kNumRows, [](auto row) { return fmt::format("abcdefg12_{}", row); }), + makeFlatVector(kNumRows, [](auto row) { return row; }), + makeArrayVector( + kNumRows, + [](auto i) { return i % 3; }, + [](auto i) { return i % 10; }), + }); /* A fresh container is built for each input. NOTE(review): the two type lists are presumably key and dependent column types, and the meaning of the trailing false argument is not visible here; confirm against makeRowContainer. */ + for (auto& rowVector : {rowVectorWithNulls, rowVectorNoNulls}) { + auto rowContainer = makeRowContainer( + {BIGINT(), VARCHAR()}, {BIGINT(), ARRAY(BIGINT())}, false); + std::vector rows; + rows.reserve(kNumRows); + + ASSERT_EQ(rowContainer->numRows(), 0); + SelectivityVector allRows(kNumRows); /* Allocate every row first so that each column can then be written with a single batch call. */ + for (size_t i = 0; i < kNumRows; i++) { + auto row = rowContainer->newRow(); + rows.push_back(row); + } + for (int i = 0; i < rowContainer->columnTypes().size(); ++i) { + DecodedVector decoded(*rowVector->childAt(i), allRows); + rowContainer->store(decoded, folly::Range(rows.data(), kNumRows), i); + } + ASSERT_EQ(rowContainer->numRows(), kNumRows); /* Read each column back and require it to equal the corresponding input child. */ + for (int i = 0; i < rowContainer->columnTypes().size(); ++i) { + auto vector = + 
BaseVector::create(rowVector->childAt(i)->type(), kNumRows, pool()); + rowContainer->extractColumn(rows.data(), kNumRows, i, vector); + assertEqualVectors(rowVector->childAt(i), vector); + } + } +}