diff --git a/velox/exec/Spill.cpp b/velox/exec/Spill.cpp index 21a00ec9c88e5..060a7b6141b3c 100644 --- a/velox/exec/Spill.cpp +++ b/velox/exec/Spill.cpp @@ -61,7 +61,7 @@ compareByWord(uint64_t* left, uint64_t* right, int32_t bytes) { void SpillMergeStream::pop() { VELOX_CHECK(!closed_); if (++index_ >= size_) { - nextBatch(); + setNextBatch(); } } @@ -392,7 +392,7 @@ uint32_t FileSpillMergeStream::id() const { void FileSpillMergeStream::buildPrefixBuffer() { // After close or no data, we should not build the prefix buffer. - if (size_ == 0 || prefixComparatorEnabled_ == false) { + if (size_ == 0 || !prefixComparatorEnabled_) { return; } diff --git a/velox/exec/Spill.h b/velox/exec/Spill.h index 9cae677e018eb..e5305566e7077 100644 --- a/velox/exec/Spill.h +++ b/velox/exec/Spill.h @@ -92,6 +92,15 @@ class SpillMergeStream : public MergeStream { // loads the next 'rowVector' and sets 'decoded_' if this is initialized. void setNextBatch() { nextBatch(); + if (!decoded_.empty()) { + ensureRows(); + // Prefix comparator has decoded some columns in `buildPrefixBuffer` + const auto start = + prefixComparatorEnabled_ ? sortLayout_.numNormalizedKeys : 0; + for (auto i = start; i < decoded_.size(); ++i) { + decoded_[i].decode(*rowVector_->childAt(i), rows_); + } + } } void ensureDecodedValid(int32_t index) { diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index 153936a3c25ea..c4d356ec53d46 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -27,7 +27,7 @@ namespace facebook::velox::exec::prefixsort { class PrefixSortEncoder { public: PrefixSortEncoder(bool ascending, bool nullsFirst) - : ascending_(ascending), nullsFirst_(nullsFirst) {}; + : ascending_(ascending), nullsFirst_(nullsFirst){}; /// Encode native primitive types(such as uint64_t, int64_t, uint32_t, /// int32_t, float, double, Timestamp). diff --git a/velox/exec/tests/PrefixSortTest.cpp b/velox/exec/tests/PrefixSortTest.cpp index 26d851c9af894..962c039e9503d 100644 --- a/velox/exec/tests/PrefixSortTest.cpp +++ b/velox/exec/tests/PrefixSortTest.cpp @@ -383,104 +383,46 @@ class VectorPrefixEncoderTest : public exec::test::OperatorTestBase { TEST_F(VectorPrefixEncoderTest, encode) { const std::vector testData = { - makeNullableFlatVector({7979, std::nullopt, -3, 2, 1}), - makeFlatVector({5, 4, 3, 2, 1}), - makeFlatVector({5, 4, 3, 2, 1}), - makeFlatVector( - {5, - HugeInt::parse("1234567"), - HugeInt::parse("12345678901234567890"), - HugeInt::parse("12345679"), - HugeInt::parse("-12345678901234567890")}), - makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), - makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), - makeFlatVector( - {Timestamp(5, 5), - Timestamp(4, 4), - Timestamp(3, 3), - Timestamp(2, 2), - Timestamp(1, 1)})}; + makeNullableFlatVector({7979, std::nullopt}), + makeFlatVector({5, 4}), + makeFlatVector({5, 4}), + makeFlatVector({5, HugeInt::parse("-12345678901234567890")}), + makeFlatVector({5.5, 4.4}), + makeFlatVector({5.5, 4.4}), + makeFlatVector({Timestamp(5, 5), Timestamp(4, 4)})}; const std::vector> expectedValues = { - {108086391056891935, - 3098476543630901248, - 0, + {108086391056891935, 3098476543630901248, 0, 0}, + {108086391140777984, 108086391124000768}, + {108091888615030784, 108090789103403008}, + {108086391056891904, 0, + 360287970189639680, 108086391056891903, - 18230571291595767808ull, - 108086391056891904, - 144115188075855872, - 108086391056891904, - 72057594037927936}, - {108086524922298368, - 0, - 108086391056891904, - 108086391056891904, - 108086391006560256}, - {116859394334916608, - 0, - 108086391056891904, - 108086391056891904, - 108086391056891904}, + 18398518765501604085ull, + + 3314649325744685056}, + {126294303612862464, 126255600806920192}, + {126124978822184960, 0, 126120140971022745, 11096869481840902144ull}, {108086391056891904, - 31, - 3098476543630901248, - 0, - 0, - 0, - 81523983842910625, - 11601272640106397696ull, - 72057594037927936, - 81523983842910625, - 11646767826930344353ull, - 11601272640106397696ull, - 81523983842910625, - 11646767826930344353ull, - 11601272640106397696ull}, - {108086524922298368, - 0, - 108086391056891904, - 108086391056891904, - 144115188059078656}, - {108086391056891935, - 3098476543630901248, - 0, - 0, - 144115188075855871, - 18374686479671623680ull, - 108086391056891904, - 144115188075855872, - 108086391056891904, - 72057594037927936}, - {108086391056891935, - 3098476543630901248, - 0, - 0, - 0, - 0, + 360287970189639680, + 360287970189639680, 108086391056891904, - 117552780861874593, - 11601272640106397696ull, - 81523983842910625, - 11646767826930344353ull, - 11601272640106397696ull, - 81523983842910625, - 11646767826930344353ull, - 11601272640106397696ull}}; - const auto numRows = 5; - std::vector decoded; - decoded.resize(testData.size()); + 288230376151711744, + 288230376151711744}}; + const auto numRows = testData[0]->size(); for (auto i = 0; i < testData.size(); ++i) { - decoded[i].decode(*testData[i]); + std::vector decoded{1}; + decoded[0].decode(*testData[i]); const auto data = makeRowVector({testData[i]}); std::vector compareFlags = {kAsc}; const auto rowType = asRowType(data->type()); const std::vector keyTypes = rowType->children(); auto sortLayout = PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 17); - char rawPrefixBuffer_[sortLayout.normalizedBufferSize * numRows]; + char rawPrefixBuffer[sortLayout.normalizedBufferSize * numRows]; VectorPrefixEncoder::encode( - sortLayout, keyTypes, decoded, numRows, rawPrefixBuffer_); - auto encodedValue = reinterpret_cast(rawPrefixBuffer_); + sortLayout, keyTypes, decoded, numRows, rawPrefixBuffer); + auto encodedValue = reinterpret_cast(rawPrefixBuffer); for (auto j = 0; j < sortLayout.normalizedBufferSize * numRows / sizeof(uint64_t);