From 85f39732b4928a0d67aad3503d1ab83f9b92d985 Mon Sep 17 00:00:00 2001 From: "hengjiang.ly" Date: Tue, 12 Mar 2024 19:50:15 -0700 Subject: [PATCH] Add prefix-sort with support for fixed width sorting keys (#8146) Summary: PrefixSort is used to improve in-memory sort performance, using memcmp to compare binary string (normalized encoded sort keys called 'prefix') when sorting. This PR adds 'extract rows to prefix' and 'sort' for fixed width types (int32, int64, float, double, timestamp etc.). More types will be added in a follow-up. Add benchmark to compare std::sort vs. prefix sort using 1, 2, 3, and 4 bigint sorting keys. The performance of sorting up to 1000 rows is the same. When sorting more than 1K rows prefix-sort is faster, the gains increase as the number of rows sorted and the number of sorting keys increases. The presence of the payload columns doesn't affect the performance. ``` ============================================================================ [...]ec/benchmarks/PrefixSortBenchmark.cpp relative time/iter iters/s ============================================================================ StdSort_no-payload_1_bigint_0.01k 49.40ns 20.24M PrefixSort 100.00% 49.40ns 20.24M StdSort_no-payload_2_bigint_0.01k 87.85ns 11.38M PrefixSort 99.904% 87.94ns 11.37M StdSort_no-payload_3_bigint_0.01k 65.04ns 15.37M PrefixSort 99.350% 65.47ns 15.27M StdSort_no-payload_4_bigint_0.01k 69.19ns 14.45M PrefixSort 99.566% 69.49ns 14.39M StdSort_no-payload_1_bigint_0.015k 47.93ns 20.86M PrefixSort 100.02% 47.92ns 20.87M StdSort_no-payload_2_bigint_0.015k 54.22ns 18.44M PrefixSort 99.921% 54.26ns 18.43M StdSort_no-payload_3_bigint_0.015k 61.00ns 16.39M PrefixSort 99.958% 61.02ns 16.39M StdSort_no-payload_4_bigint_0.015k 57.38ns 17.43M PrefixSort 99.870% 57.46ns 17.40M StdSort_no-payload_1_bigint_0.02k 47.82ns 20.91M PrefixSort 99.914% 47.86ns 20.90M StdSort_no-payload_2_bigint_0.02k 83.94ns 11.91M PrefixSort 100.00% 83.93ns 11.91M StdSort_no-payload_3_bigint_0.02k 
128.43ns 7.79M PrefixSort 100.17% 128.21ns 7.80M StdSort_no-payload_4_bigint_0.02k 165.29ns 6.05M PrefixSort 99.997% 165.30ns 6.05M StdSort_no-payload_1_bigint_0.05k 77.34ns 12.93M PrefixSort 99.425% 77.79ns 12.86M StdSort_no-payload_2_bigint_0.05k 113.79ns 8.79M PrefixSort 99.786% 114.03ns 8.77M StdSort_no-payload_3_bigint_0.05k 152.81ns 6.54M PrefixSort 99.696% 153.27ns 6.52M StdSort_no-payload_4_bigint_0.05k 185.93ns 5.38M PrefixSort 100.02% 185.90ns 5.38M StdSort_no-payload_1_bigint_0.1k 87.39ns 11.44M PrefixSort 99.499% 87.83ns 11.39M StdSort_no-payload_2_bigint_0.1k 139.93ns 7.15M PrefixSort 162.24% 86.25ns 11.59M StdSort_no-payload_3_bigint_0.1k 186.27ns 5.37M PrefixSort 186.72% 99.76ns 10.02M StdSort_no-payload_4_bigint_0.1k 234.01ns 4.27M PrefixSort 187.97% 124.49ns 8.03M StdSort_no-payloads_1_bigint_1k 173.31ns 5.77M PrefixSort 136.72% 126.76ns 7.89M StdSort_no-payloads_2_bigint_1k 249.77ns 4.00M PrefixSort 199.49% 125.20ns 7.99M StdSort_no-payloads_3_bigint_1k 314.18ns 3.18M PrefixSort 219.49% 143.14ns 6.99M StdSort_no-payloads_4_bigint_1k 348.38ns 2.87M PrefixSort 203.28% 171.38ns 5.84M StdSort_no-payloads_1_bigint_10k 251.90ns 3.97M PrefixSort 165.99% 151.76ns 6.59M StdSort_no-payloads_2_bigint_10k 363.09ns 2.75M PrefixSort 253.07% 143.47ns 6.97M StdSort_no-payloads_3_bigint_10k 483.58ns 2.07M PrefixSort 293.67% 164.67ns 6.07M StdSort_no-payloads_4_bigint_10k 593.29ns 1.69M PrefixSort 312.83% 189.65ns 5.27M StdSort_no-payloads_1_bigint_100k 330.44ns 3.03M PrefixSort 192.57% 171.59ns 5.83M StdSort_no-payloads_2_bigint_100k 470.79ns 2.12M PrefixSort 293.67% 160.31ns 6.24M StdSort_no-payloads_3_bigint_100k 607.15ns 1.65M PrefixSort 303.88% 199.80ns 5.01M StdSort_no-payloads_4_bigint_100k 706.03ns 1.42M PrefixSort 315.15% 224.03ns 4.46M StdSort_no-payloads_1_bigint_1000k 452.05ns 2.21M PrefixSort 204.92% 220.60ns 4.53M StdSort_no-payloads_2_bigint_1000k 645.35ns 1.55M PrefixSort 306.42% 210.61ns 4.75M StdSort_no-payloads_3_bigint_1000k 818.78ns 1.22M 
PrefixSort 328.01% 249.62ns 4.01M StdSort_no-payloads_4_bigint_1000k 981.65ns 1.02M PrefixSort 343.79% 285.54ns 3.50M StdSort_2payloads_1_bigint_1k 177.21ns 5.64M PrefixSort 139.94% 126.63ns 7.90M StdSort_2payloads_2_bigint_1k 248.46ns 4.02M PrefixSort 199.08% 124.80ns 8.01M StdSort_2payloads_3_bigint_1k 313.66ns 3.19M PrefixSort 218.48% 143.56ns 6.97M StdSort_2payloads_4_bigint_1k 359.17ns 2.78M PrefixSort 208.57% 172.21ns 5.81M StdSort_2payloads_1_bigint_10k 254.83ns 3.92M PrefixSort 168.28% 151.43ns 6.60M StdSort_2payloads_2_bigint_10k 363.92ns 2.75M PrefixSort 254.35% 143.08ns 6.99M StdSort_2payloads_3_bigint_10k 475.61ns 2.10M PrefixSort 288.96% 164.60ns 6.08M StdSort_2payloads_4_bigint_10k 594.78ns 1.68M PrefixSort 314.12% 189.35ns 5.28M StdSort_2payloads_1_bigint_100k 349.99ns 2.86M PrefixSort 205.28% 170.49ns 5.87M StdSort_2payloads_2_bigint_100k 489.03ns 2.04M PrefixSort 307.77% 158.89ns 6.29M StdSort_2payloads_3_bigint_100k 607.88ns 1.65M PrefixSort 305.62% 198.90ns 5.03M StdSort_2payloads_4_bigint_100k 715.90ns 1.40M PrefixSort 321.73% 222.52ns 4.49M StdSort_2payloads_1_bigint_1000k 574.14ns 1.74M PrefixSort 262.05% 219.10ns 4.56M StdSort_2payloads_2_bigint_1000k 796.05ns 1.26M PrefixSort 377.73% 210.75ns 4.75M StdSort_2payloads_3_bigint_1000k 1.02us 975.65K PrefixSort 411.02% 249.37ns 4.01M StdSort_2payloads_4_bigint_1000k 1.16us 858.48K PrefixSort 408.37% 285.24ns 3.51M StdSort_2payloads_1_bigint_0.01k 49.37ns 20.26M PrefixSort 99.124% 49.81ns 20.08M StdSort_2payloads_2_bigint_0.01k 89.08ns 11.23M PrefixSort 99.958% 89.12ns 11.22M StdSort_2payloads_3_bigint_0.01k 64.40ns 15.53M PrefixSort 99.991% 64.41ns 15.53M StdSort_2payloads_4_bigint_0.01k 86.56ns 11.55M PrefixSort 100.34% 86.26ns 11.59M StdSort_2payloads_1_bigint_0.015k 48.17ns 20.76M PrefixSort 100.11% 48.12ns 20.78M StdSort_2payloads_2_bigint_0.015k 55.44ns 18.04M PrefixSort 99.994% 55.45ns 18.03M StdSort_2payloads_3_bigint_0.015k 61.17ns 16.35M PrefixSort 99.988% 61.18ns 16.35M 
StdSort_2payloads_4_bigint_0.015k 57.55ns 17.38M PrefixSort 99.895% 57.61ns 17.36M StdSort_2payloads_1_bigint_0.02k 47.93ns 20.86M PrefixSort 99.916% 47.97ns 20.85M StdSort_2payloads_2_bigint_0.02k 84.10ns 11.89M PrefixSort 100.38% 83.78ns 11.94M StdSort_2payloads_3_bigint_0.02k 126.53ns 7.90M PrefixSort 100.05% 126.47ns 7.91M StdSort_2payloads_4_bigint_0.02k 164.44ns 6.08M PrefixSort 99.935% 164.55ns 6.08M StdSort_2payloads_1_bigint_0.05k 77.86ns 12.84M PrefixSort 99.171% 78.51ns 12.74M StdSort_2payloads_2_bigint_0.05k 118.10ns 8.47M PrefixSort 100.53% 117.48ns 8.51M StdSort_2payloads_3_bigint_0.05k 152.74ns 6.55M PrefixSort 100.02% 152.71ns 6.55M StdSort_2payloads_4_bigint_0.05k 184.56ns 5.42M PrefixSort 99.925% 184.70ns 5.41M StdSort_2payloads_1_bigint_0.1k 88.01ns 11.36M PrefixSort 100.46% 87.60ns 11.42M StdSort_2payloads_2_bigint_0.1k 138.22ns 7.24M PrefixSort 159.92% 86.43ns 11.57M StdSort_2payloads_3_bigint_0.1k 187.49ns 5.33M PrefixSort 187.96% 99.75ns 10.03M StdSort_2payloads_4_bigint_0.1k 232.52ns 4.30M PrefixSort 188.98% 123.04ns 8.13M StdSort_no-payloads_1_varchar_1k 292.32ns 3.42M PrefixSort 100.47% 290.96ns 3.44M StdSort_no-payloads_2_varchar_1k 380.98ns 2.62M PrefixSort 98.623% 386.29ns 2.59M StdSort_no-payloads_3_varchar_1k 456.15ns 2.19M PrefixSort 98.302% 464.03ns 2.16M StdSort_no-payloads_4_varchar_1k 520.84ns 1.92M PrefixSort 98.186% 530.46ns 1.89M StdSort_no-payloads_1_varchar_10k 422.83ns 2.37M PrefixSort 99.186% 426.30ns 2.35M StdSort_no-payloads_2_varchar_10k 495.10ns 2.02M PrefixSort 98.218% 504.08ns 1.98M StdSort_no-payloads_3_varchar_10k 584.89ns 1.71M PrefixSort 99.079% 590.33ns 1.69M StdSort_no-payloads_4_varchar_10k 667.37ns 1.50M PrefixSort 98.887% 674.88ns 1.48M StdSort_no-payloads_1_varchar_100k 605.27ns 1.65M PrefixSort 99.425% 608.78ns 1.64M StdSort_no-payloads_2_varchar_100k 741.11ns 1.35M PrefixSort 99.107% 747.78ns 1.34M StdSort_no-payloads_3_varchar_100k 890.60ns 1.12M PrefixSort 99.089% 898.78ns 1.11M 
StdSort_no-payloads_4_varchar_100k 1.11us 903.14K PrefixSort 104.50% 1.06us 943.76K StdSort_no-payloads_1_varchar_1000k 1.22us 822.83K PrefixSort 99.534% 1.22us 818.99K StdSort_no-payloads_2_varchar_1000k 1.52us 656.78K PrefixSort 99.353% 1.53us 652.53K StdSort_no-payloads_3_varchar_1000k 1.78us 560.23K PrefixSort 98.862% 1.81us 553.86K StdSort_no-payloads_4_varchar_1000k 1.93us 519.34K PrefixSort 99.159% 1.94us 514.97K ``` Part of https://github.com/facebookincubator/velox/issues/6766 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8146 Reviewed By: Yuhta Differential Revision: D54247347 Pulled By: mbasmanova fbshipit-source-id: b358925e6b702de5bb2eee55df24827b51268923 --- velox/exec/CMakeLists.txt | 1 + velox/exec/PrefixSort.cpp | 255 ++++++++++++++ velox/exec/PrefixSort.h | 179 ++++++++++ velox/exec/benchmarks/CMakeLists.txt | 5 + velox/exec/benchmarks/PrefixSortBenchmark.cpp | 324 ++++++++++++++++++ velox/exec/prefixsort/PrefixSortEncoder.h | 26 ++ velox/exec/tests/CMakeLists.txt | 3 +- velox/exec/tests/PrefixSortTest.cpp | 271 +++++++++++++++ 8 files changed, 1063 insertions(+), 1 deletion(-) create mode 100644 velox/exec/PrefixSort.cpp create mode 100644 velox/exec/PrefixSort.h create mode 100644 velox/exec/benchmarks/PrefixSortBenchmark.cpp create mode 100644 velox/exec/tests/PrefixSortTest.cpp diff --git a/velox/exec/CMakeLists.txt b/velox/exec/CMakeLists.txt index 99f789b1785d..de522d149f0f 100644 --- a/velox/exec/CMakeLists.txt +++ b/velox/exec/CMakeLists.txt @@ -53,6 +53,7 @@ add_library( NestedLoopJoinProbe.cpp Operator.cpp OperatorUtils.cpp + PrefixSort.cpp OrderBy.cpp PartitionedOutput.cpp OutputBuffer.cpp diff --git a/velox/exec/PrefixSort.cpp b/velox/exec/PrefixSort.cpp new file mode 100644 index 000000000000..c5ea46a9bf9f --- /dev/null +++ b/velox/exec/PrefixSort.cpp @@ -0,0 +1,255 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/exec/PrefixSort.h" + +using namespace facebook::velox::exec::prefixsort; + +namespace facebook::velox::exec { + +namespace { + +// For alignment, 8 is faster than 4. +// If the alignment is changed from 8 to 4, you need to change bitswap64 +// to bitswap32. +const int32_t kAlignment = 8; + +template +FOLLY_ALWAYS_INLINE void encodeRowColumn( + const PrefixSortLayout& prefixSortLayout, + const uint32_t index, + const RowColumn& rowColumn, + char* const row, + char* const prefix) { + std::optional value; + if (RowContainer::isNullAt(row, rowColumn.nullByte(), rowColumn.nullMask())) { + value = std::nullopt; + } else { + value = *(reinterpret_cast(row + rowColumn.offset())); + } + prefixSortLayout.encoders[index].encode( + value, prefix + prefixSortLayout.prefixOffsets[index]); +} + +FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( + TypeKind typeKind, + const PrefixSortLayout& prefixSortLayout, + const uint32_t index, + const RowColumn& rowColumn, + char* const row, + char* const prefix) { + switch (typeKind) { + case TypeKind::INTEGER: { + encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); + return; + } + case TypeKind::BIGINT: { + encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); + return; + } + case TypeKind::REAL: { + encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); + return; + } + case TypeKind::DOUBLE: { + 
encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); + return; + } + case TypeKind::TIMESTAMP: { + encodeRowColumn( + prefixSortLayout, index, rowColumn, row, prefix); + return; + } + default: + VELOX_UNSUPPORTED( + "prefix-sort does not support type kind: {}", + mapTypeKindToName(typeKind)); + } +} + +FOLLY_ALWAYS_INLINE int32_t alignmentPadding(int32_t size, int32_t alignment) { + auto extra = size % alignment; + return extra == 0 ? 0 : alignment - extra; +} + +FOLLY_ALWAYS_INLINE void bitsSwapByWord(uint64_t* address, int32_t bytes) { + while (bytes != 0) { + *address = __builtin_bswap64(*address); + ++address; + bytes -= kAlignment; + } +} + +FOLLY_ALWAYS_INLINE int +compareByWord(uint64_t* left, uint64_t* right, int32_t bytes) { + while (bytes != 0) { + if (*left == *right) { + ++left; + ++right; + bytes -= kAlignment; + continue; + } + if (*left > *right) { + return 1; + } else { + return -1; + } + } + return 0; +} + +} // namespace + +PrefixSortLayout PrefixSortLayout::makeSortLayout( + const std::vector& types, + const std::vector& compareFlags, + uint32_t maxNormalizedKeySize) { + uint32_t normalizedKeySize = 0; + uint32_t numNormalizedKeys = 0; + const uint32_t numKeys = types.size(); + std::vector prefixOffsets; + std::vector encoders; + + // Calculate encoders and prefix-offsets, and stop the loop if a key that + // cannot be normalized is encountered. 
+ for (auto i = 0; i < numKeys; ++i) { + if (normalizedKeySize > maxNormalizedKeySize) { + break; + } + std::optional encodedSize = + PrefixSortEncoder::encodedSize(types[i]->kind()); + if (encodedSize.has_value()) { + prefixOffsets.push_back(normalizedKeySize); + encoders.push_back( + {compareFlags[i].ascending, compareFlags[i].nullsFirst}); + normalizedKeySize += encodedSize.value(); + numNormalizedKeys++; + } else { + break; + } + } + auto padding = alignmentPadding(normalizedKeySize, kAlignment); + normalizedKeySize += padding; + return PrefixSortLayout{ + normalizedKeySize + sizeof(char*), + normalizedKeySize, + numNormalizedKeys, + numKeys, + compareFlags, + numNormalizedKeys == 0, + numNormalizedKeys < numKeys, + std::move(prefixOffsets), + std::move(encoders), + padding}; +} + +FOLLY_ALWAYS_INLINE int PrefixSort::compareAllNormalizedKeys( + char* left, + char* right) { + return compareByWord( + (uint64_t*)left, (uint64_t*)right, sortLayout_.normalizedBufferSize); +} + +int PrefixSort::comparePartNormalizedKeys(char* left, char* right) { + int result = compareAllNormalizedKeys(left, right); + if (result != 0) { + return result; + } + // If prefixes are equal, compare the left sort keys with rowContainer. 
+ char* leftAddress = getAddressFromPrefix(left); + char* rightAddress = getAddressFromPrefix(right); + for (auto i = sortLayout_.numNormalizedKeys; i < sortLayout_.numKeys; ++i) { + result = rowContainer_->compare( + leftAddress, rightAddress, i, sortLayout_.compareFlags[i]); + if (result != 0) { + return result; + } + } + return result; +} + +PrefixSort::PrefixSort( + memory::MemoryPool* pool, + RowContainer* rowContainer, + const std::vector& keyCompareFlags, + const PrefixSortConfig& config, + const PrefixSortLayout& sortLayout) + : pool_(pool), sortLayout_(sortLayout), rowContainer_(rowContainer) {} + +void PrefixSort::extractRowToPrefix(char* row, char* prefix) { + for (auto i = 0; i < sortLayout_.numNormalizedKeys; i++) { + extractRowColumnToPrefix( + rowContainer_->keyTypes()[i]->kind(), + sortLayout_, + i, + rowContainer_->columnAt(i), + row, + prefix); + } + simd::memset( + prefix + sortLayout_.normalizedBufferSize - sortLayout_.padding, + 0, + sortLayout_.padding); + // When comparing in std::memcmp, each byte is compared. If it is changed to + // compare every 8 bytes, the number of comparisons will be reduced and the + // performance will be improved. + // Use uint64_t compare to implement the above-mentioned comparison of every 8 + // bytes, assuming the system is little-endian, need to reverse bytes for + // every 8 bytes. + bitsSwapByWord((uint64_t*)prefix, sortLayout_.normalizedBufferSize); + // Set row address. + getAddressFromPrefix(prefix) = row; +} + +void PrefixSort::sortInternal(std::vector& rows) { + const auto numRows = rows.size(); + const auto entrySize = sortLayout_.entrySize; + memory::ContiguousAllocation prefixAllocation; + // 1. Allocate prefixes data. + { + const auto numPages = + memory::AllocationTraits::numPages(numRows * entrySize); + pool_->allocateContiguous(numPages, prefixAllocation); + } + char* const prefixes = prefixAllocation.data(); + + // 2. Extract rows to prefixes with row address. 
+ for (auto i = 0; i < rows.size(); ++i) { + extractRowToPrefix(rows[i], prefixes + entrySize * i); + } + + // 3. Sort prefixes with row address. + { + const auto swapBuffer = AlignedBuffer::allocate(entrySize, pool_); + PrefixSortRunner sortRunner(entrySize, swapBuffer->asMutable()); + const auto start = prefixes; + const auto end = prefixes + numRows * entrySize; + if (sortLayout_.hasNonNormalizedKey) { + sortRunner.quickSort(start, end, [&](char* a, char* b) { + return comparePartNormalizedKeys(a, b); + }); + } else { + sortRunner.quickSort(start, end, [&](char* a, char* b) { + return compareAllNormalizedKeys(a, b); + }); + } + } + // 4. Output sorted row addresses. + for (int i = 0; i < rows.size(); i++) { + rows[i] = getAddressFromPrefix(prefixes + i * entrySize); + } +} + +} // namespace facebook::velox::exec diff --git a/velox/exec/PrefixSort.h b/velox/exec/PrefixSort.h new file mode 100644 index 000000000000..37bdaca36951 --- /dev/null +++ b/velox/exec/PrefixSort.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "velox/common/memory/MemoryAllocator.h" +#include "velox/exec/RowContainer.h" +#include "velox/exec/prefixsort/PrefixSortAlgorithm.h" +#include "velox/exec/prefixsort/PrefixSortEncoder.h" + +namespace facebook::velox::exec { + +namespace detail { + +FOLLY_ALWAYS_INLINE void stdSort( + std::vector& rows, + RowContainer* rowContainer, + const std::vector& compareFlags) { + std::sort( + rows.begin(), rows.end(), [&](const char* leftRow, const char* rightRow) { + for (auto i = 0; i < compareFlags.size(); ++i) { + if (auto result = rowContainer->compare( + leftRow, rightRow, i, compareFlags[i])) { + return result < 0; + } + } + return false; + }); +} +}; // namespace detail + +struct PrefixSortConfig { + PrefixSortConfig(uint32_t maxNormalizedKeySize, uint32_t threshold = 130) + : maxNormalizedKeySize(maxNormalizedKeySize), threshold(threshold) {} + + /// Max number of bytes that can be used to store normalized keys in the + /// prefix-sort buffer per entry. + const uint32_t maxNormalizedKeySize; + + /// PrefixSort will have performance regression when the dataset is too small. + /// The default threshold (130, see the constructor) is based on the benchmark + /// test results. + const int64_t threshold; +}; + +/// The layout of prefix-sort buffer, a prefix entry includes: +/// 1. normalized keys +/// 2. non-normalized data ptr for semi-normalized types such as +/// string_view`s ptr, it will be filled when support Varchar. +/// 3. the row address ptr point to RowContainer`s rows is added at the end of +/// prefix. +struct PrefixSortLayout { + /// Number of bytes to store a prefix, it equals to: + /// normalizedBufferSize + sizeof(char*) (row address). + const uint64_t entrySize; + + /// If a sort key supports normalization and can be added to the prefix + /// sort buffer, it is called a normalized key. + const uint32_t normalizedBufferSize; + + const uint32_t numNormalizedKeys; + + /// The num of sort keys include normalized and non-normalized. 
+ const uint32_t numKeys; + + /// CompareFlags of all sort keys. + const std::vector compareFlags; + + /// Whether none of the sort keys is a normalized key. + /// It equals to 'numNormalizedKeys == 0', a little faster. + const bool noNormalizedKeys; + + /// Whether the sort keys contain a non-normalized key. + const bool hasNonNormalizedKey; + + /// Offsets of normalized keys, used to find write locations when + /// extracting columns + const std::vector prefixOffsets; + + /// The encoders for normalized keys. + const std::vector encoders; + + /// Number of bytes added to align the buffer size to 8 so that 8-byte word + /// compares can replace the byte-wise 'memcmp'. + const int32_t padding; + + static PrefixSortLayout makeSortLayout( + const std::vector& types, + const std::vector& compareFlags, + uint32_t maxNormalizedKeySize); +}; + +class PrefixSort { + public: + PrefixSort( + memory::MemoryPool* pool, + RowContainer* rowContainer, + const std::vector& keyCompareFlags, + const PrefixSortConfig& config, + const PrefixSortLayout& sortLayout); + + /// Follow the steps below to sort the data in RowContainer: + /// 1. Allocate a contiguous block of memory to store normalized keys. + /// 2. Extract the sort keys from the RowContainer. If the key can be + /// normalized, normalize it. For keys that can be normalized, we + /// combine them with the original row address ptr and store them + /// together into a buffer, called 'Prefix'. + /// 3. Sort the prefixes data we got in step 2. + /// For keys that can be normalized (all fixed width types), we use 'memcmp' + /// to compare the normalized binary string. + /// For keys that cannot be normalized, we use RowContainer`s compare method + /// to compare value. + /// For keys that can be part-normalized (Varchar, Row etc.), we will store the + /// normalized part and points to raw data in prefix, and custom the points 
The compare strategy will be defined in PrefixSortLayout as + /// follow-up, we treat this part as non-normalized until we implement all + /// fixed width types. + /// For complex types, e.g. ROW that can be converted to scalar types will be + /// supported. + /// 4. Extract the original row address ptr from prefixes (previously stored + /// them in the prefix buffer) into the input rows vector. + /// + /// @param rows The result of RowContainer::listRows(), assuming that the + /// caller (SortBuffer etc.) has already got the result. + FOLLY_ALWAYS_INLINE static void sort( + std::vector& rows, + memory::MemoryPool* pool, + RowContainer* rowContainer, + const std::vector& compareFlags, + const PrefixSortConfig& config) { + if (rowContainer->numRows() < config.threshold) { + detail::stdSort(rows, rowContainer, compareFlags); + return; + } + VELOX_DCHECK_EQ(rowContainer->keyTypes().size(), compareFlags.size()); + const auto sortLayout = PrefixSortLayout::makeSortLayout( + rowContainer->keyTypes(), compareFlags, config.maxNormalizedKeySize); + // All keys can not normalize, skip the binary string compare opt. + // Putting this outside sort-internal helps with inline std-sort. + if (sortLayout.noNormalizedKeys) { + detail::stdSort(rows, rowContainer, compareFlags); + return; + } + + PrefixSort prefixSort(pool, rowContainer, compareFlags, config, sortLayout); + prefixSort.sortInternal(rows); + } + + private: + void sortInternal(std::vector& rows); + + int compareAllNormalizedKeys(char* left, char* right); + + int comparePartNormalizedKeys(char* left, char* right); + + void extractRowToPrefix(char* row, char* prefix); + + // Return the reference of row address ptr for read/write. 
+ FOLLY_ALWAYS_INLINE char*& getAddressFromPrefix(char* prefix) { + return *reinterpret_cast(prefix + sortLayout_.normalizedBufferSize); + } + + memory::MemoryPool* const pool_; + const PrefixSortLayout sortLayout_; + RowContainer* const rowContainer_; +}; +} // namespace facebook::velox::exec diff --git a/velox/exec/benchmarks/CMakeLists.txt b/velox/exec/benchmarks/CMakeLists.txt index 92fd47a6bb60..eac595cbadf0 100644 --- a/velox/exec/benchmarks/CMakeLists.txt +++ b/velox/exec/benchmarks/CMakeLists.txt @@ -49,3 +49,8 @@ if(${VELOX_ENABLE_PARQUET}) arrow thrift) endif() + +add_executable(velox_prefixsort_benchmark PrefixSortBenchmark.cpp) + +target_link_libraries(velox_prefixsort_benchmark velox_exec velox_vector_fuzzer + velox_vector_test_lib ${FOLLY_BENCHMARK}) diff --git a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp new file mode 100644 index 000000000000..4edde03fa30b --- /dev/null +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -0,0 +1,324 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +#include "glog/logging.h" +#include "velox/exec/PrefixSort.h" +#include "velox/vector/fuzzer/VectorFuzzer.h" + +using namespace facebook::velox; +using namespace facebook::velox::exec; + +namespace { + +class TestCase { + public: + TestCase( + memory::MemoryPool* pool, + const std::string& testName, + size_t numRows, + const RowTypePtr& rowType, + int numKeys) + : testName_(testName), numRows_(numRows), pool_(pool), rowType_(rowType) { + // Initialize a RowContainer that holds fuzzed rows to be sorted. + std::vector keyTypes; + std::vector dependentTypes; + for (auto i = 0; i < rowType->size(); ++i) { + if (i < numKeys) { + keyTypes.push_back(rowType->childAt(i)); + } else { + dependentTypes.push_back(rowType->childAt(i)); + } + } + data_ = std::make_unique(keyTypes, dependentTypes, pool); + RowVectorPtr sortedRows = fuzzRows(numRows, numKeys); + storeRows(numRows, sortedRows); + + // Initialize CompareFlags, it could be same for each key in benchmark. + for (int i = 0; i < numKeys; ++i) { + compareFlags_.push_back( + {true, true, false, CompareFlags::NullHandlingMode::kNullAsValue}); + } + }; + + const std::string& testName() const { + return testName_; + } + + size_t numRows() const { + return numRows_; + } + + const std::vector& rows() const { + return rows_; + } + + RowContainer* rowContainer() const { + return data_.get(); + } + + const std::vector& compareFlags() const { + return compareFlags_; + } + + private: + // Store data into the RowContainer to mock the behavior of SortBuffer. 
+ void storeRows(int numRows, const RowVectorPtr& data) { + rows_.resize(numRows); + for (auto row = 0; row < numRows; ++row) { + rows_[row] = rowContainer()->newRow(); + } + for (auto column = 0; column < data->childrenSize(); ++column) { + DecodedVector decoded(*data->childAt(column)); + for (int i = 0; i < numRows; ++i) { + char* row = rows_[i]; + rowContainer()->store(decoded, i, row, column); + } + } + } + + RowVectorPtr fuzzRows(size_t numRows, int numKeys) { + VectorFuzzer fuzzer({.vectorSize = numRows}, pool_); + VectorFuzzer fuzzerWithNulls( + {.vectorSize = numRows, .nullRatio = 0.7}, pool_); + std::vector children; + + // Fuzz keys: for front keys (column 0 to numKeys - 2) use high + // nullRatio to enforce all columns to be compared. + { + for (auto i = 0; i < numKeys - 1; ++i) { + children.push_back(fuzzerWithNulls.fuzz(rowType_->childAt(i))); + } + children.push_back(fuzzer.fuzz(rowType_->childAt(numKeys - 1))); + } + // Fuzz payload + { + for (auto i = numKeys; i < rowType_->size(); ++i) { + children.push_back(fuzzer.fuzz(rowType_->childAt(i))); + } + } + return std::make_shared( + pool_, rowType_, nullptr, numRows, std::move(children)); + } + + const std::string testName_; + const size_t numRows_; + // Row addresses stored in RowContainer + std::vector rows_; + std::unique_ptr data_; + memory::MemoryPool* const pool_; + const RowTypePtr rowType_; + std::vector compareFlags_; +}; + +// You could config threshold, e.g. 0, to test prefix-sort for small +// dataset. +static const PrefixSortConfig kDefaultSortConfig(1024, 100); + +// For small dataset, in some test environments, if std-sort is defined in the +// benchmark file, the test results may be strangely regressed. When the +// threshold is particularly large, PrefixSort is actually std-sort, hence, we +// can use this as std-sort benchmark base. 
+static const PrefixSortConfig kStdSortConfig( + 1024, + std::numeric_limits::max()); + +class PrefixSortBenchmark { + public: + PrefixSortBenchmark(memory::MemoryPool* pool) : pool_(pool) {} + + void runPrefixSort( + const std::vector& rows, + RowContainer* rowContainer, + const std::vector& compareFlags) { + // Copy rows to avoid sort rows already sorted. + std::vector sortedRows = rows; + PrefixSort::sort( + sortedRows, pool_, rowContainer, compareFlags, kDefaultSortConfig); + } + + void runStdSort( + const std::vector& rows, + RowContainer* rowContainer, + const std::vector& compareFlags) { + std::vector sortedRows = rows; + PrefixSort::sort( + sortedRows, pool_, rowContainer, compareFlags, kStdSortConfig); + } + + // Add benchmark manually to avoid writing a lot of BENCHMARK. + void addBenchmark( + const std::string& testName, + size_t numRows, + const RowTypePtr& rowType, + int iterations, + int numKeys, + bool testStdSort) { + auto testCase = + std::make_unique(pool_, testName, numRows, rowType, numKeys); + // Add benchmarks for std-sort and prefix-sort. + { + if (testStdSort) { + folly::addBenchmark( + __FILE__, + "StdSort_" + testCase->testName(), + [rows = testCase->rows(), + container = testCase->rowContainer(), + sortFlags = testCase->compareFlags(), + iterations = iterations, + this]() { + for (auto i = 0; i < iterations; ++i) { + runStdSort(rows, container, sortFlags); + } + return rows.size() * iterations; + }); + } + folly::addBenchmark( + __FILE__, + testStdSort ? 
"%PrefixSort" : "PrefixSort_" + testCase->testName(), + [rows = testCase->rows(), + container = testCase->rowContainer(), + sortFlags = testCase->compareFlags(), + iterations = iterations, + this]() { + for (auto i = 0; i < iterations; ++i) { + runPrefixSort(rows, container, sortFlags); + } + return rows.size() * iterations; + }); + } + testCases_.push_back(std::move(testCase)); + } + + void benchmark( + const std::string& prefix, + const std::string& keyName, + const std::vector& batchSizes, + const std::vector& rowTypes, + const std::vector& numKeys, + int32_t iterations, + bool testStdSort = true) { + for (auto batchSize : batchSizes) { + for (auto i = 0; i < rowTypes.size(); ++i) { + const auto name = fmt::format( + "{}_{}_{}_{}k", prefix, numKeys[i], keyName, batchSize / 1000.0); + addBenchmark( + name, batchSize, rowTypes[i], iterations, numKeys[i], testStdSort); + } + } + } + + std::vector bigintRowTypes(bool noPayload) { + if (noPayload) { + return { + ROW({BIGINT()}), + ROW({BIGINT(), BIGINT()}), + ROW({BIGINT(), BIGINT(), BIGINT()}), + ROW({BIGINT(), BIGINT(), BIGINT(), BIGINT()}), + }; + } else { + return { + ROW({BIGINT(), VARCHAR(), VARCHAR()}), + ROW({BIGINT(), BIGINT(), VARCHAR(), VARCHAR()}), + ROW({BIGINT(), BIGINT(), BIGINT(), VARCHAR(), VARCHAR()}), + ROW({BIGINT(), BIGINT(), BIGINT(), BIGINT(), VARCHAR(), VARCHAR()}), + }; + } + } + + void bigint( + bool noPayload, + int numIterations, + const std::vector& batchSizes) { + std::vector rowTypes = bigintRowTypes(noPayload); + std::vector numKeys = {1, 2, 3, 4}; + benchmark( + noPayload ? "no-payload" : "payload", + "bigint", + batchSizes, + rowTypes, + numKeys, + numIterations); + } + + void smallBigint() { + // For small dateset, iterations need to be large enough to ensure that the + // benchmark runs for enough time. 
+ const auto iterations = 100'000; + const std::vector batchSizes = {10, 50, 100, 500}; + bigint(true, iterations, batchSizes); + } + + void smallBigintWithPayload() { + const auto iterations = 100'000; + const std::vector batchSizes = {10, 50, 100, 500}; + bigint(false, iterations, batchSizes); + } + + void largeBigint() { + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + bigint(true, iterations, batchSizes); + } + + void largeBigintWithPayloads() { + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + bigint(false, iterations, batchSizes); + } + + void largeVarchar() { + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + std::vector rowTypes = { + ROW({VARCHAR()}), + ROW({VARCHAR(), VARCHAR()}), + ROW({VARCHAR(), VARCHAR(), VARCHAR()}), + ROW({VARCHAR(), VARCHAR(), VARCHAR(), VARCHAR()}), + }; + std::vector numKeys = {1, 2, 3, 4}; + benchmark( + "no-payloads", "varchar", batchSizes, rowTypes, numKeys, iterations); + } + + private: + std::vector> testCases_; + memory::MemoryPool* pool_; +}; +} // namespace + +int main(int argc, char** argv) { + folly::Init init(&argc, &argv); + + memory::MemoryManager::initialize({}); + auto rootPool = memory::memoryManager()->addRootPool(); + auto leafPool = rootPool->addLeafChild("leaf"); + + PrefixSortBenchmark bm(leafPool.get()); + + bm.smallBigint(); + bm.largeBigint(); + bm.largeBigintWithPayloads(); + bm.smallBigintWithPayload(); + bm.largeVarchar(); + folly::runBenchmarks(); + + return 0; +} diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index 7408390ecabf..1323c43a4eb9 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -23,6 +23,7 @@ #include "velox/common/base/Exceptions.h" #include "velox/common/base/SimdUtil.h" #include "velox/type/Timestamp.h" +#include 
"velox/type/Type.h" namespace facebook::velox::exec::prefixsort { @@ -65,6 +66,31 @@ class PrefixSortEncoder { return nullsFirst_; } + /// @return For supported types, returns the encoded size, assuming the + /// value is nullable. For unsupported types, returns 'std::nullopt'. + FOLLY_ALWAYS_INLINE static std::optional encodedSize( + TypeKind typeKind) { + switch ((typeKind)) { + case ::facebook::velox::TypeKind::INTEGER: { + return 5; + } + case ::facebook::velox::TypeKind::BIGINT: { + return 9; + } + case ::facebook::velox::TypeKind::REAL: { + return 5; + } + case ::facebook::velox::TypeKind::DOUBLE: { + return 9; + } + case ::facebook::velox::TypeKind::TIMESTAMP: { + return 17; + } + default: + return std::nullopt; + } + } + private: const bool ascending_; const bool nullsFirst_; diff --git a/velox/exec/tests/CMakeLists.txt b/velox/exec/tests/CMakeLists.txt index 263797e36019..6310df860d5d 100644 --- a/velox/exec/tests/CMakeLists.txt +++ b/velox/exec/tests/CMakeLists.txt @@ -81,7 +81,8 @@ add_executable( ValuesTest.cpp WindowFunctionRegistryTest.cpp WindowTest.cpp - SortBufferTest.cpp) + SortBufferTest.cpp + PrefixSortTest.cpp) add_executable( velox_exec_infra_test diff --git a/velox/exec/tests/PrefixSortTest.cpp b/velox/exec/tests/PrefixSortTest.cpp new file mode 100644 index 000000000000..ad09e28373a1 --- /dev/null +++ b/velox/exec/tests/PrefixSortTest.cpp @@ -0,0 +1,271 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "velox/exec/PrefixSort.h" +#include "velox/exec/tests/utils/OperatorTestBase.h" + +namespace facebook::velox::exec::prefixsort::test { +namespace { + +class PrefixSortTest : public exec::test::OperatorTestBase { + protected: + std::vector + storeRows(int numRows, const RowVectorPtr& sortedRows, RowContainer* data); + + static constexpr CompareFlags kAsc{ + true, + true, + false, + CompareFlags::NullHandlingMode::kNullAsValue}; + + static constexpr CompareFlags kDesc{ + true, + false, + false, + CompareFlags::NullHandlingMode::kNullAsValue}; + + void testPrefixSort( + const std::vector& compareFlags, + const RowVectorPtr& data) { + const auto numRows = data->size(); + const auto expectedResult = + generateExpectedResult(compareFlags, numRows, data); + + const auto rowType = asRowType(data->type()); + + // Store data in a RowContainer. + const std::vector keyTypes{ + rowType->children().begin(), + rowType->children().begin() + compareFlags.size()}; + const std::vector payloadTypes{ + rowType->children().begin() + compareFlags.size(), + rowType->children().end()}; + + RowContainer rowContainer(keyTypes, payloadTypes, pool_.get()); + std::vector rows = storeRows(numRows, data, &rowContainer); + + // Use PrefixSort to sort rows. + PrefixSort::sort( + rows, + pool_.get(), + &rowContainer, + compareFlags, + {1024, + // Set threshold to 0 to enable prefix-sort in small dataset. + 0}); + + // Extract data from the RowContainer in order. + const RowVectorPtr actual = + BaseVector::create(rowType, numRows, pool_.get()); + for (int column = 0; column < compareFlags.size(); ++column) { + rowContainer.extractColumn( + rows.data(), numRows, column, actual->childAt(column)); + } + + velox::test::assertEqualVectors(actual, expectedResult); + } + + private: + // Use std::sort to generate expected result. 
+ const RowVectorPtr generateExpectedResult( + const std::vector& compareFlags, + int numRows, + const RowVectorPtr& sortedRows); +}; + +std::vector PrefixSortTest::storeRows( + int numRows, + const RowVectorPtr& sortedRows, + RowContainer* data) { + std::vector rows; + SelectivityVector allRows(numRows); + rows.resize(numRows); + for (int row = 0; row < numRows; ++row) { + rows[row] = data->newRow(); + } + for (int column = 0; column < sortedRows->childrenSize(); ++column) { + DecodedVector decoded(*sortedRows->childAt(column), allRows); + for (int i = 0; i < numRows; ++i) { + char* row = rows[i]; + data->store(decoded, i, row, column); + } + } + return rows; +} + +const RowVectorPtr PrefixSortTest::generateExpectedResult( + const std::vector& compareFlags, + int numRows, + const RowVectorPtr& sortedRows) { + const auto rowType = asRowType(sortedRows->type()); + const int numKeys = compareFlags.size(); + RowContainer rowContainer(rowType->children(), pool_.get()); + std::vector rows = storeRows(numRows, sortedRows, &rowContainer); + + std::sort( + rows.begin(), rows.end(), [&](const char* leftRow, const char* rightRow) { + for (auto i = 0; i < numKeys; ++i) { + if (auto result = + rowContainer.compare(leftRow, rightRow, i, compareFlags[i])) { + return result < 0; + } + } + return false; + }); + + const RowVectorPtr result = + BaseVector::create(rowType, numRows, pool_.get()); + for (int column = 0; column < compareFlags.size(); ++column) { + rowContainer.extractColumn( + rows.data(), numRows, column, result->childAt(column)); + } + return result; +} + +TEST_F(PrefixSortTest, singleKey) { + const int numRows = 5; + const int columnsSize = 7; + + // Vectors without nulls. 
+ const std::vector testData = { + makeFlatVector({5, 4, 3, 2, 1}), + makeFlatVector({5, 4, 3, 2, 1}), + makeFlatVector({5, 4, 3, 2, 1}), + makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), + makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), + makeFlatVector( + {Timestamp(5, 5), + Timestamp(4, 4), + Timestamp(3, 3), + Timestamp(2, 2), + Timestamp(1, 1)}), + makeFlatVector({"eee", "ddd", "ccc", "bbb", "aaa"})}; + for (int i = 0; i < columnsSize; ++i) { + const auto data = makeRowVector({testData[i]}); + + testPrefixSort({kAsc}, data); + testPrefixSort({kDesc}, data); + } +} + +TEST_F(PrefixSortTest, singleKeyWithNulls) { + const int numRows = 5; + const int columnsSize = 7; + + Timestamp ts = {5, 5}; + // Vectors with nulls. + const std::vector testData = { + makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), + makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), + makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), + makeNullableFlatVector({5.5, 4.4, std::nullopt, 2.2, 1.1}), + makeNullableFlatVector({5.5, 4.4, std::nullopt, 2.2, 1.1}), + makeNullableFlatVector( + {Timestamp(5, 5), + Timestamp(4, 4), + std::nullopt, + Timestamp(2, 2), + Timestamp(1, 1)}), + makeNullableFlatVector( + {"eee", "ddd", std::nullopt, "bbb", "aaa"})}; + + for (int i = 0; i < columnsSize; ++i) { + const auto data = makeRowVector({testData[i]}); + + testPrefixSort({kAsc}, data); + testPrefixSort({kDesc}, data); + } +} + +TEST_F(PrefixSortTest, multipleKeys) { + // Test all keys normalized : bigint, integer + { + const auto data = makeRowVector({ + makeNullableFlatVector({5, 2, std::nullopt, 2, 1}), + makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), + }); + + testPrefixSort({kAsc, kAsc}, data); + testPrefixSort({kDesc, kDesc}, data); + } + + // Test keys with semi-normalized : bigint, varchar + { + const auto data = makeRowVector({ + makeNullableFlatVector({5, 2, std::nullopt, 2, 1}), + makeNullableFlatVector( + {"eee", "ddd", std::nullopt, "bbb", "aaa"}), + }); + + testPrefixSort({kAsc, kAsc}, data);
+ testPrefixSort({kDesc, kDesc}, data); + } +} + +TEST_F(PrefixSortTest, fuzz) { + std::vector allTypes = { + INTEGER(), + BOOLEAN(), + TINYINT(), + SMALLINT(), + BIGINT(), + HUGEINT(), + REAL(), + DOUBLE(), + TIMESTAMP(), + VARCHAR(), + VARBINARY()}; + const int numRows = 10240; + for (const auto& type : allTypes) { + SCOPED_TRACE(fmt::format("{}", type->toString())); + VectorFuzzer fuzzer({.vectorSize = numRows, .nullRatio = 0.1}, pool()); + RowVectorPtr data = fuzzer.fuzzRow(ROW({type})); + + testPrefixSort({kAsc}, data); + testPrefixSort({kDesc}, data); + } +} + +TEST_F(PrefixSortTest, fuzzMulti) { + std::vector allTypes = { + INTEGER(), + BOOLEAN(), + TINYINT(), + SMALLINT(), + BIGINT(), + HUGEINT(), + REAL(), + DOUBLE(), + TIMESTAMP(), + VARCHAR(), + VARBINARY()}; + const int32_t numRows = 10240; + const TypePtr payload = VARCHAR(); + VectorFuzzer fuzzer({.vectorSize = numRows, .nullRatio = 0.1}, pool()); + for (const auto& type1 : allTypes) { + for (const auto& type2 : allTypes) { + SCOPED_TRACE(fmt::format("{}, {}", type1->toString(), type2->toString())); + RowVectorPtr data = fuzzer.fuzzRow(ROW({type1, type2, payload})); + + testPrefixSort({kAsc, kAsc}, data); + testPrefixSort({kDesc, kDesc}, data); + } + } +} +} // namespace +} // namespace facebook::velox::exec::prefixsort::test