From 0a546573e22a4ba11d5cb68945079601f672b9c5 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Mon, 4 Nov 2024 11:37:46 +0800 Subject: [PATCH] Support string type key in prefix sort address comments address comments address comments address comments address comments fix build address comments get string column max length from row container fix ut collect stats for orderBy op --- velox/common/base/PrefixSortConfig.h | 12 +- velox/core/QueryConfig.h | 9 ++ velox/docs/configs.rst | 4 + velox/exec/Driver.h | 3 +- velox/exec/PrefixSort.cpp | 57 +++++++-- velox/exec/PrefixSort.h | 45 ++++++- velox/exec/RowContainer.cpp | 3 +- velox/exec/RowContainer.h | 5 +- velox/exec/Window.cpp | 3 +- velox/exec/benchmarks/PrefixSortBenchmark.cpp | 7 +- velox/exec/prefixsort/PrefixSortEncoder.h | 64 +++++++++- .../prefixsort/tests/PrefixEncoderTest.cpp | 80 +++++++++++- velox/exec/tests/AggregationTest.cpp | 74 ++++++----- velox/exec/tests/PrefixSortTest.cpp | 117 ++++++++++++++---- velox/exec/tests/SortBufferTest.cpp | 28 ++--- velox/exec/tests/WindowTest.cpp | 2 +- .../exec/tests/utils/LocalRunnerTestBase.cpp | 2 +- 17 files changed, 411 insertions(+), 104 deletions(-) diff --git a/velox/common/base/PrefixSortConfig.h b/velox/common/base/PrefixSortConfig.h index 46ac493915546..d5f731d140eac 100644 --- a/velox/common/base/PrefixSortConfig.h +++ b/velox/common/base/PrefixSortConfig.h @@ -24,9 +24,13 @@ namespace facebook::velox::common { struct PrefixSortConfig { PrefixSortConfig() = default; - PrefixSortConfig(uint32_t _maxNormalizedKeyBytes, uint32_t _minNumRows) + PrefixSortConfig( + uint32_t _maxNormalizedKeyBytes, + uint32_t _minNumRows, + uint32_t _maxStringPrefixLength) : maxNormalizedKeyBytes(_maxNormalizedKeyBytes), - minNumRows(_minNumRows) {} + minNumRows(_minNumRows), + maxStringPrefixLength(_maxStringPrefixLength) {} /// Maximum bytes that can be used to store normalized keys in prefix-sort /// buffer per entry. Same with QueryConfig kPrefixSortNormalizedKeyMaxBytes. @@ -35,5 +39,9 @@ struct PrefixSortConfig { /// Minimum number of rows to apply prefix sort. Prefix sort does not perform /// with small datasets. uint32_t minNumRows{128}; + + /// Max number of bytes to be stored in prefix-sort buffer for a string + /// column. + uint32_t maxStringPrefixLength{12}; }; } // namespace facebook::velox::common diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 7bceb06d117bb..37d3589a62bd6 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -378,6 +378,11 @@ class QueryConfig { /// derived using micro-benchmarking. static constexpr const char* kPrefixSortMinRows = "prefixsort_min_rows"; + /// Maximum number of bytes to be stored in prefix-sort buffer for a string + /// key. + static constexpr const char* kPrefixSortMaxStringPrefixLength = + "prefixsort_max_string_prefix_length"; + /// Enable query tracing flag. static constexpr const char* kQueryTraceEnabled = "query_trace_enabled"; @@ -844,6 +849,10 @@ class QueryConfig { return get(kPrefixSortMinRows, 128); } + uint32_t prefixSortMaxStringPrefixLength() const { + return get(kPrefixSortMaxStringPrefixLength, 12); + } + double scaleWriterRebalanceMaxMemoryUsageRatio() const { return get(kScaleWriterRebalanceMaxMemoryUsageRatio, 0.7); } diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index f25b89a5518e5..b34636a6d9cdd 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -144,6 +144,10 @@ Generic Configuration - integer - 128 - Minimum number of rows to use prefix-sort. The default value has been derived using micro-benchmarking. + * - prefixsort_max_string_prefix_length + - integer + - 12 + - Byte length of the string prefix stored in the prefix-sort buffer. This doesn't include the null byte. .. _expression-evaluation-conf: diff --git a/velox/exec/Driver.h b/velox/exec/Driver.h index 9bd25072cc427..fe880228b8e14 100644 --- a/velox/exec/Driver.h +++ b/velox/exec/Driver.h @@ -300,7 +300,8 @@ struct DriverCtx { common::PrefixSortConfig prefixSortConfig() const { return common::PrefixSortConfig{ queryConfig().prefixSortNormalizedKeyMaxBytes(), - queryConfig().prefixSortMinRows()}; + queryConfig().prefixSortMinRows(), + queryConfig().prefixSortMaxStringPrefixLength()}; } }; diff --git a/velox/exec/PrefixSort.cpp b/velox/exec/PrefixSort.cpp index b4dfb1f6e0d12..d40085e52452a 100644 --- a/velox/exec/PrefixSort.cpp +++ b/velox/exec/PrefixSort.cpp @@ -39,8 +39,15 @@ FOLLY_ALWAYS_INLINE void encodeRowColumn( } else { value = *(reinterpret_cast(row + rowColumn.offset())); } - prefixSortLayout.encoders[index].encode( - value, prefixBuffer + prefixSortLayout.prefixOffsets[index]); + if constexpr (std::is_same_v) { + prefixSortLayout.encoders[index].encode( + value, + prefixBuffer + prefixSortLayout.prefixOffsets[index], + prefixSortLayout.encodeSizes[index]); + } else { + prefixSortLayout.encoders[index].encode( + value, prefixBuffer + prefixSortLayout.prefixOffsets[index]); + } } FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( @@ -86,6 +93,13 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( prefixSortLayout, index, rowColumn, row, prefixBuffer); return; } + case TypeKind::VARCHAR: + [[fallthrough]]; + case TypeKind::VARBINARY: { + encodeRowColumn( + prefixSortLayout, index, rowColumn, row, prefixBuffer); + return; + } default: VELOX_UNSUPPORTED( "prefix-sort does not support type kind: {}", @@ -130,28 +144,42 @@ compareByWord(uint64_t* left, uint64_t* right, int32_t bytes) { PrefixSortLayout PrefixSortLayout::makeSortLayout( const std::vector& types, const std::vector& compareFlags, - uint32_t maxNormalizedKeySize) { + uint32_t maxNormalizedKeySize, + uint32_t maxStringPrefixLength, + const std::vector& maxStringLengths) { const uint32_t numKeys = types.size(); std::vector prefixOffsets; prefixOffsets.reserve(numKeys); + std::vector encodeSizes; + encodeSizes.reserve(numKeys); std::vector encoders; encoders.reserve(numKeys); // Calculate encoders and prefix-offsets, and stop the loop if a key that - // cannot be normalized is encountered. + // cannot be normalized is encountered or only partial data of a key is + // normalized. uint32_t normalizedKeySize{0}; uint32_t numNormalizedKeys{0}; + + bool lastKeyInPrefixIsPartial{false}; for (auto i = 0; i < numKeys; ++i) { - const std::optional encodedSize = - PrefixSortEncoder::encodedSize(types[i]->kind()); + const std::optional encodedSize = PrefixSortEncoder::encodedSize( + types[i]->kind(), std::min(maxStringLengths[i], maxStringPrefixLength)); if (!encodedSize.has_value() || normalizedKeySize + encodedSize.value() > maxNormalizedKeySize) { break; } prefixOffsets.push_back(normalizedKeySize); encoders.push_back({compareFlags[i].ascending, compareFlags[i].nullsFirst}); + encodeSizes.push_back(encodedSize.value()); normalizedKeySize += encodedSize.value(); ++numNormalizedKeys; + if ((types[i]->kind() == TypeKind::VARCHAR || + types[i]->kind() == TypeKind::VARBINARY) && + maxStringPrefixLength < maxStringLengths[i]) { + lastKeyInPrefixIsPartial = true; + break; + } } const auto numPaddingBytes = alignmentPadding(normalizedKeySize, kAlignment); @@ -165,7 +193,9 @@ PrefixSortLayout PrefixSortLayout::makeSortLayout( compareFlags, numNormalizedKeys != 0, numNormalizedKeys < numKeys, + lastKeyInPrefixIsPartial ? numNormalizedKeys - 1 : numNormalizedKeys, std::move(prefixOffsets), + std::move(encodeSizes), std::move(encoders), numPaddingBytes}; } @@ -177,8 +207,10 @@ void PrefixSortLayout::optimizeSortKeysOrder( std::vector> encodedKeySizes( rowType->size(), std::nullopt); for (const auto& projection : keyColumnProjections) { + // Set stringPrefixLength to UINT_MAX - 1 to ensure VARCHAR columns are + // processed after all other types. encodedKeySizes[projection.inputChannel] = PrefixSortEncoder::encodedSize( - rowType->childAt(projection.inputChannel)->kind()); + rowType->childAt(projection.inputChannel)->kind(), UINT_MAX - 1); } std::sort( @@ -222,7 +254,8 @@ int PrefixSort::comparePartNormalizedKeys(char* left, char* right) { // If prefixes are equal, compare the remaining sort keys with rowContainer. char* leftRow = getRowAddrFromPrefixBuffer(left); char* rightRow = getRowAddrFromPrefixBuffer(right); - for (auto i = sortLayout_.numNormalizedKeys; i < sortLayout_.numKeys; ++i) { + for (auto i = sortLayout_.comparisonStartIndex; i < sortLayout_.numKeys; + ++i) { result = rowContainer_->compare( leftRow, rightRow, i, sortLayout_.compareFlags[i]); if (result != 0) { @@ -276,9 +309,8 @@ uint32_t PrefixSort::maxRequiredBytes( if (rowContainer->numRows() < config.minNumRows) { return 0; } - VELOX_CHECK_EQ(rowContainer->keyTypes().size(), compareFlags.size()); - const auto sortLayout = PrefixSortLayout::makeSortLayout( - rowContainer->keyTypes(), compareFlags, config.maxNormalizedKeyBytes); + const auto sortLayout = + generateSortLayout(rowContainer, compareFlags, config); if (!sortLayout.hasNormalizedKeys) { return 0; } @@ -346,7 +378,8 @@ void PrefixSort::sortInternal( RuntimeCounter( sortLayout_.numNormalizedKeys, RuntimeCounter::Unit::kNone)); } - if (sortLayout_.hasNonNormalizedKey) { + if (sortLayout_.hasNonNormalizedKey || + sortLayout_.comparisonStartIndex < sortLayout_.numNormalizedKeys) { sortRunner.quickSort( prefixBufferStart, prefixBufferEnd, [&](char* lhs, char* rhs) { return comparePartNormalizedKeys(lhs, rhs); diff --git a/velox/exec/PrefixSort.h b/velox/exec/PrefixSort.h index 7ac34b4305519..40e28e8f00100 100644 --- a/velox/exec/PrefixSort.h +++ b/velox/exec/PrefixSort.h @@ -53,10 +53,18 @@ struct PrefixSortLayout { /// Whether the sort keys contains non-normalized key. const bool hasNonNormalizedKey; + /// Indicates the starting index for key comparison. + /// If the last key is only partially encoded in the prefix, start from + /// numNormalizedKeys - 1. Otherwise, start from numNormalizedKeys. + const uint32_t comparisonStartIndex; + /// Offsets of normalized keys, used to find write locations when /// extracting columns const std::vector prefixOffsets; + /// Sizes of normalized keys. + const std::vector encodeSizes; + /// The encoders for normalized keys. const std::vector encoders; @@ -67,7 +75,9 @@ struct PrefixSortLayout { static PrefixSortLayout makeSortLayout( const std::vector& types, const std::vector& compareFlags, - uint32_t maxNormalizedKeySize); + uint32_t maxNormalizedKeySize, + uint32_t maxStringPrefixLength, + const std::vector& maxStringLengths); /// Optimizes the order of sort key columns to maximize the number of prefix /// sort keys for acceleration. This only applies for use case which doesn't @@ -121,10 +131,8 @@ class PrefixSort { stdSort(rows, rowContainer, compareFlags); return; } - - VELOX_CHECK_EQ(rowContainer->keyTypes().size(), compareFlags.size()); - const auto sortLayout = PrefixSortLayout::makeSortLayout( - rowContainer->keyTypes(), compareFlags, config.maxNormalizedKeyBytes); + const auto sortLayout = + generateSortLayout(rowContainer, compareFlags, config); // All keys can not normalize, skip the binary string compare opt. // Putting this outside sort-internal helps with stdSort. if (!sortLayout.hasNormalizedKeys) { @@ -158,6 +166,33 @@ class PrefixSort { const RowContainer* rowContainer, const std::vector& compareFlags); + FOLLY_ALWAYS_INLINE static PrefixSortLayout generateSortLayout( + const RowContainer* rowContainer, + const std::vector& compareFlags, + const velox::common::PrefixSortConfig& config) { + const auto keyTypes = rowContainer->keyTypes(); + VELOX_CHECK_EQ(keyTypes.size(), compareFlags.size()); + std::vector maxStringLengths; + maxStringLengths.reserve(keyTypes.size()); + for (int i = 0; i < keyTypes.size(); ++i) { + auto maxPrefixLength = config.maxStringPrefixLength; + if (keyTypes[i]->kind() == TypeKind::VARBINARY || + keyTypes[i]->kind() == TypeKind::VARCHAR) { + const auto stats = rowContainer->columnStats(i); + maxPrefixLength = stats.has_value() && stats.value().maxBytes() > 0 + ? stats.value().maxBytes() + : UINT_MAX; + } + maxStringLengths.emplace_back(maxPrefixLength); + } + return PrefixSortLayout::makeSortLayout( + keyTypes, + compareFlags, + config.maxNormalizedKeyBytes, + config.maxStringPrefixLength, + maxStringLengths); + } + // Estimates the memory required for prefix sort such as prefix buffer and // swap buffer. uint32_t maxRequiredBytes() const; diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index d8b622c3a728f..74d49266b0fdd 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -567,7 +567,8 @@ void RowContainer::store( decoded, rows, isKey, - offsets_[column]); + offsets_[column], + column); } else { const auto rowColumn = rowColumns_[column]; VELOX_DYNAMIC_TYPE_DISPATCH_ALL( diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 3782dff698169..72cd08e276ddb 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -1056,6 +1056,7 @@ class RowContainer { for (int32_t i = 0; i < rows.size(); ++i) { storeWithNulls( decoded, i, isKey, rows[i], offset, nullByte, nullMask, column); + updateColumnStats(decoded, i, rows[i], column); } } @@ -1064,9 +1065,11 @@ class RowContainer { const DecodedVector& decoded, folly::Range rows, bool isKey, - int32_t offset) { + int32_t offset, + int32_t column) { for (int32_t i = 0; i < rows.size(); ++i) { storeNoNulls(decoded, i, isKey, rows[i], offset); + updateColumnStats(decoded, i, rows[i], column); } } diff --git a/velox/exec/Window.cpp b/velox/exec/Window.cpp index c8694cff95740..1f1a91331a95e 100644 --- a/velox/exec/Window.cpp +++ b/velox/exec/Window.cpp @@ -60,7 +60,8 @@ Window::Window( pool(), common::PrefixSortConfig{ driverCtx->queryConfig().prefixSortNormalizedKeyMaxBytes(), - driverCtx->queryConfig().prefixSortMinRows()}, + driverCtx->queryConfig().prefixSortMinRows(), + driverCtx->queryConfig().prefixSortMaxStringPrefixLength()}, spillConfig, &nonReclaimableSection_, &spillStats_); diff --git a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp index 692d4f72926bc..12e575b68e80e 100644 --- a/velox/exec/benchmarks/PrefixSortBenchmark.cpp +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -125,15 +125,14 @@ class TestCase { // You could config threshold, e.i. 0, to test prefix-sort for small // dateset. -static const common::PrefixSortConfig kDefaultSortConfig(1024, 100); +static const common::PrefixSortConfig kDefaultSortConfig(1024, 100, 50); // For small dataset, in some test environments, if std-sort is defined in the // benchmark file, the test results may be strangely regressed. When the // threshold is particularly large, PrefixSort is actually std-sort, hence, we // can use this as std-sort benchmark base. -static const common::PrefixSortConfig kStdSortConfig( - 1024, - std::numeric_limits::max()); +static const common::PrefixSortConfig + kStdSortConfig(1024, std::numeric_limits::max(), 50); class PrefixSortBenchmark { public: diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index c4d356ec53d46..0078954edb4a5 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -18,6 +18,7 @@ #include #include "velox/common/base/SimdUtil.h" +#include "velox/common/memory/HashStringAllocator.h" #include "velox/type/Timestamp.h" #include "velox/type/Type.h" @@ -30,7 +31,7 @@ class PrefixSortEncoder { : ascending_(ascending), nullsFirst_(nullsFirst){}; /// Encode native primitive types(such as uint64_t, int64_t, uint32_t, - /// int32_t, float, double, Timestamp). + /// int32_t, uint16_t, int16_t, float, double, Timestamp). /// 1. The first byte of the encoded result is null byte. The value is 0 if /// (nulls first and value is null) or (nulls last and value is not null). /// Otherwise, the value is 1. @@ -38,8 +39,6 @@ class PrefixSortEncoder { /// -If value is null, we set the remaining sizeof(T) bytes to '0', they /// do not affect the comparison results at all. /// -If value is not null, the result is set by calling encodeNoNulls. - /// - /// TODO: add support for strings. template FOLLY_ALWAYS_INLINE void encode(std::optional value, char* dest) const { if (value.has_value()) { @@ -51,6 +50,50 @@ class PrefixSortEncoder { } } + /// Encode String type. + /// The string prefix is formatted as 'null byte + string content + padding + /// zeros'. If `!ascending_`, the bits for both the content and padding zeros + /// need to be inverted. + FOLLY_ALWAYS_INLINE void encode( + std::optional value, + char* dest, + uint32_t encodeSize) const { + auto* destDataPtr = dest + 1; + const auto stringPrefixSize = encodeSize - 1; + if (value.has_value()) { + dest[0] = nullsFirst_ ? 1 : 0; + auto data = value.value(); + const uint32_t copySize = + std::min(data.size(), stringPrefixSize); + if (data.isInline() || + reinterpret_cast(data.data())[-1] + .size() >= data.size()) { + // The string is inline or all in one piece out of line. + std::memcpy(destDataPtr, data.data(), copySize); + } else { + // 'data' is stored in non-contiguous allocation pieces in the row + // container, we only read prefix size data out. + auto stream = HashStringAllocator::prepareRead( + HashStringAllocator::headerOf(data.data())); + stream->readBytes(destDataPtr, copySize); + } + + if (data.size() < stringPrefixSize) { + std::memset( + destDataPtr + data.size(), 0, stringPrefixSize - data.size()); + } + + if (!ascending_) { + for (auto i = 1; i < encodeSize; ++i) { + dest[i] = ~dest[i]; + } + } + } else { + dest[0] = nullsFirst_ ? 0 : 1; + std::memset(destDataPtr, 0, stringPrefixSize); + } + } + /// @tparam T Type of value. Supported type are: uint64_t, int64_t, uint32_t, /// int32_t, int16_t, uint16_t, float, double, Timestamp. template @@ -67,7 +110,8 @@ class PrefixSortEncoder { /// @return For supported types, returns the encoded size, assume nullable. /// For not supported types, returns 'std::nullopt'. FOLLY_ALWAYS_INLINE static std::optional encodedSize( - TypeKind typeKind) { + TypeKind typeKind, + uint32_t stringPrefixLength) { // NOTE: one byte is reserved for nullable comparison. switch ((typeKind)) { case ::facebook::velox::TypeKind::SMALLINT: { @@ -91,6 +135,11 @@ class PrefixSortEncoder { case ::facebook::velox::TypeKind::HUGEINT: { return 17; } + case ::facebook::velox::TypeKind::VARBINARY: + [[fallthrough]]; + case ::facebook::velox::TypeKind::VARCHAR: { + return 1 + stringPrefixLength; + } default: return std::nullopt; } @@ -260,4 +309,11 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( encodeNoNulls(value.getNanos(), dest + 8); } +template <> +FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( + StringView value, + char* dest) const { + VELOX_UNREACHABLE(); +} + } // namespace facebook::velox::exec::prefixsort diff --git a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp index dc21fabea7c18..b92321d29b43d 100644 --- a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp +++ b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp @@ -225,7 +225,9 @@ class PrefixEncoderTest : public testing::Test, auto test = [&](const PrefixSortEncoder& encoder) { TypePtr type = TypeTraits::ImplType::create(); - VectorFuzzer fuzzer({.vectorSize = vectorSize, .nullRatio = 0.1}, pool()); + VectorFuzzer fuzzer( + {.vectorSize = vectorSize, .nullRatio = 0.1, .stringLength = 16}, + pool()); CompareFlags compareFlag = { encoder.isNullsFirst(), @@ -250,8 +252,14 @@ class PrefixEncoderTest : public testing::Test, const auto rightValue = rightVector->isNullAt(i) ? std::nullopt : std::optional(rightVector->valueAt(i)); - encoder.encode(leftValue, leftEncoded); - encoder.encode(rightValue, rightEncoded); + if constexpr ( + Kind == TypeKind::VARCHAR || Kind == TypeKind::VARBINARY) { + encoder.encode(leftValue, leftEncoded, 17); + encoder.encode(rightValue, rightEncoded, 17); + } else { + encoder.encode(leftValue, leftEncoded); + encoder.encode(rightValue, rightEncoded); + } const auto result = compare(leftEncoded, rightEncoded); const auto expected = @@ -264,7 +272,23 @@ class PrefixEncoderTest : public testing::Test, test(ascNullsLastEncoder_); test(descNullsFirstEncoder_); test(descNullsLastEncoder_); - }; + } + + const PrefixSortEncoder& ascNullsFirstEncoder() const { + return ascNullsFirstEncoder_; + } + + const PrefixSortEncoder ascNullsLastEncoder() const { + return ascNullsLastEncoder_; + } + + const PrefixSortEncoder descNullsFirstEncoder() const { + return descNullsFirstEncoder_; + } + + const PrefixSortEncoder descNullsLastEncoder() const { + return descNullsLastEncoder_; + } protected: static void SetUpTestCase() { @@ -347,6 +371,46 @@ TEST_F(PrefixEncoderTest, encode) { } } +TEST_F(PrefixEncoderTest, encodeString) { + constexpr int32_t encodeSize = 13; + StringView testValue = StringView("aaaaaabbbbbb"); + char expectedAsc[encodeSize] = "aaaaaabbbbbb"; + char expectedDesc[encodeSize]; + for (int i = 0; i < encodeSize - 1; ++i) { + expectedDesc[i] = ~expectedAsc[i]; + } + std::optional nullValue = std::nullopt; + std::optional value = testValue; + char encoded[encodeSize + 1]; + char nullFirst[encodeSize + 1]; + char nullLast[encodeSize + 1]; + memset(nullFirst, 0, encodeSize); + memset(nullLast, 1, 1); + memset(nullLast + 1, 0, encodeSize - 1); + + auto compare = [&](char* left, char* right) { + return std::memcmp(left, right, encodeSize); + }; + + ascNullsFirstEncoder().encode(nullValue, encoded, encodeSize); + ASSERT_EQ(compare(nullFirst, encoded), 0); + ascNullsLastEncoder().encode(nullValue, encoded, encodeSize); + ASSERT_EQ(compare(nullLast, encoded), 0); + + ascNullsFirstEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 1); + ASSERT_EQ(std::memcmp(encoded + 1, expectedAsc, encodeSize - 1), 0); + ascNullsLastEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 0); + ASSERT_EQ(std::memcmp(encoded + 1, expectedAsc, encodeSize - 1), 0); + descNullsFirstEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 1); + ASSERT_EQ(std::memcmp(encoded + 1, expectedDesc, encodeSize - 1), 0); + descNullsLastEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 0); + ASSERT_EQ(std::memcmp(encoded + 1, expectedDesc, encodeSize - 1), 0); +} + TEST_F(PrefixEncoderTest, compare) { testCompare(); testCompare(); @@ -388,4 +452,12 @@ TEST_F(PrefixEncoderTest, fuzzyTimestamp) { testFuzz(); } +TEST_F(PrefixEncoderTest, fuzzyStringView) { + testFuzz(); +} + +TEST_F(PrefixEncoderTest, fuzzyBinary) { + testFuzz(); +} + } // namespace facebook::velox::exec::prefixsort::test diff --git a/velox/exec/tests/AggregationTest.cpp b/velox/exec/tests/AggregationTest.cpp index dc4724ace32b3..2f351fe833233 100644 --- a/velox/exec/tests/AggregationTest.cpp +++ b/velox/exec/tests/AggregationTest.cpp @@ -2038,92 +2038,108 @@ TEST_F(AggregationTest, spillPrefixSortOptimization) { {true, 0, 0, 0}, {false, 0, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() - + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() - 1, 0, 0}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() - + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() - 1, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value(), 0, 1}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value(), 0, 0}, - {true, 1'000'000, 0, 3}, + {true, 1'000'000, 0, 4}, {false, 1'000'000, 0, 0}, {true, 1'000'000, 1'000'000, 0}, {false, 1'000'000, 1'000'000, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 1'000'000, 0}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 1'000'000, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value(), 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value(), 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value() - 1, 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value() - 1, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 3}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 0}}; diff --git a/velox/exec/tests/PrefixSortTest.cpp b/velox/exec/tests/PrefixSortTest.cpp index 9bad8aeb035cc..9ee86a61a0e63 100644 --- a/velox/exec/tests/PrefixSortTest.cpp +++ b/velox/exec/tests/PrefixSortTest.cpp @@ -66,7 +66,8 @@ class PrefixSortTest : public exec::test::OperatorTestBase { common::PrefixSortConfig{ 1024, // Set threshold to 0 to enable prefix-sort in small dataset. - 0}, + 0, + 12}, sortPool.get()); const auto beforeBytes = sortPool->peakBytes(); ASSERT_EQ(sortPool->peakBytes(), 0); @@ -77,7 +78,8 @@ class PrefixSortTest : public exec::test::OperatorTestBase { common::PrefixSortConfig{ 1024, // Set threshold to 0 to enable prefix-sort in small dataset. - 0}, + 0, + 12}, sortPool.get(), rows); ASSERT_GE(maxBytes, sortPool->peakBytes() - beforeBytes); @@ -151,9 +153,6 @@ const RowVectorPtr PrefixSortTest::generateExpectedResult( } TEST_F(PrefixSortTest, singleKey) { - const int numRows = 5; - const int columnsSize = 7; - // Vectors without nulls. const std::vector testData = { makeFlatVector({5, 4, 3, 2, 1}), @@ -173,8 +172,22 @@ TEST_F(PrefixSortTest, singleKey) { Timestamp(3, 3), Timestamp(2, 2), Timestamp(1, 1)}), - makeFlatVector({"eee", "ddd", "ccc", "bbb", "aaa"})}; - for (int i = 5; i < columnsSize; ++i) { + makeFlatVector({"eee", "ddd", "ccc", "bbb", "aaa"}), + makeFlatVector({"e", "dddddd", "bbb", "ddd", "aaa"}), + makeFlatVector( + {"ddd_not_inline", + "aaa_is_not_inline", + "aaa_is_not_inline_a", + "ddd_is_not_inline", + "aaa_is_not_inline_b"}), + makeNullableFlatVector( + {"\u7231", + "\u671B\u5E0C\u2014\u5FF5\u4FE1", + "\u671B\u5E0C", + "\u7231\u2014", + "\u4FE1 \u7231"}, + VARBINARY())}; + for (int i = 0; i < testData.size(); ++i) { const auto data = makeRowVector({testData[i]}); testPrefixSort({kAsc}, data); @@ -183,10 +196,6 @@ TEST_F(PrefixSortTest, singleKey) { } TEST_F(PrefixSortTest, singleKeyWithNulls) { - const int numRows = 5; - const int columnsSize = 7; - - Timestamp ts = {5, 5}; // Vectors with nulls. const std::vector testData = { makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), @@ -207,9 +216,24 @@ TEST_F(PrefixSortTest, singleKeyWithNulls) { Timestamp(2, 2), Timestamp(1, 1)}), makeNullableFlatVector( - {"eee", "ddd", std::nullopt, "bbb", "aaa"})}; + {"eee", "ddd", std::nullopt, "bbb", "aaa"}), + makeNullableFlatVector( + {"ee", "aaa", std::nullopt, "d", "aaaaaaaaaaaaaa"}), + makeNullableFlatVector( + {"aaa_is_not_inline", + "aaa_is_not_inline_2", + std::nullopt, + "aaa_is_not_inline_1", + "aaaaaaaaaaaaaa"}), + makeNullableFlatVector( + {"\u7231", + "\u671B\u5E0C\u2014\u5FF5\u4FE1", + std::nullopt, + "\u7231\u2014", + "\u4FE1 \u7231"}, + VARBINARY())}; - for (int i = 5; i < columnsSize; ++i) { + for (int i = 0; i < testData.size(); ++i) { const auto data = makeRowVector({testData[i]}); testPrefixSort({kAsc}, data); @@ -234,7 +258,7 @@ TEST_F(PrefixSortTest, multipleKeys) { const auto data = makeRowVector({ makeNullableFlatVector({5, 2, std::nullopt, 2, 1}), makeNullableFlatVector( - {"eee", "ddd", std::nullopt, "bbb", "aaa"}), + {"eee", "aaa", std::nullopt, "bbb", "aaaa"}), }); testPrefixSort({kAsc, kAsc}, data); @@ -300,27 +324,32 @@ TEST_F(PrefixSortTest, checkMaxNormalizedKeySizeForMultipleKeys) { // The normalizedKeySize for BIGINT should be 8 + 1. std::vector keyTypes = {BIGINT(), BIGINT()}; std::vector compareFlags = {kAsc, kDesc}; - auto sortLayout = PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 8); + std::vector maxStringLengths = {9, 9}; + auto sortLayout = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 8, 9, maxStringLengths); ASSERT_FALSE(sortLayout.hasNormalizedKeys); - auto sortLayoutOneKey = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 9); + auto sortLayoutOneKey = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 9, 9, maxStringLengths); ASSERT_TRUE(sortLayoutOneKey.hasNormalizedKeys); ASSERT_TRUE(sortLayoutOneKey.hasNonNormalizedKey); ASSERT_EQ(sortLayoutOneKey.prefixOffsets.size(), 1); ASSERT_EQ(sortLayoutOneKey.prefixOffsets[0], 0); - auto sortLayoutOneKey1 = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 17); + auto sortLayoutOneKey1 = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 17, 12, maxStringLengths); ASSERT_TRUE(sortLayoutOneKey1.hasNormalizedKeys); ASSERT_TRUE(sortLayoutOneKey1.hasNonNormalizedKey); ASSERT_EQ(sortLayoutOneKey1.prefixOffsets.size(), 1); ASSERT_EQ(sortLayoutOneKey1.prefixOffsets[0], 0); - auto sortLayoutTwoKeys = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 18); + auto sortLayoutTwoKeys = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 18, 12, maxStringLengths); ASSERT_TRUE(sortLayoutTwoKeys.hasNormalizedKeys); ASSERT_FALSE(sortLayoutTwoKeys.hasNonNormalizedKey); + ASSERT_FALSE( + sortLayoutTwoKeys.comparisonStartIndex < + sortLayoutTwoKeys.numNormalizedKeys); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets.size(), 2); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets[0], 0); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets[1], 9); @@ -348,10 +377,10 @@ TEST_F(PrefixSortTest, optimizeSortKeysOrder) { {ROW({BIGINT(), SMALLINT(), VARCHAR()}), {0, 1, 2}, {1, 0, 2}}, {ROW({TINYINT(), BIGINT(), VARCHAR(), TINYINT(), INTEGER(), VARCHAR()}), {2, 1, 0, 4, 5, 3}, - {4, 1, 2, 0, 5, 3}}, + {4, 1, 2, 5, 0, 3}}, {ROW({INTEGER(), BIGINT(), VARCHAR(), TINYINT(), INTEGER(), VARCHAR()}), {5, 4, 3, 2, 1, 0}, - {4, 0, 1, 5, 3, 2}}}; + {4, 0, 1, 5, 2, 3}}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); @@ -371,5 +400,47 @@ TEST_F(PrefixSortTest, optimizeSortKeysOrder) { } } } + +TEST_F(PrefixSortTest, makeSortLayoutForString) { + std::vector keyTypes = {VARCHAR(), BIGINT()}; + std::vector compareFlags = {kAsc, kDesc}; + std::vector maxStringLengths = {9, 9}; + + auto sortLayoutOneKey = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 24, 8, maxStringLengths); + ASSERT_TRUE(sortLayoutOneKey.hasNormalizedKeys); + ASSERT_TRUE(sortLayoutOneKey.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutOneKey.comparisonStartIndex < + sortLayoutOneKey.numNormalizedKeys); + ASSERT_EQ(sortLayoutOneKey.encodeSizes.size(), 1); + ASSERT_EQ(sortLayoutOneKey.encodeSizes[0], 9); + + auto sortLayoutTwoCompleteKeys = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 26, 9, maxStringLengths); + ASSERT_TRUE(sortLayoutTwoCompleteKeys.hasNormalizedKeys); + ASSERT_FALSE(sortLayoutTwoCompleteKeys.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutTwoCompleteKeys.comparisonStartIndex == + sortLayoutTwoCompleteKeys.numNormalizedKeys); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes.size(), 2); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes[0], 10); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes[1], 9); + + // The last key type is VARBINARY, indicating that only partial data is + // encoded in the prefix. + std::vector keyTypes1 = {BIGINT(), VARBINARY()}; + auto sortLayoutTwoKeys = PrefixSortLayout::makeSortLayout( + keyTypes1, compareFlags, 26, 8, maxStringLengths); + ASSERT_TRUE(sortLayoutTwoKeys.hasNormalizedKeys); + ASSERT_FALSE(sortLayoutTwoKeys.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutTwoKeys.comparisonStartIndex < + sortLayoutTwoKeys.numNormalizedKeys); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes.size(), 2); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes[0], 9); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes[1], 9); +} + } // namespace } // namespace facebook::velox::exec::prefixsort::test diff --git a/velox/exec/tests/SortBufferTest.cpp b/velox/exec/tests/SortBufferTest.cpp index f021ce5c5c247..c339f5377055c 100644 --- a/velox/exec/tests/SortBufferTest.cpp +++ b/velox/exec/tests/SortBufferTest.cpp @@ -66,7 +66,13 @@ class SortBufferTest : public OperatorTestBase, OperatorTestBase::TearDown(); } - common::SpillConfig getSpillConfig(const std::string& spillDir) const { + common::SpillConfig getSpillConfig( + const std::string& spillDir, + bool enableSpillPrefixSort = true) const { + std::optional spillPrefixSortConfig = + enableSpillPrefixSort + ? std::optional(prefixSortConfig_) + : std::nullopt; return common::SpillConfig( [spillDir]() -> const std::string& { return spillDir; }, [&](uint64_t) {}, @@ -83,14 +89,15 @@ class SortBufferTest : public OperatorTestBase, 0, 0, "none", - spillPrefixSortConfig_); + spillPrefixSortConfig); } const bool enableSpillPrefixSort_{GetParam()}; const velox::common::PrefixSortConfig prefixSortConfig_ = velox::common::PrefixSortConfig{ std::numeric_limits::max(), - GetParam() ? 8 : std::numeric_limits::max()}; + GetParam() ? 8 : std::numeric_limits::max(), + 12}; const std::optional spillPrefixSortConfig_ = enableSpillPrefixSort_ ? std::optional(prefixSortConfig_) @@ -103,13 +110,6 @@ class SortBufferTest : public OperatorTestBase, {"c3", REAL()}, {"c4", DOUBLE()}, {"c5", VARCHAR()}}); - const RowTypePtr nonPrefixSortInputType_ = ROW( - {{"c0", VARCHAR()}, - {"c1", VARCHAR()}, - {"c2", VARCHAR()}, - {"c3", VARCHAR()}, - {"c4", VARCHAR()}, - {"c5", VARCHAR()}}); // Specifies the sort columns ["c4", "c1"]. std::vector sortColumnIndices_{4, 1}; std::vector sortCompareFlags_{ @@ -740,12 +740,10 @@ DEBUG_ONLY_TEST_P(SortBufferTest, reserveMemorySort) { SCOPED_TRACE(fmt::format( "usePrefixSort: {}, spillEnabled: {}, ", usePrefixSort, spillEnabled)); auto spillDirectory = exec::test::TempDirectoryPath::create(); - auto spillConfig = getSpillConfig(spillDirectory->getPath()); + auto spillConfig = getSpillConfig(spillDirectory->getPath(), usePrefixSort); folly::Synchronized spillStats; - const RowTypePtr inputType = - usePrefixSort ? inputType_ : nonPrefixSortInputType_; auto sortBuffer = std::make_unique( - inputType, + inputType_, sortColumnIndices_, sortCompareFlags_, pool_.get(), @@ -759,7 +757,7 @@ DEBUG_ONLY_TEST_P(SortBufferTest, reserveMemorySort) { VectorFuzzer fuzzer({.vectorSize = 100}, spillSource.get()); TestScopedSpillInjection scopedSpillInjection(0); - sortBuffer->addInput(fuzzer.fuzzRow(inputType)); + sortBuffer->addInput(fuzzer.fuzzRow(inputType_)); std::atomic_bool hasReserveMemory = false; // Reserve memory for sort. diff --git a/velox/exec/tests/WindowTest.cpp b/velox/exec/tests/WindowTest.cpp index a725d359207d8..cc51cbfd51471 100644 --- a/velox/exec/tests/WindowTest.cpp +++ b/velox/exec/tests/WindowTest.cpp @@ -660,7 +660,7 @@ DEBUG_ONLY_TEST_F(WindowTest, reserveMemorySort) { const auto plan = usePrefixSort ? prefixSortPlan : nonPrefixSortPlan; velox::common::PrefixSortConfig prefixSortConfig = velox::common::PrefixSortConfig{ - std::numeric_limits::max(), 130}; + std::numeric_limits::max(), 130, 12}; auto sortWindowBuild = std::make_unique( plan, pool_.get(), diff --git a/velox/exec/tests/utils/LocalRunnerTestBase.cpp b/velox/exec/tests/utils/LocalRunnerTestBase.cpp index 2c23b50a32465..123a4f9e8aa85 100644 --- a/velox/exec/tests/utils/LocalRunnerTestBase.cpp +++ b/velox/exec/tests/utils/LocalRunnerTestBase.cpp @@ -59,7 +59,7 @@ void LocalRunnerTestBase::ensureTestData() { void LocalRunnerTestBase::makeSchema() { auto schemaQueryCtx = makeQueryCtx("schema", rootPool_.get()); common::SpillConfig spillConfig; - common::PrefixSortConfig prefixSortConfig(100, 130); + common::PrefixSortConfig prefixSortConfig(100, 130, 12); auto leafPool = schemaQueryCtx->pool()->addLeafChild("schemaReader"); auto connectorQueryCtx = std::make_shared( leafPool.get(),