diff --git a/velox/common/base/PrefixSortConfig.h b/velox/common/base/PrefixSortConfig.h index 46ac493915546..d5f731d140eac 100644 --- a/velox/common/base/PrefixSortConfig.h +++ b/velox/common/base/PrefixSortConfig.h @@ -24,9 +24,13 @@ namespace facebook::velox::common { struct PrefixSortConfig { PrefixSortConfig() = default; - PrefixSortConfig(uint32_t _maxNormalizedKeyBytes, uint32_t _minNumRows) + PrefixSortConfig( + uint32_t _maxNormalizedKeyBytes, + uint32_t _minNumRows, + uint32_t _maxStringPrefixLength) : maxNormalizedKeyBytes(_maxNormalizedKeyBytes), - minNumRows(_minNumRows) {} + minNumRows(_minNumRows), + maxStringPrefixLength(_maxStringPrefixLength) {} /// Maximum bytes that can be used to store normalized keys in prefix-sort /// buffer per entry. Same with QueryConfig kPrefixSortNormalizedKeyMaxBytes. @@ -35,5 +39,9 @@ struct PrefixSortConfig { /// Minimum number of rows to apply prefix sort. Prefix sort does not perform /// with small datasets. uint32_t minNumRows{128}; + + /// Max number of bytes to be stored in prefix-sort buffer for a string + /// column. + uint32_t maxStringPrefixLength{12}; }; } // namespace facebook::velox::common diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 7bceb06d117bb..37d3589a62bd6 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -378,6 +378,11 @@ class QueryConfig { /// derived using micro-benchmarking. static constexpr const char* kPrefixSortMinRows = "prefixsort_min_rows"; + /// Maximum number of bytes to be stored in prefix-sort buffer for a string + /// key. + static constexpr const char* kPrefixSortMaxStringPrefixLength = + "prefixsort_max_string_prefix_length"; + /// Enable query tracing flag. static constexpr const char* kQueryTraceEnabled = "query_trace_enabled"; @@ -844,6 +849,10 @@ class QueryConfig { return get(kPrefixSortMinRows, 128); } + uint32_t prefixSortMaxStringPrefixLength() const { + return get(kPrefixSortMaxStringPrefixLength, 12); + } + double scaleWriterRebalanceMaxMemoryUsageRatio() const { return get(kScaleWriterRebalanceMaxMemoryUsageRatio, 0.7); } diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index f25b89a5518e5..b34636a6d9cdd 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -144,6 +144,10 @@ Generic Configuration - integer - 128 - Minimum number of rows to use prefix-sort. The default value has been derived using micro-benchmarking. + * - prefixsort_max_string_prefix_length + - integer + - 12 + - Byte length of the string prefix stored in the prefix-sort buffer. This doesn't include the null byte. .. _expression-evaluation-conf: diff --git a/velox/exec/Driver.h b/velox/exec/Driver.h index 9bd25072cc427..fe880228b8e14 100644 --- a/velox/exec/Driver.h +++ b/velox/exec/Driver.h @@ -300,7 +300,8 @@ struct DriverCtx { common::PrefixSortConfig prefixSortConfig() const { return common::PrefixSortConfig{ queryConfig().prefixSortNormalizedKeyMaxBytes(), - queryConfig().prefixSortMinRows()}; + queryConfig().prefixSortMinRows(), + queryConfig().prefixSortMaxStringPrefixLength()}; } }; diff --git a/velox/exec/PrefixSort.cpp b/velox/exec/PrefixSort.cpp index b4dfb1f6e0d12..d40085e52452a 100644 --- a/velox/exec/PrefixSort.cpp +++ b/velox/exec/PrefixSort.cpp @@ -39,8 +39,15 @@ FOLLY_ALWAYS_INLINE void encodeRowColumn( } else { value = *(reinterpret_cast(row + rowColumn.offset())); } - prefixSortLayout.encoders[index].encode( - value, prefixBuffer + prefixSortLayout.prefixOffsets[index]); + if constexpr (std::is_same_v) { + prefixSortLayout.encoders[index].encode( + value, + prefixBuffer + prefixSortLayout.prefixOffsets[index], + prefixSortLayout.encodeSizes[index]); + } else { + prefixSortLayout.encoders[index].encode( + value, prefixBuffer + prefixSortLayout.prefixOffsets[index]); + } } FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( @@ -86,6 +93,13 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( prefixSortLayout, index, rowColumn, row, prefixBuffer); return; } + case TypeKind::VARCHAR: + [[fallthrough]]; + case TypeKind::VARBINARY: { + encodeRowColumn( + prefixSortLayout, index, rowColumn, row, prefixBuffer); + return; + } default: VELOX_UNSUPPORTED( "prefix-sort does not support type kind: {}", @@ -130,28 +144,42 @@ compareByWord(uint64_t* left, uint64_t* right, int32_t bytes) { PrefixSortLayout PrefixSortLayout::makeSortLayout( const std::vector& types, const std::vector& compareFlags, - uint32_t maxNormalizedKeySize) { + uint32_t maxNormalizedKeySize, + uint32_t maxStringPrefixLength, + const std::vector& maxStringLengths) { const uint32_t numKeys = types.size(); std::vector prefixOffsets; prefixOffsets.reserve(numKeys); + std::vector encodeSizes; + encodeSizes.reserve(numKeys); std::vector encoders; encoders.reserve(numKeys); // Calculate encoders and prefix-offsets, and stop the loop if a key that - // cannot be normalized is encountered. + // cannot be normalized is encountered or only partial data of a key is + // normalized. uint32_t normalizedKeySize{0}; uint32_t numNormalizedKeys{0}; + + bool lastKeyInPrefixIsPartial{false}; for (auto i = 0; i < numKeys; ++i) { - const std::optional encodedSize = - PrefixSortEncoder::encodedSize(types[i]->kind()); + const std::optional encodedSize = PrefixSortEncoder::encodedSize( + types[i]->kind(), std::min(maxStringLengths[i], maxStringPrefixLength)); if (!encodedSize.has_value() || normalizedKeySize + encodedSize.value() > maxNormalizedKeySize) { break; } prefixOffsets.push_back(normalizedKeySize); encoders.push_back({compareFlags[i].ascending, compareFlags[i].nullsFirst}); + encodeSizes.push_back(encodedSize.value()); normalizedKeySize += encodedSize.value(); ++numNormalizedKeys; + if ((types[i]->kind() == TypeKind::VARCHAR || + types[i]->kind() == TypeKind::VARBINARY) && + maxStringPrefixLength < maxStringLengths[i]) { + lastKeyInPrefixIsPartial = true; + break; + } } const auto numPaddingBytes = alignmentPadding(normalizedKeySize, kAlignment); @@ -165,7 +193,9 @@ PrefixSortLayout PrefixSortLayout::makeSortLayout( compareFlags, numNormalizedKeys != 0, numNormalizedKeys < numKeys, + lastKeyInPrefixIsPartial ? numNormalizedKeys - 1 : numNormalizedKeys, std::move(prefixOffsets), + std::move(encodeSizes), std::move(encoders), numPaddingBytes}; } @@ -177,8 +207,10 @@ void PrefixSortLayout::optimizeSortKeysOrder( std::vector> encodedKeySizes( rowType->size(), std::nullopt); for (const auto& projection : keyColumnProjections) { + // Set stringPrefixLength to UINT_MAX - 1 to ensure VARCHAR columns are + // processed after all other types. encodedKeySizes[projection.inputChannel] = PrefixSortEncoder::encodedSize( - rowType->childAt(projection.inputChannel)->kind()); + rowType->childAt(projection.inputChannel)->kind(), UINT_MAX - 1); } std::sort( @@ -222,7 +254,8 @@ int PrefixSort::comparePartNormalizedKeys(char* left, char* right) { // If prefixes are equal, compare the remaining sort keys with rowContainer. char* leftRow = getRowAddrFromPrefixBuffer(left); char* rightRow = getRowAddrFromPrefixBuffer(right); - for (auto i = sortLayout_.numNormalizedKeys; i < sortLayout_.numKeys; ++i) { + for (auto i = sortLayout_.comparisonStartIndex; i < sortLayout_.numKeys; + ++i) { result = rowContainer_->compare( leftRow, rightRow, i, sortLayout_.compareFlags[i]); if (result != 0) { @@ -276,9 +309,8 @@ uint32_t PrefixSort::maxRequiredBytes( if (rowContainer->numRows() < config.minNumRows) { return 0; } - VELOX_CHECK_EQ(rowContainer->keyTypes().size(), compareFlags.size()); - const auto sortLayout = PrefixSortLayout::makeSortLayout( - rowContainer->keyTypes(), compareFlags, config.maxNormalizedKeyBytes); + const auto sortLayout = + generateSortLayout(rowContainer, compareFlags, config); if (!sortLayout.hasNormalizedKeys) { return 0; } @@ -346,7 +378,8 @@ void PrefixSort::sortInternal( RuntimeCounter( sortLayout_.numNormalizedKeys, RuntimeCounter::Unit::kNone)); } - if (sortLayout_.hasNonNormalizedKey) { + if (sortLayout_.hasNonNormalizedKey || + sortLayout_.comparisonStartIndex < sortLayout_.numNormalizedKeys) { sortRunner.quickSort( prefixBufferStart, prefixBufferEnd, [&](char* lhs, char* rhs) { return comparePartNormalizedKeys(lhs, rhs); diff --git a/velox/exec/PrefixSort.h b/velox/exec/PrefixSort.h index 7ac34b4305519..40e28e8f00100 100644 --- a/velox/exec/PrefixSort.h +++ b/velox/exec/PrefixSort.h @@ -53,10 +53,18 @@ struct PrefixSortLayout { /// Whether the sort keys contains non-normalized key. const bool hasNonNormalizedKey; + /// Indicates the starting index for key comparison. + /// If the last key is only partially encoded in the prefix, start from + /// numNormalizedKeys - 1. Otherwise, start from numNormalizedKeys. + const uint32_t comparisonStartIndex; + /// Offsets of normalized keys, used to find write locations when /// extracting columns const std::vector prefixOffsets; + /// Sizes of normalized keys. + const std::vector encodeSizes; + /// The encoders for normalized keys. const std::vector encoders; @@ -67,7 +75,9 @@ struct PrefixSortLayout { static PrefixSortLayout makeSortLayout( const std::vector& types, const std::vector& compareFlags, - uint32_t maxNormalizedKeySize); + uint32_t maxNormalizedKeySize, + uint32_t maxStringPrefixLength, + const std::vector& maxStringLengths); /// Optimizes the order of sort key columns to maximize the number of prefix /// sort keys for acceleration. This only applies for use case which doesn't @@ -121,10 +131,8 @@ class PrefixSort { stdSort(rows, rowContainer, compareFlags); return; } - - VELOX_CHECK_EQ(rowContainer->keyTypes().size(), compareFlags.size()); - const auto sortLayout = PrefixSortLayout::makeSortLayout( - rowContainer->keyTypes(), compareFlags, config.maxNormalizedKeyBytes); + const auto sortLayout = + generateSortLayout(rowContainer, compareFlags, config); // All keys can not normalize, skip the binary string compare opt. // Putting this outside sort-internal helps with stdSort. if (!sortLayout.hasNormalizedKeys) { @@ -158,6 +166,33 @@ class PrefixSort { const RowContainer* rowContainer, const std::vector& compareFlags); + FOLLY_ALWAYS_INLINE static PrefixSortLayout generateSortLayout( + const RowContainer* rowContainer, + const std::vector& compareFlags, + const velox::common::PrefixSortConfig& config) { + const auto keyTypes = rowContainer->keyTypes(); + VELOX_CHECK_EQ(keyTypes.size(), compareFlags.size()); + std::vector maxStringLengths; + maxStringLengths.reserve(keyTypes.size()); + for (int i = 0; i < keyTypes.size(); ++i) { + auto maxPrefixLength = config.maxStringPrefixLength; + if (keyTypes[i]->kind() == TypeKind::VARBINARY || + keyTypes[i]->kind() == TypeKind::VARCHAR) { + const auto stats = rowContainer->columnStats(i); + maxPrefixLength = stats.has_value() && stats.value().maxBytes() > 0 + ? stats.value().maxBytes() + : UINT_MAX; + } + maxStringLengths.emplace_back(maxPrefixLength); + } + return PrefixSortLayout::makeSortLayout( + keyTypes, + compareFlags, + config.maxNormalizedKeyBytes, + config.maxStringPrefixLength, + maxStringLengths); + } + // Estimates the memory required for prefix sort such as prefix buffer and // swap buffer. uint32_t maxRequiredBytes() const; diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index d8b622c3a728f..74d49266b0fdd 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -567,7 +567,8 @@ void RowContainer::store( decoded, rows, isKey, - offsets_[column]); + offsets_[column], + column); } else { const auto rowColumn = rowColumns_[column]; VELOX_DYNAMIC_TYPE_DISPATCH_ALL( diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 3782dff698169..72cd08e276ddb 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -1056,6 +1056,7 @@ class RowContainer { for (int32_t i = 0; i < rows.size(); ++i) { storeWithNulls( decoded, i, isKey, rows[i], offset, nullByte, nullMask, column); + updateColumnStats(decoded, i, rows[i], column); } } @@ -1064,9 +1065,11 @@ class RowContainer { const DecodedVector& decoded, folly::Range rows, bool isKey, - int32_t offset) { + int32_t offset, + int32_t column) { for (int32_t i = 0; i < rows.size(); ++i) { storeNoNulls(decoded, i, isKey, rows[i], offset); + updateColumnStats(decoded, i, rows[i], column); } } diff --git a/velox/exec/Window.cpp b/velox/exec/Window.cpp index c8694cff95740..1f1a91331a95e 100644 --- a/velox/exec/Window.cpp +++ b/velox/exec/Window.cpp @@ -60,7 +60,8 @@ Window::Window( pool(), common::PrefixSortConfig{ driverCtx->queryConfig().prefixSortNormalizedKeyMaxBytes(), - driverCtx->queryConfig().prefixSortMinRows()}, + driverCtx->queryConfig().prefixSortMinRows(), + driverCtx->queryConfig().prefixSortMaxStringPrefixLength()}, spillConfig, &nonReclaimableSection_, &spillStats_); diff --git a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp index 692d4f72926bc..12e575b68e80e 100644 --- a/velox/exec/benchmarks/PrefixSortBenchmark.cpp +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -125,15 +125,14 @@ class TestCase { // You could config threshold, e.i. 0, to test prefix-sort for small // dateset. -static const common::PrefixSortConfig kDefaultSortConfig(1024, 100); +static const common::PrefixSortConfig kDefaultSortConfig(1024, 100, 50); // For small dataset, in some test environments, if std-sort is defined in the // benchmark file, the test results may be strangely regressed. When the // threshold is particularly large, PrefixSort is actually std-sort, hence, we // can use this as std-sort benchmark base. -static const common::PrefixSortConfig kStdSortConfig( - 1024, - std::numeric_limits::max()); +static const common::PrefixSortConfig + kStdSortConfig(1024, std::numeric_limits::max(), 50); class PrefixSortBenchmark { public: diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index c4d356ec53d46..0078954edb4a5 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -18,6 +18,7 @@ #include #include "velox/common/base/SimdUtil.h" +#include "velox/common/memory/HashStringAllocator.h" #include "velox/type/Timestamp.h" #include "velox/type/Type.h" @@ -30,7 +31,7 @@ class PrefixSortEncoder { : ascending_(ascending), nullsFirst_(nullsFirst){}; /// Encode native primitive types(such as uint64_t, int64_t, uint32_t, - /// int32_t, float, double, Timestamp). + /// int32_t, uint16_t, int16_t, float, double, Timestamp). /// 1. The first byte of the encoded result is null byte. The value is 0 if /// (nulls first and value is null) or (nulls last and value is not null). /// Otherwise, the value is 1. @@ -38,8 +39,6 @@ class PrefixSortEncoder { /// -If value is null, we set the remaining sizeof(T) bytes to '0', they /// do not affect the comparison results at all. /// -If value is not null, the result is set by calling encodeNoNulls. - /// - /// TODO: add support for strings. template FOLLY_ALWAYS_INLINE void encode(std::optional value, char* dest) const { if (value.has_value()) { @@ -51,6 +50,50 @@ class PrefixSortEncoder { } } + /// Encode String type. + /// The string prefix is formatted as 'null byte + string content + padding + /// zeros'. If `!ascending_`, the bits for both the content and padding zeros + /// need to be inverted. + FOLLY_ALWAYS_INLINE void encode( + std::optional value, + char* dest, + uint32_t encodeSize) const { + auto* destDataPtr = dest + 1; + const auto stringPrefixSize = encodeSize - 1; + if (value.has_value()) { + dest[0] = nullsFirst_ ? 1 : 0; + auto data = value.value(); + const uint32_t copySize = + std::min(data.size(), stringPrefixSize); + if (data.isInline() || + reinterpret_cast(data.data())[-1] + .size() >= data.size()) { + // The string is inline or all in one piece out of line. + std::memcpy(destDataPtr, data.data(), copySize); + } else { + // 'data' is stored in non-contiguous allocation pieces in the row + // container, we only read prefix size data out. + auto stream = HashStringAllocator::prepareRead( + HashStringAllocator::headerOf(data.data())); + stream->readBytes(destDataPtr, copySize); + } + + if (data.size() < stringPrefixSize) { + std::memset( + destDataPtr + data.size(), 0, stringPrefixSize - data.size()); + } + + if (!ascending_) { + for (auto i = 1; i < encodeSize; ++i) { + dest[i] = ~dest[i]; + } + } + } else { + dest[0] = nullsFirst_ ? 0 : 1; + std::memset(destDataPtr, 0, stringPrefixSize); + } + } + /// @tparam T Type of value. Supported type are: uint64_t, int64_t, uint32_t, /// int32_t, int16_t, uint16_t, float, double, Timestamp. template @@ -67,7 +110,8 @@ class PrefixSortEncoder { /// @return For supported types, returns the encoded size, assume nullable. /// For not supported types, returns 'std::nullopt'. FOLLY_ALWAYS_INLINE static std::optional encodedSize( - TypeKind typeKind) { + TypeKind typeKind, + uint32_t stringPrefixLength) { // NOTE: one byte is reserved for nullable comparison. switch ((typeKind)) { case ::facebook::velox::TypeKind::SMALLINT: { @@ -91,6 +135,11 @@ class PrefixSortEncoder { case ::facebook::velox::TypeKind::HUGEINT: { return 17; } + case ::facebook::velox::TypeKind::VARBINARY: + [[fallthrough]]; + case ::facebook::velox::TypeKind::VARCHAR: { + return 1 + stringPrefixLength; + } default: return std::nullopt; } @@ -260,4 +309,11 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( encodeNoNulls(value.getNanos(), dest + 8); } +template <> +FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( + StringView value, + char* dest) const { + VELOX_UNREACHABLE(); +} + } // namespace facebook::velox::exec::prefixsort diff --git a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp index dc21fabea7c18..b92321d29b43d 100644 --- a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp +++ b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp @@ -225,7 +225,9 @@ class PrefixEncoderTest : public testing::Test, auto test = [&](const PrefixSortEncoder& encoder) { TypePtr type = TypeTraits::ImplType::create(); - VectorFuzzer fuzzer({.vectorSize = vectorSize, .nullRatio = 0.1}, pool()); + VectorFuzzer fuzzer( + {.vectorSize = vectorSize, .nullRatio = 0.1, .stringLength = 16}, + pool()); CompareFlags compareFlag = { encoder.isNullsFirst(), @@ -250,8 +252,14 @@ class PrefixEncoderTest : public testing::Test, const auto rightValue = rightVector->isNullAt(i) ? std::nullopt : std::optional(rightVector->valueAt(i)); - encoder.encode(leftValue, leftEncoded); - encoder.encode(rightValue, rightEncoded); + if constexpr ( + Kind == TypeKind::VARCHAR || Kind == TypeKind::VARBINARY) { + encoder.encode(leftValue, leftEncoded, 17); + encoder.encode(rightValue, rightEncoded, 17); + } else { + encoder.encode(leftValue, leftEncoded); + encoder.encode(rightValue, rightEncoded); + } const auto result = compare(leftEncoded, rightEncoded); const auto expected = @@ -264,7 +272,23 @@ class PrefixEncoderTest : public testing::Test, test(ascNullsLastEncoder_); test(descNullsFirstEncoder_); test(descNullsLastEncoder_); - }; + } + + const PrefixSortEncoder& ascNullsFirstEncoder() const { + return ascNullsFirstEncoder_; + } + + const PrefixSortEncoder ascNullsLastEncoder() const { + return ascNullsLastEncoder_; + } + + const PrefixSortEncoder descNullsFirstEncoder() const { + return descNullsFirstEncoder_; + } + + const PrefixSortEncoder descNullsLastEncoder() const { + return descNullsLastEncoder_; + } protected: static void SetUpTestCase() { @@ -347,6 +371,46 @@ TEST_F(PrefixEncoderTest, encode) { } } +TEST_F(PrefixEncoderTest, encodeString) { + constexpr int32_t encodeSize = 13; + StringView testValue = StringView("aaaaaabbbbbb"); + char expectedAsc[encodeSize] = "aaaaaabbbbbb"; + char expectedDesc[encodeSize]; + for (int i = 0; i < encodeSize - 1; ++i) { + expectedDesc[i] = ~expectedAsc[i]; + } + std::optional nullValue = std::nullopt; + std::optional value = testValue; + char encoded[encodeSize + 1]; + char nullFirst[encodeSize + 1]; + char nullLast[encodeSize + 1]; + memset(nullFirst, 0, encodeSize); + memset(nullLast, 1, 1); + memset(nullLast + 1, 0, encodeSize - 1); + + auto compare = [&](char* left, char* right) { + return std::memcmp(left, right, encodeSize); + }; + + ascNullsFirstEncoder().encode(nullValue, encoded, encodeSize); + ASSERT_EQ(compare(nullFirst, encoded), 0); + ascNullsLastEncoder().encode(nullValue, encoded, encodeSize); + ASSERT_EQ(compare(nullLast, encoded), 0); + + ascNullsFirstEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 1); + ASSERT_EQ(std::memcmp(encoded + 1, expectedAsc, encodeSize - 1), 0); + ascNullsLastEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 0); + ASSERT_EQ(std::memcmp(encoded + 1, expectedAsc, encodeSize - 1), 0); + descNullsFirstEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 1); + ASSERT_EQ(std::memcmp(encoded + 1, expectedDesc, encodeSize - 1), 0); + descNullsLastEncoder().encode(value, encoded, encodeSize); + ASSERT_EQ(encoded[0], 0); + ASSERT_EQ(std::memcmp(encoded + 1, expectedDesc, encodeSize - 1), 0); +} + TEST_F(PrefixEncoderTest, compare) { testCompare(); testCompare(); @@ -388,4 +452,12 @@ TEST_F(PrefixEncoderTest, fuzzyTimestamp) { testFuzz(); } +TEST_F(PrefixEncoderTest, fuzzyStringView) { + testFuzz(); +} + +TEST_F(PrefixEncoderTest, fuzzyBinary) { + testFuzz(); +} + } // namespace facebook::velox::exec::prefixsort::test diff --git a/velox/exec/tests/AggregationTest.cpp b/velox/exec/tests/AggregationTest.cpp index dc4724ace32b3..2f351fe833233 100644 --- a/velox/exec/tests/AggregationTest.cpp +++ b/velox/exec/tests/AggregationTest.cpp @@ -2038,92 +2038,108 @@ TEST_F(AggregationTest, spillPrefixSortOptimization) { {true, 0, 0, 0}, {false, 0, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() - + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() - 1, 0, 0}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() - + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() - 1, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value(), 0, 1}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value(), 0, 0}, - {true, 1'000'000, 0, 3}, + {true, 1'000'000, 0, 4}, {false, 1'000'000, 0, 0}, {true, 1'000'000, 1'000'000, 0}, {false, 1'000'000, 1'000'000, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 1'000'000, 0}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 1'000'000, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value(), 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT).value(), + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value(), 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value() - 1, 0, 2}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value() - 1, 0, 0}, {true, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 3}, {false, - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT).value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::SMALLINT, 12) + .value() + + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::BIGINT, 12) .value() + - prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER) + prefixsort::PrefixSortEncoder::encodedSize(TypeKind::INTEGER, 12) .value(), 0, 0}}; diff --git a/velox/exec/tests/PrefixSortTest.cpp b/velox/exec/tests/PrefixSortTest.cpp index 9bad8aeb035cc..9ee86a61a0e63 100644 --- a/velox/exec/tests/PrefixSortTest.cpp +++ b/velox/exec/tests/PrefixSortTest.cpp @@ -66,7 +66,8 @@ class PrefixSortTest : public exec::test::OperatorTestBase { common::PrefixSortConfig{ 1024, // Set threshold to 0 to enable prefix-sort in small dataset. - 0}, + 0, + 12}, sortPool.get()); const auto beforeBytes = sortPool->peakBytes(); ASSERT_EQ(sortPool->peakBytes(), 0); @@ -77,7 +78,8 @@ class PrefixSortTest : public exec::test::OperatorTestBase { common::PrefixSortConfig{ 1024, // Set threshold to 0 to enable prefix-sort in small dataset. - 0}, + 0, + 12}, sortPool.get(), rows); ASSERT_GE(maxBytes, sortPool->peakBytes() - beforeBytes); @@ -151,9 +153,6 @@ const RowVectorPtr PrefixSortTest::generateExpectedResult( } TEST_F(PrefixSortTest, singleKey) { - const int numRows = 5; - const int columnsSize = 7; - // Vectors without nulls. const std::vector testData = { makeFlatVector({5, 4, 3, 2, 1}), @@ -173,8 +172,22 @@ TEST_F(PrefixSortTest, singleKey) { Timestamp(3, 3), Timestamp(2, 2), Timestamp(1, 1)}), - makeFlatVector({"eee", "ddd", "ccc", "bbb", "aaa"})}; - for (int i = 5; i < columnsSize; ++i) { + makeFlatVector({"eee", "ddd", "ccc", "bbb", "aaa"}), + makeFlatVector({"e", "dddddd", "bbb", "ddd", "aaa"}), + makeFlatVector( + {"ddd_not_inline", + "aaa_is_not_inline", + "aaa_is_not_inline_a", + "ddd_is_not_inline", + "aaa_is_not_inline_b"}), + makeNullableFlatVector( + {"\u7231", + "\u671B\u5E0C\u2014\u5FF5\u4FE1", + "\u671B\u5E0C", + "\u7231\u2014", + "\u4FE1 \u7231"}, + VARBINARY())}; + for (int i = 0; i < testData.size(); ++i) { const auto data = makeRowVector({testData[i]}); testPrefixSort({kAsc}, data); @@ -183,10 +196,6 @@ TEST_F(PrefixSortTest, singleKey) { } TEST_F(PrefixSortTest, singleKeyWithNulls) { - const int numRows = 5; - const int columnsSize = 7; - - Timestamp ts = {5, 5}; // Vectors with nulls. const std::vector testData = { makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), @@ -207,9 +216,24 @@ TEST_F(PrefixSortTest, singleKeyWithNulls) { Timestamp(2, 2), Timestamp(1, 1)}), makeNullableFlatVector( - {"eee", "ddd", std::nullopt, "bbb", "aaa"})}; + {"eee", "ddd", std::nullopt, "bbb", "aaa"}), + makeNullableFlatVector( + {"ee", "aaa", std::nullopt, "d", "aaaaaaaaaaaaaa"}), + makeNullableFlatVector( + {"aaa_is_not_inline", + "aaa_is_not_inline_2", + std::nullopt, + "aaa_is_not_inline_1", + "aaaaaaaaaaaaaa"}), + makeNullableFlatVector( + {"\u7231", + "\u671B\u5E0C\u2014\u5FF5\u4FE1", + std::nullopt, + "\u7231\u2014", + "\u4FE1 \u7231"}, + VARBINARY())}; - for (int i = 5; i < columnsSize; ++i) { + for (int i = 0; i < testData.size(); ++i) { const auto data = makeRowVector({testData[i]}); testPrefixSort({kAsc}, data); @@ -234,7 +258,7 @@ TEST_F(PrefixSortTest, multipleKeys) { const auto data = makeRowVector({ makeNullableFlatVector({5, 2, std::nullopt, 2, 1}), makeNullableFlatVector( - {"eee", "ddd", std::nullopt, "bbb", "aaa"}), + {"eee", "aaa", std::nullopt, "bbb", "aaaa"}), }); testPrefixSort({kAsc, kAsc}, data); @@ -300,27 +324,32 @@ TEST_F(PrefixSortTest, checkMaxNormalizedKeySizeForMultipleKeys) { // The normalizedKeySize for BIGINT should be 8 + 1. std::vector keyTypes = {BIGINT(), BIGINT()}; std::vector compareFlags = {kAsc, kDesc}; - auto sortLayout = PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 8); + std::vector maxStringLengths = {9, 9}; + auto sortLayout = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 8, 9, maxStringLengths); ASSERT_FALSE(sortLayout.hasNormalizedKeys); - auto sortLayoutOneKey = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 9); + auto sortLayoutOneKey = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 9, 9, maxStringLengths); ASSERT_TRUE(sortLayoutOneKey.hasNormalizedKeys); ASSERT_TRUE(sortLayoutOneKey.hasNonNormalizedKey); ASSERT_EQ(sortLayoutOneKey.prefixOffsets.size(), 1); ASSERT_EQ(sortLayoutOneKey.prefixOffsets[0], 0); - auto sortLayoutOneKey1 = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 17); + auto sortLayoutOneKey1 = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 17, 12, maxStringLengths); ASSERT_TRUE(sortLayoutOneKey1.hasNormalizedKeys); ASSERT_TRUE(sortLayoutOneKey1.hasNonNormalizedKey); ASSERT_EQ(sortLayoutOneKey1.prefixOffsets.size(), 1); ASSERT_EQ(sortLayoutOneKey1.prefixOffsets[0], 0); - auto sortLayoutTwoKeys = - PrefixSortLayout::makeSortLayout(keyTypes, compareFlags, 18); + auto sortLayoutTwoKeys = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 18, 12, maxStringLengths); ASSERT_TRUE(sortLayoutTwoKeys.hasNormalizedKeys); ASSERT_FALSE(sortLayoutTwoKeys.hasNonNormalizedKey); + ASSERT_FALSE( + sortLayoutTwoKeys.comparisonStartIndex < + sortLayoutTwoKeys.numNormalizedKeys); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets.size(), 2); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets[0], 0); ASSERT_EQ(sortLayoutTwoKeys.prefixOffsets[1], 9); @@ -348,10 +377,10 @@ TEST_F(PrefixSortTest, optimizeSortKeysOrder) { {ROW({BIGINT(), SMALLINT(), VARCHAR()}), {0, 1, 2}, {1, 0, 2}}, {ROW({TINYINT(), BIGINT(), VARCHAR(), TINYINT(), INTEGER(), VARCHAR()}), {2, 1, 0, 4, 5, 3}, - {4, 1, 2, 0, 5, 3}}, + {4, 1, 2, 5, 0, 3}}, {ROW({INTEGER(), BIGINT(), VARCHAR(), TINYINT(), INTEGER(), VARCHAR()}), {5, 4, 3, 2, 1, 0}, - {4, 0, 1, 5, 3, 2}}}; + {4, 0, 1, 5, 2, 3}}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); @@ -371,5 +400,47 @@ TEST_F(PrefixSortTest, optimizeSortKeysOrder) { } } } + +TEST_F(PrefixSortTest, makeSortLayoutForString) { + std::vector keyTypes = {VARCHAR(), BIGINT()}; + std::vector compareFlags = {kAsc, kDesc}; + std::vector maxStringLengths = {9, 9}; + + auto sortLayoutOneKey = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 24, 8, maxStringLengths); + ASSERT_TRUE(sortLayoutOneKey.hasNormalizedKeys); + ASSERT_TRUE(sortLayoutOneKey.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutOneKey.comparisonStartIndex < + sortLayoutOneKey.numNormalizedKeys); + ASSERT_EQ(sortLayoutOneKey.encodeSizes.size(), 1); + ASSERT_EQ(sortLayoutOneKey.encodeSizes[0], 9); + + auto sortLayoutTwoCompleteKeys = PrefixSortLayout::makeSortLayout( + keyTypes, compareFlags, 26, 9, maxStringLengths); + ASSERT_TRUE(sortLayoutTwoCompleteKeys.hasNormalizedKeys); + ASSERT_FALSE(sortLayoutTwoCompleteKeys.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutTwoCompleteKeys.comparisonStartIndex == + sortLayoutTwoCompleteKeys.numNormalizedKeys); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes.size(), 2); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes[0], 10); + ASSERT_EQ(sortLayoutTwoCompleteKeys.encodeSizes[1], 9); + + // The last key type is VARBINARY, indicating that only partial data is + // encoded in the prefix. + std::vector keyTypes1 = {BIGINT(), VARBINARY()}; + auto sortLayoutTwoKeys = PrefixSortLayout::makeSortLayout( + keyTypes1, compareFlags, 26, 8, maxStringLengths); + ASSERT_TRUE(sortLayoutTwoKeys.hasNormalizedKeys); + ASSERT_FALSE(sortLayoutTwoKeys.hasNonNormalizedKey); + ASSERT_TRUE( + sortLayoutTwoKeys.comparisonStartIndex < + sortLayoutTwoKeys.numNormalizedKeys); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes.size(), 2); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes[0], 9); + ASSERT_EQ(sortLayoutTwoKeys.encodeSizes[1], 9); +} + } // namespace } // namespace facebook::velox::exec::prefixsort::test diff --git a/velox/exec/tests/SortBufferTest.cpp b/velox/exec/tests/SortBufferTest.cpp index f021ce5c5c247..c339f5377055c 100644 --- a/velox/exec/tests/SortBufferTest.cpp +++ b/velox/exec/tests/SortBufferTest.cpp @@ -66,7 +66,13 @@ class SortBufferTest : public OperatorTestBase, OperatorTestBase::TearDown(); } - common::SpillConfig getSpillConfig(const std::string& spillDir) const { + common::SpillConfig getSpillConfig( + const std::string& spillDir, + bool enableSpillPrefixSort = true) const { + std::optional spillPrefixSortConfig = + enableSpillPrefixSort + ? std::optional(prefixSortConfig_) + : std::nullopt; return common::SpillConfig( [spillDir]() -> const std::string& { return spillDir; }, [&](uint64_t) {}, @@ -83,14 +89,15 @@ class SortBufferTest : public OperatorTestBase, 0, 0, "none", - spillPrefixSortConfig_); + spillPrefixSortConfig); } const bool enableSpillPrefixSort_{GetParam()}; const velox::common::PrefixSortConfig prefixSortConfig_ = velox::common::PrefixSortConfig{ std::numeric_limits::max(), - GetParam() ? 8 : std::numeric_limits::max()}; + GetParam() ? 8 : std::numeric_limits::max(), + 12}; const std::optional spillPrefixSortConfig_ = enableSpillPrefixSort_ ? std::optional(prefixSortConfig_) @@ -103,13 +110,6 @@ class SortBufferTest : public OperatorTestBase, {"c3", REAL()}, {"c4", DOUBLE()}, {"c5", VARCHAR()}}); - const RowTypePtr nonPrefixSortInputType_ = ROW( - {{"c0", VARCHAR()}, - {"c1", VARCHAR()}, - {"c2", VARCHAR()}, - {"c3", VARCHAR()}, - {"c4", VARCHAR()}, - {"c5", VARCHAR()}}); // Specifies the sort columns ["c4", "c1"]. std::vector sortColumnIndices_{4, 1}; std::vector sortCompareFlags_{ @@ -740,12 +740,10 @@ DEBUG_ONLY_TEST_P(SortBufferTest, reserveMemorySort) { SCOPED_TRACE(fmt::format( "usePrefixSort: {}, spillEnabled: {}, ", usePrefixSort, spillEnabled)); auto spillDirectory = exec::test::TempDirectoryPath::create(); - auto spillConfig = getSpillConfig(spillDirectory->getPath()); + auto spillConfig = getSpillConfig(spillDirectory->getPath(), usePrefixSort); folly::Synchronized spillStats; - const RowTypePtr inputType = - usePrefixSort ? inputType_ : nonPrefixSortInputType_; auto sortBuffer = std::make_unique( - inputType, + inputType_, sortColumnIndices_, sortCompareFlags_, pool_.get(), @@ -759,7 +757,7 @@ DEBUG_ONLY_TEST_P(SortBufferTest, reserveMemorySort) { VectorFuzzer fuzzer({.vectorSize = 100}, spillSource.get()); TestScopedSpillInjection scopedSpillInjection(0); - sortBuffer->addInput(fuzzer.fuzzRow(inputType)); + sortBuffer->addInput(fuzzer.fuzzRow(inputType_)); std::atomic_bool hasReserveMemory = false; // Reserve memory for sort. diff --git a/velox/exec/tests/WindowTest.cpp b/velox/exec/tests/WindowTest.cpp index a725d359207d8..cc51cbfd51471 100644 --- a/velox/exec/tests/WindowTest.cpp +++ b/velox/exec/tests/WindowTest.cpp @@ -660,7 +660,7 @@ DEBUG_ONLY_TEST_F(WindowTest, reserveMemorySort) { const auto plan = usePrefixSort ? prefixSortPlan : nonPrefixSortPlan; velox::common::PrefixSortConfig prefixSortConfig = velox::common::PrefixSortConfig{ - std::numeric_limits::max(), 130}; + std::numeric_limits::max(), 130, 12}; auto sortWindowBuild = std::make_unique( plan, pool_.get(), diff --git a/velox/exec/tests/utils/LocalRunnerTestBase.cpp b/velox/exec/tests/utils/LocalRunnerTestBase.cpp index 2c23b50a32465..123a4f9e8aa85 100644 --- a/velox/exec/tests/utils/LocalRunnerTestBase.cpp +++ b/velox/exec/tests/utils/LocalRunnerTestBase.cpp @@ -59,7 +59,7 @@ void LocalRunnerTestBase::ensureTestData() { void LocalRunnerTestBase::makeSchema() { auto schemaQueryCtx = makeQueryCtx("schema", rootPool_.get()); common::SpillConfig spillConfig; - common::PrefixSortConfig prefixSortConfig(100, 130); + common::PrefixSortConfig prefixSortConfig(100, 130, 12); auto leafPool = schemaQueryCtx->pool()->addLeafChild("schemaReader"); auto connectorQueryCtx = std::make_shared( leafPool.get(),