Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Dec 11, 2024
1 parent 1086804 commit 3204cce
Show file tree
Hide file tree
Showing 12 changed files with 221 additions and 173 deletions.
2 changes: 1 addition & 1 deletion velox/common/base/PrefixSortConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,6 @@ struct PrefixSortConfig {

/// Maximum number of bytes to be stored in prefix-sort buffer for a string
/// column.
uint32_t maxStringPrefixLength{12};
uint32_t maxStringPrefixLength{16};
};
} // namespace facebook::velox::common
8 changes: 4 additions & 4 deletions velox/core/QueryConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ class QueryConfig {

/// Maximum number of bytes to be stored in prefix-sort buffer for a string
/// key.
static constexpr const char* kPrefixSortMaxStringPrefixLength =
"prefixsort_max_string_prefix_length";
static constexpr const char* kPrefixSortMaxStringLength =
"prefixsort_max_string_length";

/// Enable query tracing flag.
static constexpr const char* kQueryTraceEnabled = "query_trace_enabled";
Expand Down Expand Up @@ -849,8 +849,8 @@ class QueryConfig {
return get<uint32_t>(kPrefixSortMinRows, 128);
}

uint32_t prefixSortMaxStringPrefixLength() const {
return get<uint32_t>(kPrefixSortMaxStringPrefixLength, 12);
uint32_t prefixSortMaxStringLength() const {
return get<uint32_t>(kPrefixSortMaxStringLength, 16);
}

double scaleWriterRebalanceMaxMemoryUsageRatio() const {
Expand Down
4 changes: 2 additions & 2 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ Generic Configuration
- integer
- 128
- Minimum number of rows to use prefix-sort. The default value has been derived using micro-benchmarking.
* - prefixsort_max_string_prefix_length
* - prefixsort_max_string_length
- integer
- 12
- 16
- Byte length of the string prefix stored in the prefix-sort buffer. This doesn't include the null byte.

.. _expression-evaluation-conf:
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ struct DriverCtx {
return common::PrefixSortConfig{
queryConfig().prefixSortNormalizedKeyMaxBytes(),
queryConfig().prefixSortMinRows(),
queryConfig().prefixSortMaxStringPrefixLength()};
queryConfig().prefixSortMaxStringLength()};
}
};

Expand Down
40 changes: 19 additions & 21 deletions velox/exec/PrefixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,10 @@ FOLLY_ALWAYS_INLINE void encodeRowColumn(
} else {
value = *(reinterpret_cast<T*>(row + rowColumn.offset()));
}
if constexpr (std::is_same_v<T, StringView>) {
prefixSortLayout.encoders[index].encode(
value,
prefixBuffer + prefixSortLayout.prefixOffsets[index],
prefixSortLayout.encodeSizes[index]);
} else {
prefixSortLayout.encoders[index].encode(
value, prefixBuffer + prefixSortLayout.prefixOffsets[index]);
}
prefixSortLayout.encoders[index].encode(
value,
prefixBuffer + prefixSortLayout.prefixOffsets[index],
prefixSortLayout.encodeSizes[index]);
}

FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix(
Expand Down Expand Up @@ -141,12 +136,12 @@ compareByWord(uint64_t* left, uint64_t* right, int32_t bytes) {
} // namespace

// static.
PrefixSortLayout PrefixSortLayout::makeSortLayout(
PrefixSortLayout PrefixSortLayout::generate(
const std::vector<TypePtr>& types,
const std::vector<CompareFlags>& compareFlags,
uint32_t maxNormalizedKeySize,
uint32_t maxStringPrefixLength,
const std::vector<uint32_t>& maxStringLengths) {
const std::vector<std::optional<uint32_t>>& maxStringLengths) {
const uint32_t numKeys = types.size();
std::vector<uint32_t> prefixOffsets;
prefixOffsets.reserve(numKeys);
Expand All @@ -161,10 +156,13 @@ PrefixSortLayout PrefixSortLayout::makeSortLayout(
uint32_t normalizedKeySize{0};
uint32_t numNormalizedKeys{0};

bool lastKeyInPrefixIsPartial{false};
bool lastPrefixKeyPartial{false};
for (auto i = 0; i < numKeys; ++i) {
const std::optional<uint32_t> encodedSize = PrefixSortEncoder::encodedSize(
types[i]->kind(), std::min(maxStringLengths[i], maxStringPrefixLength));
types[i]->kind(),
maxStringLengths[i].has_value()
? std::min(maxStringLengths[i].value(), maxStringPrefixLength)
: maxStringPrefixLength);
if (!encodedSize.has_value() ||
normalizedKeySize + encodedSize.value() > maxNormalizedKeySize) {
break;
Expand All @@ -176,8 +174,9 @@ PrefixSortLayout PrefixSortLayout::makeSortLayout(
++numNormalizedKeys;
if ((types[i]->kind() == TypeKind::VARCHAR ||
types[i]->kind() == TypeKind::VARBINARY) &&
maxStringPrefixLength < maxStringLengths[i]) {
lastKeyInPrefixIsPartial = true;
(!maxStringLengths[i].has_value() ||
maxStringPrefixLength < maxStringLengths[i].value())) {
lastPrefixKeyPartial = true;
break;
}
}
Expand All @@ -193,7 +192,8 @@ PrefixSortLayout PrefixSortLayout::makeSortLayout(
compareFlags,
numNormalizedKeys != 0,
numNormalizedKeys < numKeys,
lastKeyInPrefixIsPartial ? numNormalizedKeys - 1 : numNormalizedKeys,
/* nonPrefixSortStartIndex */
lastPrefixKeyPartial ? numNormalizedKeys - 1 : numNormalizedKeys,
std::move(prefixOffsets),
std::move(encodeSizes),
std::move(encoders),
Expand All @@ -207,10 +207,8 @@ void PrefixSortLayout::optimizeSortKeysOrder(
std::vector<std::optional<uint32_t>> encodedKeySizes(
rowType->size(), std::nullopt);
for (const auto& projection : keyColumnProjections) {
// Set stringPrefixLength to UINT_MAX - 1 to ensure VARCHAR columns are
// processed after all other supported types.
encodedKeySizes[projection.inputChannel] = PrefixSortEncoder::encodedSize(
rowType->childAt(projection.inputChannel)->kind(), UINT_MAX - 1);
rowType->childAt(projection.inputChannel)->kind(), std::nullopt);
}

std::sort(
Expand Down Expand Up @@ -254,7 +252,7 @@ int PrefixSort::comparePartNormalizedKeys(char* left, char* right) {
// If prefixes are equal, compare the remaining sort keys with rowContainer.
char* leftRow = getRowAddrFromPrefixBuffer(left);
char* rightRow = getRowAddrFromPrefixBuffer(right);
for (auto i = sortLayout_.comparisonStartIndex; i < sortLayout_.numKeys;
for (auto i = sortLayout_.nonPrefixSortStartIndex; i < sortLayout_.numKeys;
++i) {
result = rowContainer_->compare(
leftRow, rightRow, i, sortLayout_.compareFlags[i]);
Expand Down Expand Up @@ -379,7 +377,7 @@ void PrefixSort::sortInternal(
sortLayout_.numNormalizedKeys, RuntimeCounter::Unit::kNone));
}
if (sortLayout_.hasNonNormalizedKey ||
sortLayout_.comparisonStartIndex < sortLayout_.numNormalizedKeys) {
sortLayout_.nonPrefixSortStartIndex < sortLayout_.numNormalizedKeys) {
sortRunner.quickSort(
prefixBufferStart, prefixBufferEnd, [&](char* lhs, char* rhs) {
return comparePartNormalizedKeys(lhs, rhs);
Expand Down
21 changes: 12 additions & 9 deletions velox/exec/PrefixSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
#pragma once

#include <optional>

#include "velox/common/base/PrefixSortConfig.h"
#include "velox/exec/Operator.h"
#include "velox/exec/RowContainer.h"
Expand Down Expand Up @@ -56,7 +58,7 @@ struct PrefixSortLayout {
/// Indicates the starting index for key comparison.
/// If the last key is only partially encoded in the prefix, start from
/// numNormalizedKeys - 1. Otherwise, start from numNormalizedKeys.
const uint32_t comparisonStartIndex;
const uint32_t nonPrefixSortStartIndex;

/// Offsets of normalized keys, used to find write locations when
/// extracting columns
Expand All @@ -72,12 +74,12 @@ struct PrefixSortLayout {
/// for fast long compare.
const int32_t numPaddingBytes;

static PrefixSortLayout makeSortLayout(
static PrefixSortLayout generate(
const std::vector<TypePtr>& types,
const std::vector<CompareFlags>& compareFlags,
uint32_t maxNormalizedKeySize,
uint32_t maxStringPrefixLength,
const std::vector<uint32_t>& maxStringLengths);
const std::vector<std::optional<uint32_t>>& maxStringLengths);

/// Optimizes the order of sort key columns to maximize the number of prefix
/// sort keys for acceleration. This only applies for use case which doesn't
Expand Down Expand Up @@ -172,20 +174,21 @@ class PrefixSort {
const velox::common::PrefixSortConfig& config) {
const auto keyTypes = rowContainer->keyTypes();
VELOX_CHECK_EQ(keyTypes.size(), compareFlags.size());
std::vector<uint32_t> maxStringLengths;
std::vector<std::optional<uint32_t>> maxStringLengths;
maxStringLengths.reserve(keyTypes.size());
for (int i = 0; i < keyTypes.size(); ++i) {
auto maxStringLength = UINT_MAX;
std::optional<uint32_t> maxStringLength = std::nullopt;
if (keyTypes[i]->kind() == TypeKind::VARBINARY ||
keyTypes[i]->kind() == TypeKind::VARCHAR) {
const auto stats = rowContainer->columnStats(i);
maxStringLength = stats.has_value() && stats.value().maxBytes() > 0
? stats.value().maxBytes()
: UINT_MAX;
if (stats.has_value()) {
// zhli ? stats.value().maxBytes() > 0
maxStringLength = stats.value().maxBytes();
}
}
maxStringLengths.emplace_back(maxStringLength);
}
return PrefixSortLayout::makeSortLayout(
return PrefixSortLayout::generate(
keyTypes,
compareFlags,
config.maxNormalizedKeyBytes,
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/Window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Window::Window(
common::PrefixSortConfig{
driverCtx->queryConfig().prefixSortNormalizedKeyMaxBytes(),
driverCtx->queryConfig().prefixSortMinRows(),
driverCtx->queryConfig().prefixSortMaxStringPrefixLength()},
driverCtx->queryConfig().prefixSortMaxStringLength()},
spillConfig,
&nonReclaimableSection_,
&spillStats_);
Expand Down
Loading

0 comments on commit 3204cce

Please sign in to comment.