Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Nov 14, 2024
1 parent 39955b0 commit 6ddfde4
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 33 deletions.
60 changes: 31 additions & 29 deletions velox/exec/prefixsort/PrefixSortEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,44 +51,45 @@ class PrefixSortEncoder {
}

/// Encodes String types.
/// The first byte of the encoded result is null byte. The value is 0 if
/// (nulls first and value is null) or (nulls last and value is not null).
/// Otherwise, the value is 1.
/// The string prefix is formatted as 'null byte + string content + padding
/// zeros'. If `!ascending_`, the bits for both the content and padding zeros
/// need to be inverted.
FOLLY_ALWAYS_INLINE void encode(
std::optional<StringView> value,
char* dest,
int32_t encodeSize) const {
auto destDataPtr = dest + 1;
auto stringPrefixSize = encodeSize - 1;
if (value.has_value()) {
dest[0] = nullsFirst_ ? 1 : 0;
encodeNoNulls(value.value(), dest, encodeSize);
} else {
dest[0] = nullsFirst_ ? 0 : 1;
std::memset(dest + 1, 0, encodeSize - 1);
}
}
auto data = value.value();
int32_t copySize = std::min<int32_t>(data.size(), stringPrefixSize);
if (data.isInline() ||
reinterpret_cast<const HashStringAllocator::Header*>(data.data())[-1]
.size() >= data.size()) {
// The string is inline or all in one piece out of line.
std::memcpy(destDataPtr, data.data(), copySize);
} else {
// 'data' is stored in non-contiguous allocation pieces in the row
// container, we only read prefix size data out.
auto stream = HashStringAllocator::prepareRead(
HashStringAllocator::headerOf(data.data()));
stream->readBytes(destDataPtr, copySize);
}

/// The string prefix is formatted as 'null byte + string content + padding
/// zeros'. If `!ascending_`, the bits for both the content and padding zeros
/// need to be inverted.
FOLLY_ALWAYS_INLINE void encodeNoNulls(
std::optional<StringView> value,
char* dest,
int32_t encodeSize) const {
std::string storage;
// 'value' may be stored in non-contiguous allocation pieces in the row
// container, so we need to use 'contiguousString' method to read it out.
auto data = HashStringAllocator::contiguousString(value.value(), storage);
if (data.size() < encodeSize - 1) {
std::memcpy(dest + 1, data.data(), data.size());
std::memset(dest + 1 + data.size(), 0, encodeSize - 1 - data.size());
} else {
std::memcpy(dest + 1, data.data(), encodeSize - 1);
}
if (data.size() < stringPrefixSize) {
std::memset(
destDataPtr + data.size(), 0, stringPrefixSize - data.size());
}

if (!ascending_) {
for (auto i = 1; i < encodeSize; ++i) {
dest[i] = ~dest[i];
if (!ascending_) {
for (auto i = 1; i < encodeSize; ++i) {
dest[i] = ~dest[i];
}
}
} else {
dest[0] = nullsFirst_ ? 0 : 1;
std::memset(destDataPtr, 0, stringPrefixSize);
}
}

Expand Down Expand Up @@ -134,6 +135,7 @@ class PrefixSortEncoder {
return 17;
}
case ::facebook::velox::TypeKind::VARBINARY:
[[fallthrough]];
case ::facebook::velox::TypeKind::VARCHAR: {
return 1 + stringPrefixLength;
}
Expand Down
22 changes: 18 additions & 4 deletions velox/exec/tests/PrefixSortTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ const RowVectorPtr PrefixSortTest::generateExpectedResult(

TEST_F(PrefixSortTest, singleKey) {
const int numRows = 5;
const int columnsSize = 7;
const int columnsSize = 9;

// Vectors without nulls.
const std::vector<VectorPtr> testData = {
Expand Down Expand Up @@ -182,7 +182,14 @@ TEST_F(PrefixSortTest, singleKey) {
"aaa_is_not_inline",
"aaa_is_not_inline_a",
"ddd_is_not_inline",
"aaa_is_not_inline_b"})};
"aaa_is_not_inline_b"}),
makeNullableFlatVector<std::string_view>(
{"\u7231",
"\u671B\u5E0C\u2014\u5FF5\u4FE1",
"\u671B\u5E0C",
"\u7231\u2014",
"\u4FE1 \u7231"},
VARBINARY())};
for (int i = 5; i < columnsSize; ++i) {
const auto data = makeRowVector({testData[i]});

Expand All @@ -193,7 +200,7 @@ TEST_F(PrefixSortTest, singleKey) {

TEST_F(PrefixSortTest, singleKeyWithNulls) {
const int numRows = 5;
const int columnsSize = 7;
const int columnsSize = 9;

Timestamp ts = {5, 5};
// Vectors with nulls.
Expand Down Expand Up @@ -224,7 +231,14 @@ TEST_F(PrefixSortTest, singleKeyWithNulls) {
"aaa_is_not_inline_2",
std::nullopt,
"aaa_is_not_inline_1",
"aaaaaaaaaaaaaa"})};
"aaaaaaaaaaaaaa"}),
makeNullableFlatVector<std::string_view>(
{"\u7231",
"\u671B\u5E0C\u2014\u5FF5\u4FE1",
std::nullopt,
"\u7231\u2014",
"\u4FE1 \u7231"},
VARBINARY())};

for (int i = 5; i < columnsSize; ++i) {
const auto data = makeRowVector({testData[i]});
Expand Down

0 comments on commit 6ddfde4

Please sign in to comment.