Skip to content

Commit

Permalink
Support long decimal in prefixsort (#10385)
Browse files Browse the repository at this point in the history
Summary:
PrefixSort is 1.5x to 2.8x faster than std::sort for long decimal keys. The larger the amount of data, the greater the improvement.

```
============================================================================
[...]ec/benchmarks/PrefixSortBenchmark.cpp     relative  time/iter   iters/s
============================================================================
StdSort_no-payloads_1_hugeint_1k                          157.05ns     6.37M
PrefixSort                                      159.96%    98.18ns    10.19M
StdSort_no-payloads_2_hugeint_1k                          229.25ns     4.36M
PrefixSort                                      187.99%   121.95ns     8.20M
StdSort_no-payloads_3_hugeint_1k                          300.49ns     3.33M
PrefixSort                                      214.49%   140.09ns     7.14M
StdSort_no-payloads_4_hugeint_1k                          334.26ns     2.99M
PrefixSort                                      177.37%   188.45ns     5.31M
StdSort_no-payloads_1_hugeint_10k                         223.07ns     4.48M
PrefixSort                                      179.30%   124.41ns     8.04M
StdSort_no-payloads_2_hugeint_10k                         299.86ns     3.33M
PrefixSort                                      201.84%   148.57ns     6.73M
StdSort_no-payloads_3_hugeint_10k                         389.77ns     2.57M
PrefixSort                                      233.71%   166.77ns     6.00M
StdSort_no-payloads_4_hugeint_10k                         473.84ns     2.11M
PrefixSort                                      219.03%   216.34ns     4.62M
StdSort_no-payloads_1_hugeint_100k                        288.05ns     3.47M
PrefixSort                                      192.22%   149.86ns     6.67M
StdSort_no-payloads_2_hugeint_100k                        403.63ns     2.48M
PrefixSort                                      225.74%   178.80ns     5.59M
StdSort_no-payloads_3_hugeint_100k                        511.05ns     1.96M
PrefixSort                                      257.16%   198.73ns     5.03M
StdSort_no-payloads_4_hugeint_100k                        606.13ns     1.65M
PrefixSort                                      237.37%   255.36ns     3.92M
StdSort_no-payloads_1_hugeint_1000k                       388.12ns     2.58M
PrefixSort                                      216.15%   179.56ns     5.57M
StdSort_no-payloads_2_hugeint_1000k                       546.45ns     1.83M
PrefixSort                                      246.45%   221.73ns     4.51M
StdSort_no-payloads_3_hugeint_1000k                       716.56ns     1.40M
PrefixSort                                      274.69%   260.86ns     3.83M
StdSort_no-payloads_4_hugeint_1000k                       952.00ns     1.05M
PrefixSort                                      283.40%   335.92ns     2.98M
```

Pull Request resolved: #10385

Reviewed By: Yuhta, tanjialiang

Differential Revision: D65739467

Pulled By: xiaoxmeng

fbshipit-source-id: 15f1464a17ff2b75482eae58b328997eed3e2cff
  • Loading branch information
jinchengchenghh authored and facebook-github-bot committed Nov 13, 2024
1 parent 117b5df commit 05dc841
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 2 deletions.
5 changes: 5 additions & 0 deletions velox/exec/PrefixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix(
prefixSortLayout, index, rowColumn, row, prefixBuffer);
return;
}
case TypeKind::HUGEINT: {
encodeRowColumn<int128_t>(
prefixSortLayout, index, rowColumn, row, prefixBuffer);
return;
}
default:
VELOX_UNSUPPORTED(
"prefix-sort does not support type kind: {}",
Expand Down
16 changes: 16 additions & 0 deletions velox/exec/benchmarks/PrefixSortBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,21 @@ class PrefixSortBenchmark {
bigint(false, iterations, batchSizes);
}

void hugeInt() {
const auto iterations = 10;
const std::vector<vector_size_t> batchSizes = {
1'000, 10'000, 100'000, 1'000'000};
std::vector<RowTypePtr> rowTypes = {
ROW({DECIMAL(23, 2)}),
ROW({DECIMAL(30, 2), DECIMAL(32, 5)}),
ROW({DECIMAL(19, 5), DECIMAL(34, 8), DECIMAL(38, 2)}),
ROW({DECIMAL(30, 2), DECIMAL(24, 3), DECIMAL(32, 5), DECIMAL(34, 3)}),
};
std::vector<int> numKeys = {1, 2, 3, 4};
benchmark(
"no-payloads", "hugeint", batchSizes, rowTypes, numKeys, iterations);
}

void largeVarchar() {
const auto iterations = 10;
const std::vector<vector_size_t> batchSizes = {
Expand Down Expand Up @@ -382,6 +397,7 @@ int main(int argc, char** argv) {

bm.smallBigint();
bm.largeBigint();
bm.hugeInt();
bm.largeBigintWithPayloads();
bm.smallBigintWithPayload();
bm.largeVarchar();
Expand Down
11 changes: 11 additions & 0 deletions velox/exec/prefixsort/PrefixSortEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ class PrefixSortEncoder {
case ::facebook::velox::TypeKind::TIMESTAMP: {
return 17;
}
case ::facebook::velox::TypeKind::HUGEINT: {
return 17;
}
default:
return std::nullopt;
}
Expand Down Expand Up @@ -168,6 +171,14 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
encodeNoNulls(static_cast<uint16_t>(value ^ (1u << 15)), dest);
}

/// Specialization for 128-bit integers. The value is split into two 64-bit
/// halves that are encoded back to back: the upper half goes through the
/// int64_t encoder into the first sizeof(int64_t) bytes of 'dest', and the
/// lower half goes through the uint64_t encoder into the bytes right after.
/// 'dest' must therefore have room for both halves.
template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
    int128_t value,
    char* dest) const {
  // Most significant half first, so it takes precedence in a byte-wise compare.
  const int64_t highHalf = HugeInt::upper(value);
  encodeNoNulls<int64_t>(highHalf, dest);

  // Least significant half follows immediately after the encoded upper half.
  const uint64_t lowHalf = HugeInt::lower(value);
  char* const lowDest = dest + sizeof(int64_t);
  encodeNoNulls<uint64_t>(lowHalf, lowDest);
}

namespace detail {
/// Convert double to a uint64_t, their value comparison semantics remain
/// consistent.
Expand Down
14 changes: 14 additions & 0 deletions velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ TEST_F(PrefixEncoderTest, encode) {
testEncode<double>(100000.00, (char*)&ascExpected, (char*)&descExpected);
}

{
char ascExpected[16] = {
-128, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, -56};
char descExpected[16] = {
127, -1, -1, -1, -1, -1, -1, -11, -1, -1, -1, -1, -1, -1, -1, 55};
int128_t value = HugeInt::build(10, 200);
testEncode<int128_t>(value, (char*)ascExpected, (char*)descExpected);
}

{
Timestamp value = Timestamp(0x000000011223344, 0x000000011223344);
uint64_t ascExpected[2];
Expand All @@ -345,6 +354,7 @@ TEST_F(PrefixEncoderTest, compare) {
testCompare<int64_t>();
testCompare<int32_t>();
testCompare<int16_t>();
testCompare<int128_t>();
testCompare<float>();
testCompare<double>();
testCompare<Timestamp>();
Expand All @@ -362,6 +372,10 @@ TEST_F(PrefixEncoderTest, fuzzyBigint) {
testFuzz<TypeKind::BIGINT>();
}

// Runs the fixture's testFuzz helper instantiated for TypeKind::HUGEINT.
// NOTE(review): testFuzz is defined outside this view; presumably it checks
// the encoder against randomly generated values - confirm.
TEST_F(PrefixEncoderTest, fuzzyHugeInt) {
testFuzz<TypeKind::HUGEINT>();
}

// Runs the fixture's testFuzz helper instantiated for TypeKind::REAL.
// NOTE(review): testFuzz is defined outside this view; presumably it checks
// the encoder against randomly generated values - confirm.
TEST_F(PrefixEncoderTest, fuzzyReal) {
testFuzz<TypeKind::REAL>();
}
Expand Down
18 changes: 16 additions & 2 deletions velox/exec/tests/PrefixSortTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ TEST_F(PrefixSortTest, singleKey) {
makeFlatVector<int64_t>({5, 4, 3, 2, 1}),
makeFlatVector<int32_t>({5, 4, 3, 2, 1}),
makeFlatVector<int16_t>({5, 4, 3, 2, 1}),
makeFlatVector<int128_t>(
{5,
HugeInt::parse("1234567"),
HugeInt::parse("12345678901234567890"),
HugeInt::parse("12345679"),
HugeInt::parse("-12345678901234567890")}),
makeFlatVector<float>({5.5, 4.4, 3.3, 2.2, 1.1}),
makeFlatVector<double>({5.5, 4.4, 3.3, 2.2, 1.1}),
makeFlatVector<Timestamp>(
Expand Down Expand Up @@ -186,6 +192,12 @@ TEST_F(PrefixSortTest, singleKeyWithNulls) {
makeNullableFlatVector<int64_t>({5, 4, std::nullopt, 2, 1}),
makeNullableFlatVector<int32_t>({5, 4, std::nullopt, 2, 1}),
makeNullableFlatVector<int16_t>({5, 4, std::nullopt, 2, 1}),
makeNullableFlatVector<int128_t>(
{5,
HugeInt::parse("1234567"),
std::nullopt,
HugeInt::parse("12345679"),
HugeInt::parse("-12345678901234567890")}),
makeNullableFlatVector<float>({5.5, 4.4, std::nullopt, 2.2, 1.1}),
makeNullableFlatVector<double>({5.5, 4.4, std::nullopt, 2.2, 1.1}),
makeNullableFlatVector<Timestamp>(
Expand Down Expand Up @@ -237,7 +249,8 @@ TEST_F(PrefixSortTest, fuzz) {
TINYINT(),
SMALLINT(),
BIGINT(),
HUGEINT(),
DECIMAL(12, 2),
DECIMAL(25, 6),
REAL(),
DOUBLE(),
TIMESTAMP(),
Expand All @@ -260,7 +273,8 @@ TEST_F(PrefixSortTest, fuzzMulti) {
TINYINT(),
SMALLINT(),
BIGINT(),
HUGEINT(),
DECIMAL(12, 2),
DECIMAL(25, 6),
REAL(),
DOUBLE(),
TIMESTAMP(),
Expand Down

0 comments on commit 05dc841

Please sign in to comment.