From 3bd7ed4379792a7ebb6690b781e809dbabf17815 Mon Sep 17 00:00:00 2001 From: Minhan Cao Date: Thu, 7 Mar 2024 14:55:11 -0800 Subject: [PATCH 1/2] Adding decimal support for min() and max() functions --- .../prestosql/aggregates/MinMaxAggregates.cpp | 16 +- .../prestosql/aggregates/tests/MinMaxTest.cpp | 231 ++++++++++++++++++ 2 files changed, 246 insertions(+), 1 deletion(-) diff --git a/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp b/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp index b2123e817f5fc..d5da384532ff1 100644 --- a/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp +++ b/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp @@ -926,6 +926,17 @@ exec::AggregateRegistrationResult registerMinMax(const std::string& name) { .build()); } + // decimal(p,s), bigint -> row(array(decimal(p,s)), bigint) -> array(decimal(p,s)) + signatures.push_back( + exec::AggregateFunctionSignatureBuilder() + .integerVariable("a_precision") + .integerVariable("a_scale") + .argumentType("DECIMAL(a_precision, a_scale)") + .argumentType("bigint") + .intermediateType("row(bigint, array(DECIMAL(a_precision, a_scale)))") + .returnType("array(DECIMAL(a_precision, a_scale))") + .build()); + return exec::registerAggregateFunction( name, std::move(signatures), @@ -962,7 +973,10 @@ exec::AggregateRegistrationResult registerMinMax(const std::string& name) { case TypeKind::TIMESTAMP: return std::make_unique>(resultType); case TypeKind::HUGEINT: - return std::make_unique>(resultType); + if (inputType->isLongDecimal()) { + return std::make_unique>(resultType); + } + VELOX_NYI(); default: VELOX_CHECK( false, diff --git a/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp b/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp index f3ddc59b74628..15a71b2a7ec74 100644 --- a/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp @@ -606,6 +606,102 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { "second argument of max/min must be less than or equal to 10000"); } + template + void testNumericGlobalDecimal() { + TypePtr type; + if (std::is_same::value) { + type = DECIMAL(6, 2); + } else { + type = DECIMAL(20, 2); + } + auto data = makeRowVector({ + makeFlatVector( + {100000, + 131011, + 223454, + 111911, + 111300, + 800000, + 104000, + 712452, + 161213, + 135243}, + type), + }); + auto expected = makeRowVector({ + makeArrayVector( + { + {100000, 104000}, + }, + type), + makeArrayVector( + { + {100000, 104000, 111300, 111911, 131011}, + }, + type), + makeArrayVector( + { + {800000, 712452, 223454}, + }, + type), + makeArrayVector( + { + {800000, 712452, 223454, 161213, 135243, 131011, 111911}, + }, + type), + }); + + testAggregations( + {data}, + {}, + {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, + {expected}); + + // Add some nulls. Expect these to be ignored. + data = makeRowVector({ + makeNullableFlatVector( + {100000, + std::nullopt, + 131011, + 223454, + 111911, + std::nullopt, + 111300, + 800000, + 104000, + 712452, + 161213, + 135243, + std::nullopt}, + type), + }); + + testAggregations( + {data}, + {}, + {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, + {expected}); + + // Test all null input. + data = makeRowVector({ + makeNullableFlatVector( + {std::nullopt, std::nullopt, std::nullopt, std::nullopt}, type), + }); + + expected = makeRowVector({ + makeAllNullArrayVector(1, data->childAt(0)->type()), + makeAllNullArrayVector(1, data->childAt(0)->type()), + makeAllNullArrayVector(1, data->childAt(0)->type()), + makeAllNullArrayVector(1, data->childAt(0)->type()), + }); + + testAggregations( + {data}, + {}, + {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, + {expected}); + } + template void testNumericGroupBy() { auto data = makeRowVector({ @@ -734,6 +830,131 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {"min(c1, c2)", "min(c1, c4)", "max(c1, c3)", "max(c1, c4)"}, {expected}); } + + template + void testNumericGroupByDecimal() { + TypePtr type; + if (std::is_same::value) { + type = DECIMAL(6, 2); + } else { + type = DECIMAL(20, 2); + } + + auto data = makeRowVector({ + makeFlatVector({1, 2, 1, 1, 2, 2, 1, 2}), + makeFlatVector( + {100000, 131011, 223454, 111911, 111300, 104000, 161213, 135243}, + type), + }); + + auto expected = makeRowVector({ + makeFlatVector({1, 2}), + makeArrayVector( + { + {100000, 111911}, + {104000, 111300}, + }, + type), + makeArrayVector( + { + {100000, 111911, 161213, 223454}, + {104000, 111300, 131011, 135243}, + }, + type), + makeArrayVector( + { + {223454, 161213, 111911}, + {135243, 131011, 111300}, + }, + type), + makeArrayVector( + { + {223454, 161213, 111911, 100000}, + {135243, 131011, 111300, 104000}, + }, + type), + }); + + testAggregations( + {data}, + {"c0"}, + {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, + {expected}); + + // Add some nulls. Expect these to be ignored. + data = makeRowVector({ + makeFlatVector({1, 2, 1, 1, 1, 2, 2, 2, 1, 2}), + makeNullableFlatVector( + {100000, + 131011, + std::nullopt, + 223454, + 111911, + 111300, + std::nullopt, + 104000, + 161213, + 135243}, + type), + }); + + testAggregations( + {data}, + {"c0"}, + {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, + {expected}); + + // Test all null input. + data = makeRowVector({ + makeFlatVector({1, 2, 1, 1, 1, 2, 2, 2, 1, 2}), + makeNullableFlatVector( + {std::nullopt, + 131011, + std::nullopt, + std::nullopt, + std::nullopt, + 111300, + std::nullopt, + 104000, + std::nullopt, + 135243}, + type), + }); + + expected = makeRowVector({ + makeFlatVector({1, 2}), + makeNullableArrayVector( + { + std::nullopt, + {{{104000, 111300}}}, + }, + ARRAY(type)), + makeNullableArrayVector( + { + std::nullopt, + {{{104000, 111300, 131011, 135243}}}, + }, + ARRAY(type)), + makeNullableArrayVector( + { + std::nullopt, + {{{135243, 131011, 111300}}}, + }, + ARRAY(type)), + makeNullableArrayVector( + { + std::nullopt, + {{{135243, 131011, 111300, 104000}}}, + }, + ARRAY(type)), + }); + + testAggregations( + {data}, + {"c0"}, + {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, + {expected}); + } }; TEST_F(MinMaxNTest, tinyint) { @@ -766,6 +987,16 @@ TEST_F(MinMaxNTest, double) { testNumericGroupBy(); } +TEST_F(MinMaxNTest, shortdecimal) { + testNumericGlobalDecimal(); + testNumericGroupByDecimal(); +} + +TEST_F(MinMaxNTest, longdecimal) { + testNumericGlobalDecimal(); + testNumericGroupByDecimal(); +} + TEST_F(MinMaxNTest, incrementalWindow) { // SELECT // c0, c1, c2, c3, From 1ae75eee593227fe7f989d4d2445bc9bf80ffaf4 Mon Sep 17 00:00:00 2001 From: Minhan Cao Date: Wed, 13 Mar 2024 16:23:25 -0700 Subject: [PATCH 2/2] Made heap allocator be an aligned allocator for MinMaxNAccumulator --- .../prestosql/aggregates/MinMaxAggregates.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp b/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp index d5da384532ff1..f9ac18e1345a9 100644 --- a/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp +++ b/velox/functions/prestosql/aggregates/MinMaxAggregates.cpp @@ -545,10 +545,15 @@ std::pair rawOffsetAndSizes( template struct MinMaxNAccumulator { int64_t n{0}; - std::vector> heapValues; + using Allocator = std::conditional_t< + std::is_same_v, + AlignedStlAllocator, + StlAllocator>; + using Heap = std::vector; + Heap heapValues; explicit MinMaxNAccumulator(HashStringAllocator* allocator) - : heapValues{StlAllocator(allocator)} {} + : heapValues{Allocator(allocator)} {} int64_t getN() const { return n; @@ -926,7 +931,8 @@ exec::AggregateRegistrationResult registerMinMax(const std::string& name) { .build()); } - // decimal(p,s), bigint -> row(array(decimal(p,s)), bigint) -> array(decimal(p,s)) + // decimal(p,s), bigint -> row(array(decimal(p,s)), bigint) -> + // array(decimal(p,s)) signatures.push_back( exec::AggregateFunctionSignatureBuilder() .integerVariable("a_precision")