From 58746becaaf76aa13f97eb8e5e805c8e9d6a2beb Mon Sep 17 00:00:00 2001 From: Wei He Date: Thu, 5 Oct 2023 14:11:35 -0700 Subject: [PATCH] Enable testReadFromFiles in testAggregations (#6852) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/6852 Reviewed By: Yuhta Differential Revision: D48737844 --- .../exec/tests/SimpleAggregateAdapterTest.cpp | 5 ++ .../aggregates/tests/AggregationTestBase.cpp | 79 +++++++++++++++---- .../aggregates/tests/AggregationTestBase.h | 36 ++++++--- .../aggregates/tests/ArbitraryTest.cpp | 48 ++++++++--- .../aggregates/tests/ArrayAggTest.cpp | 14 +++- .../tests/AverageAggregationTest.cpp | 45 ++++++++--- .../prestosql/aggregates/tests/MapAggTest.cpp | 20 ++++- .../tests/MapUnionAggregationTest.cpp | 35 ++++++-- .../aggregates/tests/MaxSizeForStatsTest.cpp | 22 +++--- .../tests/MinMaxByAggregationTest.cpp | 37 +++++++-- .../prestosql/aggregates/tests/MinMaxTest.cpp | 78 +++++++++++++----- .../aggregates/tests/ReduceAggTest.cpp | 78 +++++++++++++----- .../tests/SumDataSizeForStatsTest.cpp | 24 +++--- .../prestosql/aggregates/tests/SumTest.cpp | 25 +++++- .../tests/VarianceAggregationTest.cpp | 4 +- .../aggregates/tests/FirstAggregateTest.cpp | 79 ++++++++++++++++--- .../aggregates/tests/LastAggregateTest.cpp | 79 ++++++++++++++++--- 17 files changed, 562 insertions(+), 146 deletions(-) diff --git a/velox/exec/tests/SimpleAggregateAdapterTest.cpp b/velox/exec/tests/SimpleAggregateAdapterTest.cpp index 2c80cbeb68475..e707ff830a253 100644 --- a/velox/exec/tests/SimpleAggregateAdapterTest.cpp +++ b/velox/exec/tests/SimpleAggregateAdapterTest.cpp @@ -183,6 +183,7 @@ TEST_F(SimpleArrayAggAggregationTest, numbers) { {inputVectors}, {}, {"simple_array_agg(c2)", "simple_array_agg(c3)"}, + {"array_sort(a0)", "array_sort(a1)"}, {expected}); expected = makeRowVector( @@ -196,6 +197,7 @@ TEST_F(SimpleArrayAggAggregationTest, numbers) { {inputVectors}, {"c0"}, {"simple_array_agg(c2)", "simple_array_agg(c3)"}, + {"c0", "array_sort(a0)", "array_sort(a1)"}, {expected}); expected = makeRowVector( @@ -209,6 +211,7 @@ TEST_F(SimpleArrayAggAggregationTest, numbers) { {inputVectors}, {"c1"}, {"simple_array_agg(c2)", "simple_array_agg(c3)"}, + {"c1", "array_sort(a0)", "array_sort(a1)"}, {expected}); inputVectors = makeRowVector({makeNullableFlatVector( @@ -248,6 +251,7 @@ TEST_F(SimpleArrayAggAggregationTest, nestedArray) { {inputVectors}, {"c0"}, {"simple_array_agg(c1)", "simple_array_agg(c2)"}, + {"c0", "array_sort(a0)", "array_sort(a1)"}, {expected}); expected = makeRowVector( @@ -269,6 +273,7 @@ TEST_F(SimpleArrayAggAggregationTest, nestedArray) { {inputVectors}, {}, {"simple_array_agg(c1)", "simple_array_agg(c2)"}, + {"array_sort(a0)", "array_sort(a1)"}, {expected}); } diff --git a/velox/functions/lib/aggregates/tests/AggregationTestBase.cpp b/velox/functions/lib/aggregates/tests/AggregationTestBase.cpp index c03a3398dcc61..1d0e6e43e9516 100644 --- a/velox/functions/lib/aggregates/tests/AggregationTestBase.cpp +++ b/velox/functions/lib/aggregates/tests/AggregationTestBase.cpp @@ -88,9 +88,11 @@ void AggregationTestBase::testAggregations( const std::vector& groupingKeys, const std::vector& aggregates, const std::string& duckDbSql, - const std::unordered_map& config) { + const std::unordered_map& config, + bool testWithTableScan) { SCOPED_TRACE(duckDbSql); - testAggregations(data, groupingKeys, aggregates, {}, duckDbSql, config); + testAggregations( + data, groupingKeys, aggregates, {}, duckDbSql, config, testWithTableScan); } void AggregationTestBase::testAggregations( @@ -98,8 +100,16 @@ void AggregationTestBase::testAggregations( const std::vector& groupingKeys, const std::vector& aggregates, const std::vector& expectedResult, - const std::unordered_map& config) { - testAggregations(data, groupingKeys, aggregates, {}, expectedResult, config); + const std::unordered_map& config, + bool testWithTableScan) { + testAggregations( + data, + groupingKeys, + aggregates, + {}, + expectedResult, + config, + testWithTableScan); } void AggregationTestBase::testAggregations( @@ -108,7 +118,8 @@ void AggregationTestBase::testAggregations( const std::vector& aggregates, const std::vector& postAggregationProjections, const std::string& duckDbSql, - const std::unordered_map& config) { + const std::unordered_map& config, + bool testWithTableScan) { SCOPED_TRACE(duckDbSql); testAggregations( [&](PlanBuilder& builder) { builder.values(data); }, @@ -116,7 +127,8 @@ void AggregationTestBase::testAggregations( aggregates, postAggregationProjections, [&](auto& builder) { return builder.assertResults(duckDbSql); }, - config); + config, + testWithTableScan); } namespace { @@ -540,7 +552,8 @@ void AggregationTestBase::testReadFromFiles( const std::vector& aggregates, const std::vector& postAggregationProjections, std::function(exec::test::AssertQueryBuilder&)> - assertResults) { + assertResults, + const std::unordered_map& config) { PlanBuilder builder(pool()); makeSource(builder); auto input = AssertQueryBuilder(builder.planNode()).copyResults(pool()); @@ -565,12 +578,17 @@ void AggregationTestBase::testReadFromFiles( // so it would be the same as the original test. { ScopedChange disableTestStreaming(&testStreaming_, false); - testAggregations( + testAggregationsImpl( [&](auto& builder) { builder.tableScan(asRowType(input->type())); }, groupingKeys, aggregates, postAggregationProjections, - [&](auto& builder) { return assertResults(builder.splits(splits)); }); + [&](auto& builder) { return assertResults(builder.splits(splits)); }, + config); + } + + for (const auto& file : files) { + remove(file->path.c_str()); } } @@ -580,14 +598,16 @@ void AggregationTestBase::testAggregations( const std::vector& aggregates, const std::vector& postAggregationProjections, const std::vector& expectedResult, - const std::unordered_map& config) { + const std::unordered_map& config, + bool testWithTableScan) { testAggregations( [&](PlanBuilder& builder) { builder.values(data); }, groupingKeys, aggregates, postAggregationProjections, [&](auto& builder) { return builder.assertResults(expectedResult); }, - config); + config, + testWithTableScan); } void AggregationTestBase::testAggregations( @@ -595,14 +615,16 @@ void AggregationTestBase::testAggregations( const std::vector& groupingKeys, const std::vector& aggregates, const std::string& duckDbSql, - const std::unordered_map& config) { + const std::unordered_map& config, + bool testWithTableScan) { testAggregations( makeSource, groupingKeys, aggregates, {}, [&](auto& builder) { return builder.assertResults(duckDbSql); }, - config); + config, + testWithTableScan); } RowVectorPtr AggregationTestBase::validateStreamingInTestAggregations( @@ -667,7 +689,7 @@ RowVectorPtr AggregationTestBase::validateStreamingInTestAggregations( return expected; } -void AggregationTestBase::testAggregations( +void AggregationTestBase::testAggregationsImpl( std::function makeSource, const std::vector& groupingKeys, const std::vector& aggregates, @@ -886,6 +908,35 @@ void AggregationTestBase::testAggregations( } } +void AggregationTestBase::testAggregations( + std::function makeSource, + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& postAggregationProjections, + std::function(AssertQueryBuilder&)> + assertResults, + const std::unordered_map& config, + bool testWithTableScan) { + testAggregationsImpl( + makeSource, + groupingKeys, + aggregates, + postAggregationProjections, + assertResults, + config); + + if (testWithTableScan) { + SCOPED_TRACE("Test reading input from table scan"); + testReadFromFiles( + makeSource, + groupingKeys, + aggregates, + postAggregationProjections, + assertResults, + config); + } +} + namespace { std::pair getResultTypes( const std::string& name, diff --git a/velox/functions/lib/aggregates/tests/AggregationTestBase.h b/velox/functions/lib/aggregates/tests/AggregationTestBase.h index 8b2c1387972a7..36fb8819289ad 100644 --- a/velox/functions/lib/aggregates/tests/AggregationTestBase.h +++ b/velox/functions/lib/aggregates/tests/AggregationTestBase.h @@ -68,7 +68,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& groupingKeys, const std::vector& aggregates, const std::string& duckDbSql, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Same as above, but allows to specify a set of projections to apply after /// the aggregation. @@ -79,7 +80,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& postAggregationProjections, std::function( exec::test::AssertQueryBuilder& builder)> assertResults, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Convenience version that allows to specify input data instead of a /// function to build Values plan node. @@ -88,7 +90,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& groupingKeys, const std::vector& aggregates, const std::string& duckDbSql, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Convenience version that allows to specify input data instead of a /// function to build Values plan node. @@ -98,7 +101,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& aggregates, const std::vector& postAggregationProjections, const std::string& duckDbSql, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Convenience version that allows to specify input data instead of a /// function to build Values plan node, and the expected result instead of a @@ -108,7 +112,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& groupingKeys, const std::vector& aggregates, const std::vector& expectedResult, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Convenience version that allows to specify input data instead of a /// function to build Values plan node, and the expected result instead of a @@ -119,7 +124,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& aggregates, const std::vector& postAggregationProjections, const std::vector& expectedResult, - const std::unordered_map& config = {}); + const std::unordered_map& config = {}, + bool testWithTableScan = true); /// Ensure the function is working in streaming use case. Create a first /// aggregation function, add the rawInput1, then extract the accumulator, @@ -189,13 +195,15 @@ class AggregationTestBase : public exec::test::OperatorTestBase { const std::vector& aggregates, const std::vector& postAggregationProjections, std::function( - exec::test::AssertQueryBuilder&)> assertResults); + exec::test::AssertQueryBuilder&)> assertResults, + const std::unordered_map& config = {}); void testReadFromFiles( const std::vector& data, const std::vector& groupingKeys, const std::vector& aggregates, - const std::vector& expectedResult) { + const std::vector& expectedResult, + const std::unordered_map& config = {}) { testReadFromFiles( [&](auto& planBuilder) { planBuilder.values(data); }, groupingKeys, @@ -203,7 +211,8 @@ class AggregationTestBase : public exec::test::OperatorTestBase { {}, [&](auto& assertBuilder) { return assertBuilder.assertResults({expectedResult}); - }); + }, + config); } /// Generates a variety of logically equivalent plans to compute aggregations @@ -256,6 +265,15 @@ class AggregationTestBase : public exec::test::OperatorTestBase { vector_size_t rawInput2Size, const std::unordered_map& config = {}); + void testAggregationsImpl( + std::function makeSource, + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& postAggregationProjections, + std::function( + exec::test::AssertQueryBuilder&)> assertResults, + const std::unordered_map& config); + bool allowInputShuffle_{false}; }; diff --git a/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp b/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp index 9b55ef6643e7d..4d888d70151d9 100644 --- a/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp @@ -48,12 +48,16 @@ TEST_F(ArbitraryTest, noNulls) { "arbitrary(c5)", "arbitrary(c6)"}; + // We do not test with TableScan because having two input splits makes the + // result non-deterministic. // Global aggregation. testAggregations( vectors, {}, aggregates, - "SELECT first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp"); + "SELECT first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp", + /*config*/ {}, + /*testWithTableScan*/ false); // Group by aggregation. testAggregations( @@ -63,7 +67,9 @@ TEST_F(ArbitraryTest, noNulls) { }, {"p0"}, aggregates, - "SELECT c0 % 10, first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp GROUP BY 1"); + "SELECT c0 % 10, first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp GROUP BY 1", + /*config*/ {}, + /*testWithTableScan*/ false); // encodings: use filter to wrap aggregation inputs in a dictionary. testAggregations( @@ -74,7 +80,9 @@ TEST_F(ArbitraryTest, noNulls) { }, {"p0"}, aggregates, - "SELECT c0 % 10, first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp WHERE c0 % 2 = 0 GROUP BY 1"); + "SELECT c0 % 10, first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp WHERE c0 % 2 = 0 GROUP BY 1", + /*config*/ {}, + /*testWithTableScan*/ false); testAggregations( [&](PlanBuilder& builder) { @@ -82,7 +90,9 @@ TEST_F(ArbitraryTest, noNulls) { }, {}, aggregates, - "SELECT first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp WHERE c0 % 2 = 0"); + "SELECT first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) FROM tmp WHERE c0 % 2 = 0", + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ArbitraryTest, nulls) { @@ -102,19 +112,25 @@ TEST_F(ArbitraryTest, nulls) { makeNullConstant(TypeKind::UNKNOWN, 6)}), }; - // Global aggregation. + // We do not test with TableScan because having two input splits makes the + // result non-deterministic. Also, unknown type is not supported in Writer + // yet. Global aggregation. testAggregations( vectors, {}, {"arbitrary(c1)", "arbitrary(c2)", "arbitrary(c3)"}, - "SELECT * FROM( VALUES (4, 0.50, NULL)) AS t"); + "SELECT * FROM( VALUES (4, 0.50, NULL)) AS t", + /*config*/ {}, + /*testWithTableScan*/ false); // Group by aggregation. testAggregations( vectors, {"c0"}, {"arbitrary(c1)", "arbitrary(c2)", "arbitrary(c3)"}, - "SELECT * FROM(VALUES (1, NULL, 0.50, NULL), (2, 4, NULL, NULL), (3, 5, 0.25, NULL)) AS t"); + "SELECT * FROM(VALUES (1, NULL, 0.50, NULL), (2, 4, NULL, NULL), (3, 5, 0.25, NULL)) AS t", + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ArbitraryTest, varchar) { @@ -122,19 +138,25 @@ TEST_F(ArbitraryTest, varchar) { auto vectors = makeVectors(rowType, 1000, 10); createDuckDbTable(vectors); + // We do not test with TableScan because having two input splits makes the + // result non-deterministic. testAggregations( [&](PlanBuilder& builder) { builder.values(vectors).project({"c0 % 11", "c1"}); }, {"p0"}, {"arbitrary(c1)"}, - "SELECT c0 % 11, first(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY 1"); + "SELECT c0 % 11, first(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY 1", + /*config*/ {}, + /*testWithTableScan*/ false); testAggregations( vectors, {}, {"arbitrary(c1)"}, - "SELECT first(c1) FROM tmp WHERE c1 IS NOT NULL"); + "SELECT first(c1) FROM tmp WHERE c1 IS NOT NULL", + /*config*/ {}, + /*testWithTableScan*/ false); // encodings: use filter to wrap aggregation inputs in a dictionary. testAggregations( @@ -143,7 +165,9 @@ TEST_F(ArbitraryTest, varchar) { }, {"p0"}, {"arbitrary(c1)"}, - "SELECT c0 % 11, first(c1) FROM tmp WHERE c0 % 2 = 0 AND c1 IS NOT NULL GROUP BY 1"); + "SELECT c0 % 11, first(c1) FROM tmp WHERE c0 % 2 = 0 AND c1 IS NOT NULL GROUP BY 1", + /*config*/ {}, + /*testWithTableScan*/ false); testAggregations( [&](PlanBuilder& builder) { @@ -151,7 +175,9 @@ TEST_F(ArbitraryTest, varchar) { }, {}, {"arbitrary(c1)"}, - "SELECT first(c1) FROM tmp WHERE c0 % 2 = 0 AND c1 IS NOT NULL"); + "SELECT first(c1) FROM tmp WHERE c0 % 2 = 0 AND c1 IS NOT NULL", + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ArbitraryTest, varcharConstAndNulls) { diff --git a/velox/functions/prestosql/aggregates/tests/ArrayAggTest.cpp b/velox/functions/prestosql/aggregates/tests/ArrayAggTest.cpp index bf654ec8cfad6..3978b244dbfd5 100644 --- a/velox/functions/prestosql/aggregates/tests/ArrayAggTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/ArrayAggTest.cpp @@ -76,7 +76,10 @@ TEST_F(ArrayAggTest, groupBy) { batches, {"c0"}, {fmt::format("{}(a)", functionName)}, - fmt::format("SELECT c0, array_agg(a) {} FROM tmp GROUP BY c0", filter), + {"c0", "array_sort(a0)"}, + fmt::format( + "SELECT c0, array_sort(array_agg(a) {}) FROM tmp GROUP BY c0", + filter), makeConfig(ignoreNulls)); testAggregationsWithCompanion( batches, @@ -97,8 +100,10 @@ TEST_F(ArrayAggTest, groupBy) { batches, {"c0"}, {fmt::format("{}(a)", functionName), "max(c0)"}, + {"c0", "array_sort(a0)", "a1"}, fmt::format( - "SELECT c0, array_agg(a) {}, max(c0) FROM tmp GROUP BY c0", filter), + "SELECT c0, array_sort(array_agg(a) {}), max(c0) FROM tmp GROUP BY c0", + filter), makeConfig(ignoreNulls)); }; @@ -208,7 +213,8 @@ TEST_F(ArrayAggTest, global) { vectors, {}, {fmt::format("{}(c0)", functionName)}, - fmt::format("SELECT array_agg(c0) {} FROM tmp", filter), + {"array_sort(a0)"}, + fmt::format("SELECT array_sort(array_agg(c0) {}) FROM tmp", filter), makeConfig(ignoreNulls)); testAggregationsWithCompanion( vectors, @@ -423,6 +429,7 @@ TEST_F(ArrayAggTest, mask) { split(data), {}, {fmt::format("{}(c0) FILTER (WHERE c1)", functionName)}, + {"array_sort(a0)"}, "SELECT [1, 3, 5]"); // Group-by with all-false mask. @@ -449,6 +456,7 @@ TEST_F(ArrayAggTest, mask) { split(data), {"c0"}, {fmt::format("{}(c1) FILTER (WHERE c2)", functionName)}, + {"c0", "array_sort(a0)"}, "VALUES (10, [1, 3]), (20, [5])"); }; diff --git a/velox/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp index 43c04aef1b6f0..153e52f402884 100644 --- a/velox/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp @@ -441,6 +441,7 @@ TEST_F(AverageAggregationTest, decimalAccumulator) { } TEST_F(AverageAggregationTest, avgDecimal) { + // Skip testing with table scan because decimal is not supported in writers. auto shortDecimal = makeNullableFlatVector( {1'000, 2'000, 3'000, 4'000, 5'000, std::nullopt}, DECIMAL(10, 1)); // Short decimal aggregation @@ -450,7 +451,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {"avg(c0)"}, {}, {makeRowVector( - {makeNullableFlatVector({3'000}, DECIMAL(10, 1))})}); + {makeNullableFlatVector({3'000}, DECIMAL(10, 1))})}, + /*config*/ {}, + /*testWithTableScan*/ false); // Long decimal aggregation testAggregations( @@ -466,7 +469,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {"avg(c0)"}, {}, {makeRowVector({makeFlatVector( - std::vector{HugeInt::build(10, 300)}, DECIMAL(23, 4))})}); + std::vector{HugeInt::build(10, 300)}, DECIMAL(23, 4))})}, + /*config*/ {}, + /*testWithTableScan*/ false); // Round-up average. testAggregations( {makeRowVector( @@ -475,7 +480,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {"avg(c0)"}, {}, {makeRowVector( - {makeFlatVector(std::vector{337}, DECIMAL(3, 2))})}); + {makeFlatVector(std::vector{337}, DECIMAL(3, 2))})}, + /*config*/ {}, + /*testWithTableScan*/ false); // The total sum overflows the max int128_t limit. std::vector rawVector; @@ -489,7 +496,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {}, {makeRowVector({makeFlatVector( std::vector{DecimalUtil::kLongDecimalMax}, - DECIMAL(38, 0))})}); + DECIMAL(38, 0))})}, + /*config*/ {}, + /*testWithTableScan*/ false); // The total sum underflows the min int128_t limit. rawVector.clear(); auto underFlowTestResult = makeFlatVector( @@ -502,7 +511,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {}, {"avg(c0)"}, {}, - {makeRowVector({underFlowTestResult})}); + {makeRowVector({underFlowTestResult})}, + /*config*/ {}, + /*testWithTableScan*/ false); // Add more rows to show that average result starts deviating from expected // result with varying row count. @@ -527,7 +538,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {"avg(c0)"}, {}, {makeRowVector( - {makeFlatVector(std::vector{100}, DECIMAL(3, 2))})}); + {makeFlatVector(std::vector{100}, DECIMAL(3, 2))})}, + /*config*/ {}, + /*testWithTableScan*/ false); auto newSize = shortDecimal->size() * 2; auto indices = makeIndices(newSize, [&](int row) { return row / 2; }); @@ -540,7 +553,9 @@ TEST_F(AverageAggregationTest, avgDecimal) { {"avg(c0)"}, {}, {makeRowVector( - {makeFlatVector(std::vector{3'000}, DECIMAL(10, 1))})}); + {makeFlatVector(std::vector{3'000}, DECIMAL(10, 1))})}, + /*config*/ {}, + /*testWithTableScan*/ false); // Decimal average aggregation with multiple groups. auto inputRows = { @@ -572,7 +587,13 @@ TEST_F(AverageAggregationTest, avgDecimal) { {makeNullableFlatVector({3}), makeFlatVector(std::vector{-2498}, DECIMAL(5, 2))})}; - testAggregations(inputRows, {"c0"}, {"avg(c1)"}, expectedResult); + testAggregations( + inputRows, + {"c0"}, + {"avg(c1)"}, + expectedResult, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(AverageAggregationTest, avgDecimalWithMultipleRowVectors) { @@ -585,7 +606,13 @@ TEST_F(AverageAggregationTest, avgDecimalWithMultipleRowVectors) { auto expectedResult = {makeRowVector( {makeFlatVector(std::vector{350}, DECIMAL(5, 2))})}; - testAggregations(inputRows, {}, {"avg(c0)"}, expectedResult); + testAggregations( + inputRows, + {}, + {"avg(c0)"}, + expectedResult, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(AverageAggregationTest, constantVectorOverflow) { diff --git a/velox/functions/prestosql/aggregates/tests/MapAggTest.cpp b/velox/functions/prestosql/aggregates/tests/MapAggTest.cpp index ffaacef4a9b91..9670dab24d5d5 100644 --- a/velox/functions/prestosql/aggregates/tests/MapAggTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MapAggTest.cpp @@ -148,7 +148,15 @@ TEST_F(MapAggTest, groupByWithDuplicates) { }), }); - testAggregations(vectors, {"c0"}, {"map_agg(c1, c2)"}, {expectedResult}); + // We don't test this case with data read with files because when there are + // duplicate keys in different splits, the result is non-deterministic. + testAggregations( + vectors, + {"c0"}, + {"map_agg(c1, c2)"}, + {expectedResult}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(MapAggTest, groupByNoData) { @@ -300,7 +308,15 @@ TEST_F(MapAggTest, globalDuplicateKeys) { }), }); - testAggregations(vectors, {}, {"map_agg(c0, c1)"}, {expectedResult}); + // We don't test this case with data read with files because when there are + // duplicate keys in different splits, the result is non-deterministic. + testAggregations( + vectors, + {}, + {"map_agg(c0, c1)"}, + {expectedResult}, + /*config*/ {}, + /*testWithTableScan*/ false); } /// Reproduces the bug reported in diff --git a/velox/functions/prestosql/aggregates/tests/MapUnionAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/MapUnionAggregationTest.cpp index 014fe137e4d3a..c1315db7ff8db 100644 --- a/velox/functions/prestosql/aggregates/tests/MapUnionAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MapUnionAggregationTest.cpp @@ -262,7 +262,8 @@ TEST_F(MapUnionTest, nulls) { } TEST_F(MapUnionTest, unknownKeysAndValues) { - // map_union over empty map(unknown, unknown) is allowed. + // map_union over empty map(unknown, unknown) is allowed. Skip testing with + // data read from files because unknown type is not supported in writers. auto data = makeRowVector({ makeFlatVector({1, 2, 1}), makeMapVector({{}, {}, {}}), @@ -277,8 +278,20 @@ TEST_F(MapUnionTest, unknownKeysAndValues) { makeMapVector({{}, {}}), }); - testAggregations({data}, {}, {"map_union(c1)"}, {expectedGlobalResult}); - testAggregations({data}, {"c0"}, {"map_union(c1)"}, {expectedGroupByResult}); + testAggregations( + {data}, + {}, + {"map_union(c1)"}, + {expectedGlobalResult}, + /*config*/ {}, + /*testWithTableScan*/ false); + testAggregations( + {data}, + {"c0"}, + {"map_union(c1)"}, + {expectedGroupByResult}, + /*config*/ {}, + /*testWithTableScan*/ false); // map_union over non-empty map(T, unknown) where T is not unknown is allowed. data = makeRowVector({ @@ -316,8 +329,20 @@ TEST_F(MapUnionTest, unknownKeysAndValues) { }), }); - testAggregations({data}, {}, {"map_union(c1)"}, {expectedGlobalResult}); - testAggregations({data}, {"c0"}, {"map_union(c1)"}, {expectedGroupByResult}); + testAggregations( + {data}, + {}, + {"map_union(c1)"}, + {expectedGlobalResult}, + /*config*/ {}, + /*testWithTableScan*/ false); + testAggregations( + {data}, + {"c0"}, + {"map_union(c1)"}, + {expectedGroupByResult}, + /*config*/ {}, + /*testWithTableScan*/ false); // map_union over non-emtpy map(unknown, T) is not allowed. data = makeRowVector({ diff --git a/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp b/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp index c4c517210d33e..7889cdee58659 100644 --- a/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp @@ -100,17 +100,19 @@ Timestamp generator(vector_size_t i) { return Timestamp(i, i); } TEST_F(MaxSizeForStatsTest, allScalarTypes) { + // Make input size at least 8 to ensure drivers get 2 input batches for + // spilling when tested with data read from files. auto vectors = {makeRowVector( - {makeFlatVector({1, 2, 1, 2}), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator, nullptr, DATE()), - makeFlatVector(4, generator)})}; + {makeFlatVector({1, 2, 1, 2, 1, 2, 1, 2}), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator, nullptr, DATE()), + makeFlatVector(8, generator)})}; // With grouping keys. testAggregations( diff --git a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp index 40e99e87e34d8..6fe60bdc42192 100644 --- a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp @@ -461,12 +461,17 @@ class MinMaxByGlobalByAggregationTest fmt::format("SELECT {}", asSql(dataAt(3)))}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); + // Skip testing with data read from files because the result for some + // testData depends on input order that is non-deterministic with two + // splits. testAggregations( {testData.inputRowVector}, {}, {"min_by(c0, c1)"}, {}, - testData.verifyDuckDbSql); + testData.verifyDuckDbSql, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -555,12 +560,17 @@ class MinMaxByGlobalByAggregationTest fmt::format("SELECT {}", asSql(dataAt(3)))}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); + // Skip testing with data read from files because the result for some + // testData depends on input order that is non-deterministic with two + // splits. testAggregations( {testData.inputRowVector}, {}, {"max_by(c0, c1)"}, {}, - testData.verifyDuckDbSql); + testData.verifyDuckDbSql, + /*config*/ {}, + /*testWithTableScan*/ false); } } }; @@ -845,12 +855,17 @@ class MinMaxByGroupByAggregationTest asSql(dataAt(0)))}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); + // Skip testing with data read from files because the result for some + // testData depends on input order that is non-deterministic with two + // splits. testAggregations( {testData.inputRowVector}, {"c2"}, {"min_by(c0, c1)"}, {}, - testData.verifyDuckDbSql); + testData.verifyDuckDbSql, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -994,12 +1009,17 @@ class MinMaxByGroupByAggregationTest asSql(dataAt(0)))}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); + // Skip testing with data read from files because the result for some + // testData depends on input order that is non-deterministic with two + // splits. testAggregations( {testData.inputRowVector}, {"c2"}, {"max_by(c0, c1)"}, {}, - testData.verifyDuckDbSql); + testData.verifyDuckDbSql, + /*config*/ {}, + /*testWithTableScan*/ false); } } }; @@ -1573,10 +1593,13 @@ TEST_F(MinMaxByNTest, groupBy) { {data}, {"c0"}, {"min_by(c1, c2, 3)", "max_by(c1, c2, 4)"}, {expected}); // bool type of comparison + // Make input size at least 8 to ensure drivers get 2 input batches for + // spilling when tested with data read from files. data = makeRowVector({ - makeFlatVector({1, 2, 1, 2}), - makeFlatVector({1, 2, 3, 4}), - makeFlatVector({true, false, false, true}), + makeFlatVector({1, 2, 1, 2, 1, 2, 1, 2}), + makeFlatVector({1, 2, 3, 4, 1, 2, 3, 4}), + makeFlatVector( + {true, false, false, true, true, false, false, true}), }); expected = makeRowVector({ diff --git a/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp b/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp index ea1b2ecfebe4e..2eef56ef0e28c 100644 --- a/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MinMaxTest.cpp @@ -50,7 +50,8 @@ class MinMaxTest : public functions::aggregate::test::AggregationTestBase { } template - void doTest(TAgg agg, const TypePtr& inputType) { + void + doTest(TAgg agg, const TypePtr& inputType, bool testWithTableScan = true) { auto rowType = ROW({"c0", "c1", "mask"}, {BIGINT(), inputType, BOOLEAN()}); auto vectors = fuzzData(rowType); createDuckDbTable(vectors); @@ -61,7 +62,12 @@ class MinMaxTest : public functions::aggregate::test::AggregationTestBase { // Global aggregation. testAggregations( - vectors, {}, {agg(c1)}, fmt::format("SELECT {} FROM tmp", agg(c1))); + vectors, + {}, + {agg(c1)}, + fmt::format("SELECT {} FROM tmp", agg(c1)), + /*config*/ {}, + testWithTableScan); // Group by aggregation. testAggregations( @@ -70,12 +76,19 @@ class MinMaxTest : public functions::aggregate::test::AggregationTestBase { }, {"p0"}, {agg(c1)}, - fmt::format("SELECT c0 % 10, {} FROM tmp GROUP BY 1", agg(c1))); + fmt::format("SELECT c0 % 10, {} FROM tmp GROUP BY 1", agg(c1)), + /*config*/ {}, + testWithTableScan); // Masked aggregations. auto maskedAgg = agg(c1) + " filter (where mask)"; testAggregations( - vectors, {}, {maskedAgg}, fmt::format("SELECT {} FROM tmp", maskedAgg)); + vectors, + {}, + {maskedAgg}, + fmt::format("SELECT {} FROM tmp", maskedAgg), + /*config*/ {}, + testWithTableScan); testAggregations( [&](auto& builder) { @@ -83,7 +96,9 @@ class MinMaxTest : public functions::aggregate::test::AggregationTestBase { }, {"p0"}, {maskedAgg}, - fmt::format("SELECT c0 % 10, {} FROM tmp GROUP BY 1", maskedAgg)); + fmt::format("SELECT c0 % 10, {} FROM tmp GROUP BY 1", maskedAgg), + /*config*/ {}, + testWithTableScan); // Encodings: use filter to wrap aggregation inputs in a dictionary. testAggregations( @@ -95,14 +110,17 @@ class MinMaxTest : public functions::aggregate::test::AggregationTestBase { {"p0"}, {agg(c1)}, fmt::format( - "SELECT c0 % 11, {} FROM tmp WHERE c0 % 2 = 0 GROUP BY 1", - agg(c1))); + "SELECT c0 % 11, {} FROM tmp WHERE c0 % 2 = 0 GROUP BY 1", agg(c1)), + /*config*/ {}, + testWithTableScan); testAggregations( [&](auto& builder) { builder.values(vectors).filter("c0 % 2 = 0"); }, {}, {agg(c1)}, - fmt::format("SELECT {} FROM tmp WHERE c0 % 2 = 0", agg(c1))); + fmt::format("SELECT {} FROM tmp WHERE c0 % 2 = 0", agg(c1)), + /*config*/ {}, + testWithTableScan); } }; @@ -266,19 +284,19 @@ TEST_F(MinMaxTest, initialValue) { } TEST_F(MinMaxTest, maxShortDecimal) { - doTest(max, DECIMAL(18, 3)); + doTest(max, DECIMAL(18, 3), false); } TEST_F(MinMaxTest, minShortDecimal) { - doTest(min, DECIMAL(3, 1)); + doTest(min, DECIMAL(3, 1), false); } TEST_F(MinMaxTest, maxLongDecimal) { - doTest(max, DECIMAL(20, 3)); + doTest(max, DECIMAL(20, 3), false); } TEST_F(MinMaxTest, minLongDecimal) { - doTest(min, DECIMAL(38, 19)); + doTest(min, DECIMAL(38, 19), false); } TEST_F(MinMaxTest, array) { @@ -505,11 +523,15 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { }), }); + // TODO: Enable testWithTableScan after fixing + // https://github.com/facebookincubator/velox/issues/6506. testAggregations( {data}, {}, {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Add some nulls. Expect these to be ignored. data = makeRowVector({ @@ -533,7 +555,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {}, {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Test all null input. data = makeRowVector({ @@ -551,7 +575,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {}, {"min(c0, 2)", "min(c0, 5)", "max(c0, 3)", "max(c0, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Test the NULL handling in `N` param. data = makeRowVector({ @@ -584,7 +610,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {}, {"min(c0, c1)", "min(c0, c3)", "max(c0, c2)", "max(c0, c3)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Second argument of max_n/min_n must be less than or equal to 10000. VELOX_ASSERT_THROW( @@ -622,11 +650,15 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { }), }); + // TODO: Enable testWithTableScan after fixing + // https://github.com/facebookincubator/velox/issues/6506. testAggregations( {data}, {"c0"}, {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Add some nulls. Expect these to be ignored. data = makeRowVector({ @@ -639,7 +671,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {"c0"}, {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Test all null input. data = makeRowVector({ @@ -681,7 +715,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {"c0"}, {"min(c1, 2)", "min(c1, 5)", "max(c1, 3)", "max(c1, 7)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Test the NULL handling in `N` param. data = makeRowVector({ @@ -721,7 +757,9 @@ class MinMaxNTest : public functions::aggregate::test::AggregationTestBase { {data}, {"c0"}, {"min(c1, c2)", "min(c1, c4)", "max(c1, c3)", "max(c1, c4)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } }; diff --git a/velox/functions/prestosql/aggregates/tests/ReduceAggTest.cpp b/velox/functions/prestosql/aggregates/tests/ReduceAggTest.cpp index 2086342d63c14..050b3e4ca5550 100644 --- a/velox/functions/prestosql/aggregates/tests/ReduceAggTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/ReduceAggTest.cpp @@ -62,11 +62,15 @@ TEST_F(ReduceAggTest, integersGlobal) { int64_t product = 1 * 2 * 3 * 4 * 5 * 1 * 1; auto expected = makeRowVector({makeConstant(product, 1)}); + // Skip testing with data read from files until + // https://github.com/facebookincubator/velox/issues/6740 is fixed. testAggregations( {data}, {}, {"reduce_agg(c0, 1, (x, y) -> (x * y), (x, y) -> (x * y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // No nulls. With mask. product = 1 * 3 * 4 * 1; @@ -76,7 +80,9 @@ TEST_F(ReduceAggTest, integersGlobal) { {data}, {}, {"reduce_agg(c0, 1, (x, y) -> (x * y), (x, y) -> (x * y)) FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Some nulls. product = 1 * 2 * 0 * 1 * 3; @@ -86,7 +92,9 @@ TEST_F(ReduceAggTest, integersGlobal) { {data}, {}, {"reduce_agg(c1, 1, (x, y) -> (x * y), (x, y) -> (x * y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Same as above, but using a sum reduction. int64_t sum = 1 + 2 + 3 + 4 + 5 + 1 + 1; @@ -96,7 +104,9 @@ TEST_F(ReduceAggTest, integersGlobal) { {data}, {}, {"reduce_agg(c0, 0, (x, y) -> (x + y), (x, y) -> (x + y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); sum = 1 + 2 + 0 + 1 + 3; expected = makeRowVector({makeConstant(sum, 1)}); @@ -105,7 +115,9 @@ TEST_F(ReduceAggTest, integersGlobal) { {data}, {}, {"reduce_agg(c1, 0, (x, y) -> (x + y), (x, y) -> (x + y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, integersGroupBy) { @@ -126,7 +138,9 @@ TEST_F(ReduceAggTest, integersGroupBy) { {data}, {"k"}, {"reduce_agg(c0, 1, (x, y) -> (x * y), (x, y) -> (x * y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // With mask. expected = makeRowVector({ @@ -138,7 +152,9 @@ TEST_F(ReduceAggTest, integersGroupBy) { {data}, {"k"}, {"reduce_agg(c0, 1, (x, y) -> (x * y), (x, y) -> (x * y)) FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // Sum reduction. expected = makeRowVector({ @@ -150,7 +166,9 @@ TEST_F(ReduceAggTest, integersGroupBy) { {data}, {"k"}, {"reduce_agg(c0, 0, (x, y) -> (x + y), (x, y) -> (x + y))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); // With mask. expected = makeRowVector({ @@ -162,7 +180,9 @@ TEST_F(ReduceAggTest, integersGroupBy) { {data}, {"k"}, {"reduce_agg(c0, 0, (x, y) -> (x + y), (x, y) -> (x + y)) FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, arraysGlobal) { @@ -188,7 +208,9 @@ TEST_F(ReduceAggTest, arraysGlobal) { {"reduce_agg(c0, c1, " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3), " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); expected = makeRowVector({makeArrayVector({{7, 3, 2}})}); @@ -199,7 +221,9 @@ TEST_F(ReduceAggTest, arraysGlobal) { "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3), " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3)) " "FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, arraysGroupBy) { @@ -232,7 +256,9 @@ TEST_F(ReduceAggTest, arraysGroupBy) { {"reduce_agg(c0, c1, " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3), " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); expected = makeRowVector({ makeFlatVector({1, 2}), @@ -249,7 +275,9 @@ TEST_F(ReduceAggTest, arraysGroupBy) { "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3), " "(a, b) -> slice(reverse(array_sort(array_distinct(concat(a, b)))), 1, 3)) " "FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, differentInputAndCombine) { @@ -275,7 +303,9 @@ TEST_F(ReduceAggTest, differentInputAndCombine) { {"reduce_agg(c0, c1, " "(s, x) -> array_sort(array_distinct(concat(s, array[x]))), " "(s, s2) -> array_sort(array_distinct(concat(s, s2))))"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); expected = makeRowVector({ makeFlatVector({1, 2}), @@ -289,7 +319,9 @@ TEST_F(ReduceAggTest, differentInputAndCombine) { "(s, x) -> array_sort(array_distinct(concat(s, array[x]))), " "(s, s2) -> array_sort(array_distinct(concat(s, s2)))) " "FILTER (WHERE m)"}, - {expected}); + {expected}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, fuzzGlobalSum) { @@ -304,7 +336,9 @@ TEST_F(ReduceAggTest, fuzzGlobalSum) { {}, {"reduce_agg(c0, 0, (x, y) -> (x + y), (x, y) -> (x + y))"}, {}, - {sumResults}); + {sumResults}, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, fuzzGroupBySum) { @@ -324,7 +358,9 @@ TEST_F(ReduceAggTest, fuzzGroupBySum) { {"key"}, {"reduce_agg(c0, 0, (x, y) -> (x + y), (x, y) -> (x + y))"}, {}, - [&](auto& builder) { return builder.assertResults(sumResults); }); + [&](auto& builder) { return builder.assertResults(sumResults); }, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, fuzzGlobalAvg) { @@ -346,7 +382,9 @@ TEST_F(ReduceAggTest, fuzzGlobalAvg) { "(s, x) -> (row_constructor(s.sum + x, s.count + 1)), " "(s, s2) -> (row_constructor(s.sum + s2.sum, s.count + s2.count)))"}, {"a0.sum / cast(a0.count as double)"}, - [&](auto& builder) { return builder.assertResults(avgResults); }); + [&](auto& builder) { return builder.assertResults(avgResults); }, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(ReduceAggTest, fuzzGroupByAvg) { @@ -372,7 +410,9 @@ TEST_F(ReduceAggTest, fuzzGroupByAvg) { "(s, x) -> (row_constructor(s.sum + x, s.count + 1)), " "(s, s2) -> (row_constructor(s.sum + s2.sum, s.count + s2.count)))"}, {"key", "a0.sum / cast(a0.count as double)"}, - [&](auto& builder) { return builder.assertResults(avgResults); }); + [&](auto& builder) { return builder.assertResults(avgResults); }, + /*config*/ {}, + /*testWithTableScan*/ false); } } // namespace diff --git a/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp b/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp index 4695999ba1c84..4702b034bcd4c 100644 --- a/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp @@ -96,16 +96,18 @@ Timestamp generator(vector_size_t i) { return Timestamp(i, i); } TEST_F(SumDataSizeForStatsTest, allScalarTypes) { + // Make input size at least 8 to ensure drivers get 2 input batches for + // spilling when tested with data read from files. auto vectors = {makeRowVector( - {makeFlatVector({1, 2, 1, 2}), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator), - makeFlatVector(4, generator)})}; + {makeFlatVector({1, 2, 1, 2, 1, 2, 1, 2}), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator), + makeFlatVector(8, generator)})}; // With grouping keys. testAggregations( @@ -120,7 +122,7 @@ TEST_F(SumDataSizeForStatsTest, allScalarTypes) { "sum_data_size_for_stats(c7)", "sum_data_size_for_stats(c8)"}, // result for group 1 and 2 - "VALUES (1,2,4,8,16,8,16,2,32),(2,2,4,8,16,8,16,2,32)"); + "VALUES (1,4,8,16,32,16,32,4,64),(2,4,8,16,32,16,32,4,64)"); // Without grouping keys. testAggregations( @@ -134,7 +136,7 @@ TEST_F(SumDataSizeForStatsTest, allScalarTypes) { "sum_data_size_for_stats(c6)", "sum_data_size_for_stats(c7)", "sum_data_size_for_stats(c8)"}, - "VALUES (4,8,16,32,16,32,4,64)"); + "VALUES (8,16,32,64,32,64,8,128)"); } TEST_F(SumDataSizeForStatsTest, arrayGlobalAggregate) { diff --git a/velox/functions/prestosql/aggregates/tests/SumTest.cpp b/velox/functions/prestosql/aggregates/tests/SumTest.cpp index 1a51c1cc778f3..caefa70541406 100644 --- a/velox/functions/prestosql/aggregates/tests/SumTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/SumTest.cpp @@ -244,6 +244,8 @@ TEST_F(SumTest, sumDoubleAndFloat) { } TEST_F(SumTest, sumDecimal) { + // Skip testing with data read from files because decimal is not supported in + // writers. std::vector> shortDecimalRawVector; std::vector> longDecimalRawVector; for (int i = 0; i < 1000; ++i) { @@ -257,7 +259,12 @@ TEST_F(SumTest, sumDecimal) { makeNullableFlatVector(longDecimalRawVector, DECIMAL(23, 4))}); createDuckDbTable({input}); testAggregations( - {input}, {}, {"sum(c0)", "sum(c1)"}, "SELECT sum(c0), sum(c1) FROM tmp"); + {input}, + {}, + {"sum(c0)", "sum(c1)"}, + "SELECT sum(c0), sum(c1) FROM tmp", + /*config*/ {}, + /*testWithTableScan*/ false); // Decimal sum aggregation with multiple groups. auto inputRows = { @@ -292,7 +299,13 @@ TEST_F(SumTest, sumDecimal) { makeFlatVector( std::vector{-7493}, DECIMAL(38, 2))})}; - testAggregations(inputRows, {"c0"}, {"sum(c1)"}, expectedResult); + testAggregations( + inputRows, + {"c0"}, + {"sum(c1)"}, + expectedResult, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(SumTest, sumDecimalOverflow) { @@ -304,7 +317,13 @@ TEST_F(SumTest, sumDecimalOverflow) { auto input = makeRowVector( {makeFlatVector(shortDecimalInput, DECIMAL(17, 5))}); createDuckDbTable({input}); - testAggregations({input}, {}, {"sum(c0)"}, "SELECT sum(c0) FROM tmp"); + testAggregations( + {input}, + {}, + {"sum(c0)"}, + "SELECT sum(c0) FROM tmp", + /*config*/ {}, + /*testWithTableScan*/ false); auto decimalSumOverflow = [this]( const std::vector& input, diff --git a/velox/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp index bebf61b0bfed1..a4ee8b1bff754 100644 --- a/velox/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp @@ -211,7 +211,9 @@ TEST_F(VarianceAggregationTest, varianceWithGlobalAggregationAndFilter) { } TEST_F(VarianceAggregationTest, varianceWithGroupBy) { - auto vectors = makeVectors(rowType_, 10, 20); + // TODO: increase number of batches after fixing + // https://github.com/facebookincubator/velox/issues/6505. + auto vectors = makeVectors(rowType_, 10, 8); createDuckDbTable(vectors); for (const auto& aggrName : aggrNames_) { diff --git a/velox/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp b/velox/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp index 87c6e3c50bdf3..b3afecd239d2f 100644 --- a/velox/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp +++ b/velox/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp @@ -44,13 +44,17 @@ class FirstAggregateTest : public AggregationTestBase { createDuckDbTable(vectors); + // We do not test with data read from files because having two input + // splits makes the result non-deterministic. { SCOPED_TRACE("ignore null + group by"); testAggregations( vectors, {"c0"}, {"spark_first_ignore_null(c1)"}, - "SELECT c0, first(c1 ORDER BY c1 NULLS LAST) FROM tmp GROUP BY c0"); + "SELECT c0, first(c1 ORDER BY c1 NULLS LAST) FROM tmp GROUP BY c0", + /*config*/ {}, + /*testWithTableScan*/ false); } { // Expected result should have first 7 rows including nulls. @@ -62,7 +66,13 @@ class FirstAggregateTest : public AggregationTestBase { [](auto row) { return row; }, // valueAt [](auto row) { return row % 3 == 0; }), // nullAt })}; - testAggregations(vectors, {"c0"}, {"spark_first(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_first(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -75,13 +85,24 @@ class FirstAggregateTest : public AggregationTestBase { SCOPED_TRACE("ignore null + global"); auto expectedTrue = {makeRowVector({makeNullableFlatVector({1})})}; testAggregations( - vectors, {}, {"spark_first_ignore_null(c0)"}, expectedTrue); + vectors, + {}, + {"spark_first_ignore_null(c0)"}, + expectedTrue, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + global"); auto expectedFalse = { makeRowVector({makeNullableFlatVector({std::nullopt})})}; - testAggregations(vectors, {}, {"spark_first(c0)"}, expectedFalse); + testAggregations( + vectors, + {}, + {"spark_first(c0)"}, + expectedFalse, + /*config*/ {}, + /*testWithTableScan*/ false); } } } @@ -93,12 +114,23 @@ class FirstAggregateTest : public AggregationTestBase { { SCOPED_TRACE("ignore null + group by"); testAggregations( - data, {"c0"}, {"spark_first_ignore_null(c1)"}, ignoreNullData); + data, + {"c0"}, + {"spark_first_ignore_null(c1)"}, + ignoreNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + group by"); - testAggregations(data, {"c0"}, {"spark_first(c1)"}, hasNullData); + testAggregations( + data, + {"c0"}, + {"spark_first(c1)"}, + hasNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -109,12 +141,23 @@ class FirstAggregateTest : public AggregationTestBase { { SCOPED_TRACE("ignore null + global"); testAggregations( - data, {}, {"spark_first_ignore_null(c0)"}, ignoreNullData); + data, + {}, + {"spark_first_ignore_null(c0)"}, + ignoreNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + global"); - testAggregations(data, {}, {"spark_first(c0)"}, hasNullData); + testAggregations( + data, + {}, + {"spark_first(c0)"}, + hasNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } } }; @@ -379,7 +422,9 @@ TEST_F(FirstAggregateTest, varcharGroupBy) { vectors, {"c0"}, {"spark_first_ignore_null(c1)"}, - "SELECT c0, first(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY c0"); + "SELECT c0, first(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY c0", + /*config*/ {}, + /*testWithTableScan*/ false); } { @@ -391,7 +436,13 @@ TEST_F(FirstAggregateTest, varcharGroupBy) { [&data](auto row) { return StringView(data[row]); }, // valueAt nullEvery(3)), })}; - testAggregations(vectors, {"c0"}, {"spark_first(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_first(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -482,7 +533,13 @@ TEST_F(FirstAggregateTest, mapGroupBy) { [](auto idx) { return idx * 0.1; }), // valueAt })}; - testAggregations(vectors, {"c0"}, {"spark_first(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_first(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(FirstAggregateTest, mapGlobal) { diff --git a/velox/functions/sparksql/aggregates/tests/LastAggregateTest.cpp b/velox/functions/sparksql/aggregates/tests/LastAggregateTest.cpp index ad0f9fa7635c4..3df962e73b4ff 100644 --- a/velox/functions/sparksql/aggregates/tests/LastAggregateTest.cpp +++ b/velox/functions/sparksql/aggregates/tests/LastAggregateTest.cpp @@ -42,13 +42,17 @@ class LastAggregateTest : public aggregate::test::AggregationTestBase { createDuckDbTable(vectors); + // We do not test with data read from files because having two input + // splits makes the result non-deterministic. { SCOPED_TRACE("ignore null + group by"); testAggregations( vectors, {"c0"}, {"spark_last_ignore_null(c1)"}, - "SELECT c0, last(c1 ORDER BY c1 NULLS FIRST) FROM tmp GROUP BY c0"); + "SELECT c0, last(c1 ORDER BY c1 NULLS FIRST) FROM tmp GROUP BY c0", + /*config*/ {}, + /*testWithTableScan*/ false); } { // Expected result should have first 7 rows including nulls. @@ -60,7 +64,13 @@ class LastAggregateTest : public aggregate::test::AggregationTestBase { [](auto row) { return 91 + row; }, // valueAt [](auto row) { return (91 + row) % 3 == 0; }), // nullAt })}; - testAggregations(vectors, {"c0"}, {"spark_last(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_last(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -73,13 +83,24 @@ class LastAggregateTest : public aggregate::test::AggregationTestBase { SCOPED_TRACE("ignore null + global"); auto expectedTrue = {makeRowVector({makeNullableFlatVector({2})})}; testAggregations( - vectors, {}, {"spark_last_ignore_null(c0)"}, expectedTrue); + vectors, + {}, + {"spark_last_ignore_null(c0)"}, + expectedTrue, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + global"); auto expectedFalse = { makeRowVector({makeNullableFlatVector({std::nullopt})})}; - testAggregations(vectors, {}, {"spark_last(c0)"}, expectedFalse); + testAggregations( + vectors, + {}, + {"spark_last(c0)"}, + expectedFalse, + /*config*/ {}, + /*testWithTableScan*/ false); } } } @@ -91,12 +112,23 @@ class LastAggregateTest : public aggregate::test::AggregationTestBase { { SCOPED_TRACE("ignore null + group by"); testAggregations( - data, {"c0"}, {"spark_last_ignore_null(c1)"}, ignoreNullData); + data, + {"c0"}, + {"spark_last_ignore_null(c1)"}, + ignoreNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + group by"); - testAggregations(data, {"c0"}, {"spark_last(c1)"}, hasNullData); + testAggregations( + data, + {"c0"}, + {"spark_last(c1)"}, + hasNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } } @@ -107,12 +139,23 @@ class LastAggregateTest : public aggregate::test::AggregationTestBase { { SCOPED_TRACE("ignore null + global"); testAggregations( - data, {}, {"spark_last_ignore_null(c0)"}, ignoreNullData); + data, + {}, + {"spark_last_ignore_null(c0)"}, + ignoreNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } { SCOPED_TRACE("not ignore null + global"); - testAggregations(data, {}, {"spark_last(c0)"}, hasNullData); + testAggregations( + data, + {}, + {"spark_last(c0)"}, + hasNullData, + /*config*/ {}, + /*testWithTableScan*/ false); } } }; @@ -378,7 +421,9 @@ TEST_F(LastAggregateTest, varcharGroupBy) { vectors, {"c0"}, {"spark_last_ignore_null(c1)"}, - "SELECT c0, last(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY c0"); + "SELECT c0, last(c1) FROM tmp WHERE c1 IS NOT NULL GROUP BY c0", + /*config*/ {}, + /*testWithTableScan*/ false); // Verify when ignoreNull is false. // Expected result should have last 7 rows [91..98) including nulls. @@ -389,7 +434,13 @@ TEST_F(LastAggregateTest, varcharGroupBy) { [&data](auto row) { return StringView(data[91 + row]); }, // valueAt [](auto row) { return (91 + row) % 3 == 0; }), // nullAt })}; - testAggregations(vectors, {"c0"}, {"spark_last(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_last(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(LastAggregateTest, varcharGlobal) { @@ -482,7 +533,13 @@ TEST_F(LastAggregateTest, mapGroupBy) { [](auto idx) { return (92 + idx) * 0.1; }), // valueAt })}; - testAggregations(vectors, {"c0"}, {"spark_last(c1)"}, expected); + testAggregations( + vectors, + {"c0"}, + {"spark_last(c1)"}, + expected, + /*config*/ {}, + /*testWithTableScan*/ false); } TEST_F(LastAggregateTest, mapGlobal) {