From fd920aa077cd4214cef46c3b80775692cd48c22f Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Tue, 9 Jan 2024 13:17:16 -0800 Subject: [PATCH] Add support for aggregation fuzzer to call Presto java and setup CI run. (#7947) Summary: This PR: 1. Sets up Aggregation Fuzzer to call Presto Java 2. Sets up an experimental scheduled run for the agg fuzzer. Pull Request resolved: https://github.com/facebookincubator/velox/pull/7947 Reviewed By: mbasmanova Differential Revision: D52040998 Pulled By: kgpai fbshipit-source-id: 020282eef5f2c950c000cebed388fb160aba726b --- .github/workflows/experimental.yml | 3 +- .../fuzzer/AggregationFuzzerTest.cpp | 38 +++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/.github/workflows/experimental.yml b/.github/workflows/experimental.yml index cd330be6b1f9..a16b9d0f522e 100644 --- a/.github/workflows/experimental.yml +++ b/.github/workflows/experimental.yml @@ -116,7 +116,7 @@ jobs: name: aggregation - name: "Run Aggregate Fuzzer" - run: | + run: | mkdir -p /tmp/aggregate_fuzzer_repro/ rm -rfv /tmp/aggregate_fuzzer_repro/* chmod -R 777 /tmp/aggregate_fuzzer_repro @@ -177,4 +177,3 @@ jobs: name: spark-agg-fuzzer-failure-artifacts path: | /tmp/spark_aggregate_fuzzer_repro - diff --git a/velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp b/velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp index d41be51ad9d8..6fab78a4fd92 100644 --- a/velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp +++ b/velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp @@ -22,6 +22,7 @@ #include "velox/exec/fuzzer/AggregationFuzzerOptions.h" #include "velox/exec/fuzzer/AggregationFuzzerRunner.h" #include "velox/exec/fuzzer/DuckQueryRunner.h" +#include "velox/exec/fuzzer/PrestoQueryRunner.h" #include "velox/exec/fuzzer/TransformResultVerifier.h" #include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "velox/functions/prestosql/fuzzer/ApproxDistinctInputGenerator.h" @@ -45,6 
+46,13 @@ DEFINE_string( "this comma separated list of function names " "(e.g: --only \"min\" or --only \"sum,avg\")."); +DEFINE_string( + presto_url, + "", + "Presto coordinator URI along with port. If set, we use Presto " + "source of truth. Otherwise, use DuckDB. Example: " + "--presto_url=http://127.0.0.1:8080"); + namespace facebook::velox::exec::test { namespace { @@ -61,6 +69,23 @@ getCustomInputGenerators() { }; } +std::unique_ptr<ReferenceQueryRunner> setupReferenceQueryRunner() { + if (FLAGS_presto_url.empty()) { + auto duckQueryRunner = std::make_unique<DuckQueryRunner>(); + duckQueryRunner->disableAggregateFunctions({ + "skewness", + // DuckDB results on constant inputs are incorrect. Should be NaN, + // but DuckDB returns some random value. + "kurtosis", + "entropy", + }); + return duckQueryRunner; + } else { + return std::make_unique<PrestoQueryRunner>( + FLAGS_presto_url, "aggregation_fuzzer"); + } +} + } // namespace } // namespace facebook::velox::exec::test @@ -81,16 +106,6 @@ int main(int argc, char** argv) { size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; - auto duckQueryRunner = - std::make_unique<facebook::velox::exec::test::DuckQueryRunner>(); - duckQueryRunner->disableAggregateFunctions({ - "skewness", - // DuckDB results on constant inputs are incorrect. Should be NaN, - // but DuckDB returns some random value. - "kurtosis", - "entropy", - }); - // List of functions that have known bugs that cause crashes or failures. 
static const std::unordered_set<std::string> skipFunctions = { // https://github.com/facebookincubator/velox/issues/3493 @@ -101,6 +116,7 @@ int main(int argc, char** argv) { using facebook::velox::exec::test::ApproxDistinctResultVerifier; using facebook::velox::exec::test::ApproxPercentileResultVerifier; + using facebook::velox::exec::test::setupReferenceQueryRunner; using facebook::velox::exec::test::TransformResultVerifier; auto makeArrayVerifier = []() { @@ -161,5 +177,5 @@ int main(int argc, char** argv) { facebook::velox::exec::test::getCustomInputGenerators(); options.timestampPrecision = facebook::velox::VectorFuzzer::Options::TimestampPrecision::kMilliSeconds; - return Runner::run(initialSeed, std::move(duckQueryRunner), options); + return Runner::run(initialSeed, setupReferenceQueryRunner(), options); }