From 753c549900ca0ce718356abaffd6a0ad21745082 Mon Sep 17 00:00:00 2001 From: Masha Basmanova <mbasmanova@meta.com> Date: Thu, 4 Apr 2024 09:22:57 -0700 Subject: [PATCH] Add support for DECIMAL types to Expression Fuzzer --- .github/workflows/experimental.yml | 111 +++++++++++++++++- CONTRIBUTING.md | 4 +- velox/docs/develop/testing/fuzzer.rst | 2 + velox/expression/ReverseSignatureBinder.cpp | 20 ---- velox/expression/ReverseSignatureBinder.h | 4 - .../expression/fuzzer/ArgumentTypeFuzzer.cpp | 7 ++ velox/expression/fuzzer/ExpressionFuzzer.cpp | 75 ++++++++---- velox/expression/fuzzer/ExpressionFuzzer.h | 12 +- .../fuzzer/ExpressionFuzzerTest.cpp | 18 ++- .../fuzzer/ExpressionFuzzerVerifier.cpp | 7 +- .../fuzzer/ExpressionFuzzerVerifier.h | 4 +- velox/expression/fuzzer/FuzzerRunner.cpp | 19 ++- velox/expression/fuzzer/FuzzerRunner.h | 8 +- .../fuzzer/SparkExpressionFuzzerTest.cpp | 23 +++- .../fuzzer/tests/ArgumentTypeFuzzerTest.cpp | 2 +- 15 files changed, 254 insertions(+), 62 deletions(-) diff --git a/.github/workflows/experimental.yml b/.github/workflows/experimental.yml index 22aeb8a9939a6..f0fb0d6a830dc 100644 --- a/.github/workflows/experimental.yml +++ b/.github/workflows/experimental.yml @@ -63,12 +63,16 @@ jobs: key: ccache-benchmark-${{ github.sha }} restore-keys: | ccache-benchmark- + - name: "Checkout Repo" + if: ${{ github.event_name == 'pull_request' }} uses: actions/checkout@v3 with: - path: velox + path: 'velox' + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ inputs.ref || github.head_ref }} + fetch-depth: 0 submodules: 'recursive' - ref: "${{ inputs.ref || 'main' }}" - name: "Install dependencies" run: cd velox && source ./scripts/setup-ubuntu.sh && install_apt_deps @@ -103,6 +107,17 @@ jobs: name: join path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test + - name: Upload Presto expression fuzzer + uses: actions/upload-artifact@v3 + with: + name: presto_expression_fuzzer + path: velox/_build/debug/velox/expression/fuzzer/velox_expression_fuzzer_test + + - name: Upload Spark expression fuzzer + uses: actions/upload-artifact@v3 + with: + name: spark_expression_fuzzer + path: velox/_build/debug/velox/expression/fuzzer/spark_expression_fuzzer_test presto-java-aggregation-fuzzer-run: runs-on: 8-core @@ -250,3 +265,95 @@ jobs: name: join-fuzzer-failure-artifacts path: | /tmp/join_fuzzer_repro + + presto-expression-fuzzer-run: + runs-on: ubuntu-latest + needs: compile + timeout-minutes: 120 + steps: + + - name: "Checkout Repo" + uses: actions/checkout@v3 + with: + ref: "${{ inputs.ref || 'main' }}" + + - name: "Install dependencies" + run: source ./scripts/setup-ubuntu.sh && install_apt_deps + + - name: Download presto fuzzer + uses: actions/download-artifact@v3 + with: + name: presto_expression_fuzzer + + - name: "Run Presto Fuzzer" + run: | + mkdir -p /tmp/presto_fuzzer_repro/ + rm -rfv /tmp/presto_fuzzer_repro/* + chmod -R 777 /tmp/presto_fuzzer_repro + chmod +x velox_expression_fuzzer_test + ./velox_expression_fuzzer_test \ + --seed ${RANDOM} \ + --enable_variadic_signatures \ + --velox_fuzzer_enable_complex_types \ + --velox_fuzzer_enable_decimal_type \ + --lazy_vector_generation_ratio 0.2 \ + --velox_fuzzer_enable_column_reuse \ + --velox_fuzzer_enable_expression_reuse \ + --max_expression_trees_per_step 2 \ + --retry_with_try \ + --enable_dereference \ + --duration_sec 1800 \ + --logtostderr=1 \ + --minloglevel=1 \ + --repro_persist_path=/tmp/presto_fuzzer_repro \ + && echo -e "\n\nFuzzer run finished successfully." + + - name: Archive Presto expression production artifacts + if: always() + uses: actions/upload-artifact@v3 + with: + name: presto-fuzzer-failure-artifacts + path: | + /tmp/presto_fuzzer_repro + + spark-expression-fuzzer-run: + runs-on: ubuntu-latest + needs: compile + timeout-minutes: 120 + steps: + + - name: "Checkout Repo" + uses: actions/checkout@v3 + with: + ref: "${{ inputs.ref || 'main' }}" + + - name: "Install dependencies" + run: source ./scripts/setup-ubuntu.sh && install_apt_deps + + - name: Download spark fuzzer + uses: actions/download-artifact@v3 + with: + name: spark_expression_fuzzer + + - name: "Run Spark Fuzzer" + run: | + mkdir -p /tmp/spark_fuzzer_repro/ + rm -rfv /tmp/spark_fuzzer_repro/* + chmod -R 777 /tmp/spark_fuzzer_repro + chmod +x spark_expression_fuzzer_test + ./spark_expression_fuzzer_test \ + --seed ${RANDOM} \ + --duration_sec 1800 \ + --logtostderr=1 \ + --minloglevel=1 \ + --repro_persist_path=/tmp/spark_fuzzer_repro \ + --velox_fuzzer_enable_decimal_type \ + && echo -e "\n\nSpark Fuzzer run finished successfully." + + - name: Archive Spark expression production artifacts + if: always() + uses: actions/upload-artifact@v3 + with: + name: spark-fuzzer-failure-artifacts + path: | + /tmp/spark_fuzzer_repro diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0be82b463ad19..249716110726d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -197,11 +197,11 @@ functions: ``` # Test the new function in isolation. Use --only flag to restrict the set of functions # and run for 60 seconds or longer. - velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse # Test the new function in combination with other functions. Do not restrict the set # of functions and run for 10 minutes (600 seconds) or longer. - velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse ``` Here are example PRs: diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 639acda6ee5e5..97dd226698a84 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -228,6 +228,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index 495920e6bf1ff..274d2a06480f4 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } tryBindSucceeded_ = SignatureBinderBase::tryBind(signature_.returnType(), returnType_); return tryBindSucceeded_; diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 087adc4dbe771..3da800a5edbb6 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - // Return whether there is a constraint on an integer variable in type - // signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; // True if 'tryBind' has been called and succeeded. False otherwise. diff --git a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp index 1d7b784f29905..e077348ed014f 100644 --- a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp +++ b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp @@ -162,6 +162,13 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { if (!binder.tryBind()) { return false; } + for (const auto& [name, _] : integerBindings_) { + // If the variable used by result type is constrained, argument types can + // not be generated without following the constraints. + if (variables().count(name) && variables().at(name).constraint() != "") { + return false; + } + } bindings_ = binder.bindings(); integerBindings_ = binder.integerBindings(); } else { diff --git a/velox/expression/fuzzer/ExpressionFuzzer.cpp b/velox/expression/fuzzer/ExpressionFuzzer.cpp index 4817d71ac4627..e697c2a4110fe 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzer.cpp @@ -361,7 +361,8 @@ bool isDeterministic( return typeAndMetadata->second.deterministic; } - // functionName must be a special form. + // functionName must be a special form or a function whose determinism cannot + // be established. LOG(WARNING) << "Unable to determine if '" << functionName << "' is deterministic or not. Assuming it is."; return true; @@ -435,16 +436,20 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = useTypeName(signature, "array") || + useTypeName(signature, "map") || useTypeName(signature, "row"); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting lambda functions, or functions using timestamp with time zone + // types. return !( useTypeName(signature, "opaque") || - useTypeName(signature, "long_decimal") || - useTypeName(signature, "short_decimal") || - useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -526,10 +531,13 @@ ExpressionFuzzer::ExpressionFuzzer( FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr<VectorFuzzer>& vectorFuzzer, - const std::optional<ExpressionFuzzer::Options>& options) + const std::optional<ExpressionFuzzer::Options>& options, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators) : options_(options.value_or(Options())), vectorFuzzer_(vectorFuzzer), - state{rng_, std::max(1, options_.maxLevelOfNesting)} { + state{rng_, std::max(1, options_.maxLevelOfNesting)}, + argGenerators_(argGenerators) { VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); @@ -553,10 +561,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -590,11 +602,13 @@ ExpressionFuzzer::ExpressionFuzzer( continue; } } else { + // TODO Remove this code. argTypes are used only to call + // isDeterministic() which can be be figured out without the argTypes. ArgumentTypeFuzzer typeFuzzer{*signature, localRng}; typeFuzzer.fuzzReturnType(); - VELOX_CHECK_EQ( - typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true); - argTypes = typeFuzzer.argumentTypes(); + if (typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs)) { + argTypes = typeFuzzer.argumentTypes(); + } } if (!isDeterministic(function.first, argTypes)) { LOG(WARNING) << "Skipping non-deterministic function: " @@ -679,8 +693,10 @@ ExpressionFuzzer::ExpressionFuzzer( sortSignatureTemplates(signatureTemplates_); for (const auto& it : signatureTemplates_) { - auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; + const auto returnType = + exec::sanitizeName(it.signature->returnType().baseName()); + const auto* returnTypeKey = &returnType; + if (it.typeVariables.find(returnType) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; @@ -754,7 +770,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& funcName) { int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName); } } @@ -1028,7 +1044,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } @@ -1223,8 +1240,26 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate( auto chosenSignature = *chosen->signature; ArgumentTypeFuzzer fuzzer{chosenSignature, returnType, rng_}; - VELOX_CHECK_EQ(fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true); - auto& argumentTypes = fuzzer.argumentTypes(); + + std::vector<TypePtr> argumentTypes; + const auto it = argGenerators_.find(functionName); + if (useTypeName(chosenSignature, "decimal") && it != argGenerators_.end()) { + // For function using decimal type, prioritize to use argument generator if + // provided. + argumentTypes = it->second->generateArgs(chosenSignature, returnType, rng_); + if (argumentTypes.empty()) { + return nullptr; + } + } else { + // Use the argument fuzzer to generate argument types. + VELOX_CHECK( + fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), + "Cannot generate argument types for {} with return type {}.", + functionName, + returnType->toString()); + argumentTypes = fuzzer.argumentTypes(); + } + auto constantArguments = chosenSignature.constantArguments(); // ArgumentFuzzer may generate duplicate arguments if the signature's diff --git a/velox/expression/fuzzer/ExpressionFuzzer.h b/velox/expression/fuzzer/ExpressionFuzzer.h index 1e0dc9a74f748..a703ddb7eef91 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.h +++ b/velox/expression/fuzzer/ExpressionFuzzer.h @@ -19,6 +19,7 @@ #include "velox/core/ITypedExpr.h" #include "velox/core/QueryCtx.h" #include "velox/expression/Expr.h" +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerToolkit.h" #include "velox/expression/tests/ExpressionVerifier.h" #include "velox/functions/FunctionRegistry.h" @@ -48,6 +49,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; @@ -103,7 +108,9 @@ class ExpressionFuzzer { FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr<VectorFuzzer>& vectorFuzzer, - const std::optional<ExpressionFuzzer::Options>& options = std::nullopt); + const std::optional<ExpressionFuzzer::Options>& options = std::nullopt, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators = {}); template <typename TFunc> void registerFuncOverride(TFunc func, const std::string& name); @@ -416,6 +423,9 @@ class ExpressionFuzzer { } state; friend class ExpressionFuzzerUnitTest; + + // Maps from function name to a specific generator of argument types. + std::unordered_map<std::string, std::shared_ptr<ArgGenerator>> argGenerators_; }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp index e9f35f02f7702..3d0f664777c12 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp @@ -18,7 +18,12 @@ #include <gtest/gtest.h> #include <unordered_set> +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerRunner.h" +#include "velox/functions/prestosql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/FloorAndRoundArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/PlusMinusArgGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" DEFINE_int64( @@ -27,6 +32,8 @@ DEFINE_int64( "Initial seed for random number generator used to reproduce previous " "results (0 means start with random seed)."); +using namespace facebook::velox::exec::test; +using facebook::velox::fuzzer::ArgGenerator; using facebook::velox::fuzzer::FuzzerRunner; int main(int argc, char** argv) { @@ -66,5 +73,14 @@ int main(int argc, char** argv) { "regexp_like", }; size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run(initialSeed, skipFunctions, {{}}); + + std::unordered_map<std::string, std::shared_ptr<ArgGenerator>> argGenerators = + {{"plus", std::make_shared<PlusMinusArgGenerator>()}, + {"minus", std::make_shared<PlusMinusArgGenerator>()}, + {"multiply", std::make_shared<MultiplyArgGenerator>()}, + {"divide", std::make_shared<DivideArgGenerator>()}, + {"floor", std::make_shared<FloorAndRoundArgGenerator>()}, + {"round", std::make_shared<FloorAndRoundArgGenerator>()}}; + + return FuzzerRunner::run(initialSeed, skipFunctions, {{}}, argGenerators); } diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index 1415db5364feb..cbd7979bbf51d 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -80,7 +80,9 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const ExpressionFuzzerVerifier::Options& options) + const ExpressionFuzzerVerifier::Options& options, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators) : options_(options), queryCtx_(core::QueryCtx::create( nullptr, @@ -98,7 +100,8 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, - options.expressionFuzzerOptions) { + options.expressionFuzzerOptions, + argGenerators) { seed(initialSeed); // Init stats and register listener. diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h index f651ad5541430..be00ddca9bf12 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h @@ -51,7 +51,9 @@ class ExpressionFuzzerVerifier { ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const Options& options); + const Options& options, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/fuzzer/FuzzerRunner.cpp b/velox/expression/fuzzer/FuzzerRunner.cpp index 56c58a3658413..97ef1fc7bd670 100644 --- a/velox/expression/fuzzer/FuzzerRunner.cpp +++ b/velox/expression/fuzzer/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; @@ -204,8 +210,10 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions( int FuzzerRunner::run( size_t seed, const std::unordered_set<std::string>& skipFunctions, - const std::unordered_map<std::string, std::string>& queryConfigs) { - runFromGtest(seed, skipFunctions, queryConfigs); + const std::unordered_map<std::string, std::string>& queryConfigs, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators) { + runFromGtest(seed, skipFunctions, queryConfigs, argGenerators); return RUN_ALL_TESTS(); } @@ -213,13 +221,16 @@ int FuzzerRunner::run( void FuzzerRunner::runFromGtest( size_t seed, const std::unordered_set<std::string>& skipFunctions, - const std::unordered_map<std::string, std::string>& queryConfigs) { + const std::unordered_map<std::string, std::string>& queryConfigs, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators) { memory::MemoryManager::testingSetInstance({}); auto signatures = facebook::velox::getFunctionSignatures(); ExpressionFuzzerVerifier( signatures, seed, - getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs)) + getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs), + argGenerators) .go(); } } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/FuzzerRunner.h b/velox/expression/fuzzer/FuzzerRunner.h index 0eda0ecd1d7a9..c7c5cda54c650 100644 --- a/velox/expression/fuzzer/FuzzerRunner.h +++ b/velox/expression/fuzzer/FuzzerRunner.h @@ -33,12 +33,16 @@ class FuzzerRunner { static int run( size_t seed, const std::unordered_set<std::string>& skipFunctions, - const std::unordered_map<std::string, std::string>& queryConfigs); + const std::unordered_map<std::string, std::string>& queryConfigs, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators); static void runFromGtest( size_t seed, const std::unordered_set<std::string>& skipFunctions, - const std::unordered_map<std::string, std::string>& queryConfigs); + const std::unordered_map<std::string, std::string>& queryConfigs, + const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>& + argGenerators); }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp index ffba105e2e08d..ba5abd02bca33 100644 --- a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp @@ -24,6 +24,14 @@ #include "velox/expression/fuzzer/FuzzerRunner.h" #include "velox/functions/sparksql/Register.h" +#include "velox/functions/sparksql/fuzzer/AddSubtractArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgGenerator.h" + +using namespace facebook::velox::functions::sparksql::fuzzer; +using facebook::velox::fuzzer::ArgGenerator; DEFINE_int64( seed, @@ -58,7 +66,18 @@ int main(int argc, char** argv) { // Required by spark_partition_id function. std::unordered_map<std::string, std::string> queryConfigs = { - {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}}; + {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}, + {facebook::velox::core::QueryConfig::kSessionTimezone, + "America/Los_Angeles"}}; + + std::unordered_map<std::string, std::shared_ptr<ArgGenerator>> argGenerators = + {{"add", std::make_shared<AddSubtractArgGenerator>()}, + {"subtract", std::make_shared<AddSubtractArgGenerator>()}, + {"multiply", std::make_shared<MultiplyArgGenerator>()}, + {"divide", std::make_shared<DivideArgGenerator>()}, + {"unscaled_value", std::make_shared<UnscaledValueArgGenerator>()}, + {"make_timestamp", std::make_shared<MakeTimestampArgGenerator>()}}; - return FuzzerRunner::run(FLAGS_seed, skipFunctions, queryConfigs); + return FuzzerRunner::run( + FLAGS_seed, skipFunctions, queryConfigs, argGenerators); } diff --git a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp index 32971df22e0d6..c6926c08fc9b1 100644 --- a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp @@ -239,7 +239,7 @@ TEST_F(ArgumentTypeFuzzerTest, unsupported) { .argumentType("decimal(b_precision, b_scale)") .build(); - testFuzzingFailure(signature, DECIMAL(13, 6)); + testFuzzingFailure(signature, ROW({ARRAY(DECIMAL(13, 6))})); } TEST_F(ArgumentTypeFuzzerTest, lambda) {