From 7c890cef07f4b01311db0026662d1e7975ac7fc1 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Tue, 5 Dec 2023 10:40:13 -0800 Subject: [PATCH] make skipFunctions, onlyFunctions, and specialForms as ExpressionFuzzer options. (#7882) Summary: More refactoring for the expression fuzzer. 1) Push skipFunctions, onlyFunctions, and specialForms down as ExpressionFuzzer options. 2) FuzzerRunner is now just a tool that wraps ExpressionFuzzerVerifier into a unit test. 3) Move the comment from the FuzzerRunner class to ExpressionFuzzerVerifier, since it describes the latter. Next diff: 4) Move all the flags from ExpressionFuzzerVerifier to FuzzerRunner and pass them through ExpressionFuzzerVerifier::Options. Behavior change: the Spark fuzzer used to support only the "and,or" special forms by default; now it uses all of them: "and,or,cast,coalesce,if,switch". Differential Revision: D51856248 --- velox/expression/tests/ExpressionFuzzer.cpp | 307 +++++++++++++++++- velox/expression/tests/ExpressionFuzzer.h | 21 +- .../expression/tests/ExpressionFuzzerTest.cpp | 16 +- .../tests/ExpressionFuzzerUnitTest.cpp | 15 +- .../tests/ExpressionFuzzerVerifier.cpp | 24 +- .../tests/ExpressionFuzzerVerifier.h | 21 +- velox/expression/tests/FuzzerRunner.cpp | 200 +----------- velox/expression/tests/FuzzerRunner.h | 154 +-------- .../tests/SparkExpressionFuzzerTest.cpp | 16 +- 9 files changed, 380 insertions(+), 394 deletions(-) diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index f7372353b1588..93b8483c2f6b1 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -32,7 +32,6 @@ namespace facebook::velox::test { namespace { - using exec::SignatureBinder; using exec::SignatureBinderBase; @@ -71,6 +70,277 @@ class FullSignatureBinder : public SignatureBinderBase { bool bound_{false}; }; +static const std::vector kIntegralTypes{ + "tinyint", + "smallint", + "integer", + "bigint", + "boolean"}; + +static const std::vector kFloatingPointTypes{"real", 
"double"}; + +facebook::velox::exec::FunctionSignaturePtr makeCastSignature( + const std::string& fromType, + const std::string& toType) { + return facebook::velox::exec::FunctionSignatureBuilder() + .argumentType(fromType) + .returnType(toType) + .build(); +} + +void addCastFromIntegralSignatures( + const std::string& toType, + std::vector& signatures) { + for (const auto& fromType : kIntegralTypes) { + signatures.push_back(makeCastSignature(fromType, toType)); + } +} + +void addCastFromFloatingPointSignatures( + const std::string& toType, + std::vector& signatures) { + for (const auto& fromType : kFloatingPointTypes) { + signatures.push_back(makeCastSignature(fromType, toType)); + } +} + +void addCastFromVarcharSignature( + const std::string& toType, + std::vector& signatures) { + signatures.push_back(makeCastSignature("varchar", toType)); +} + +void addCastFromTimestampSignature( + const std::string& toType, + std::vector& signatures) { + signatures.push_back(makeCastSignature("timestamp", toType)); +} + +void addCastFromDateSignature( + const std::string& toType, + std::vector& signatures) { + signatures.push_back(makeCastSignature("date", toType)); +} + +std::vector +getSignaturesForCast() { + std::vector signatures; + + // To integral types. + for (const auto& toType : kIntegralTypes) { + addCastFromIntegralSignatures(toType, signatures); + addCastFromFloatingPointSignatures(toType, signatures); + addCastFromVarcharSignature(toType, signatures); + } + + // To floating-point types. + for (const auto& toType : kFloatingPointTypes) { + addCastFromIntegralSignatures(toType, signatures); + addCastFromFloatingPointSignatures(toType, signatures); + addCastFromVarcharSignature(toType, signatures); + } + + // To varchar type. 
+ addCastFromIntegralSignatures("varchar", signatures); + addCastFromFloatingPointSignatures("varchar", signatures); + addCastFromVarcharSignature("varchar", signatures); + addCastFromDateSignature("varchar", signatures); + addCastFromTimestampSignature("varchar", signatures); + + // To timestamp type. + addCastFromVarcharSignature("timestamp", signatures); + addCastFromDateSignature("timestamp", signatures); + + // To date type. + addCastFromVarcharSignature("date", signatures); + addCastFromTimestampSignature("date", signatures); + + // For each supported translation pair T --> U, add signatures of array(T) --> + // array(U), map(varchar, T) --> map(varchar, U), row(T) --> row(U). + auto size = signatures.size(); + for (auto i = 0; i < size; ++i) { + auto from = signatures[i]->argumentTypes()[0].baseName(); + auto to = signatures[i]->returnType().baseName(); + + signatures.push_back(makeCastSignature( + fmt::format("array({})", from), fmt::format("array({})", to))); + + signatures.push_back(makeCastSignature( + fmt::format("map(varchar, {})", from), + fmt::format("map(varchar, {})", to))); + + signatures.push_back(makeCastSignature( + fmt::format("row({})", from), fmt::format("row({})", to))); + } + return signatures; +} + +static const std::unordered_map< + std::string, + std::vector> + kSpecialForms = { + {"and", + std::vector{ + // Signature: and (condition,...) -> output: + // boolean, boolean,.. -> boolean + facebook::velox::exec::FunctionSignatureBuilder() + .argumentType("boolean") + .argumentType("boolean") + .variableArity() + .returnType("boolean") + .build()}}, + {"or", + std::vector{ + // Signature: or (condition,...) -> output: + // boolean, boolean,.. -> boolean + facebook::velox::exec::FunctionSignatureBuilder() + .argumentType("boolean") + .argumentType("boolean") + .variableArity() + .returnType("boolean") + .build()}}, + {"coalesce", + std::vector{ + // Signature: coalesce (input,...) -> output: + // T, T,.. 
-> T + facebook::velox::exec::FunctionSignatureBuilder() + .typeVariable("T") + .argumentType("T") + .argumentType("T") + .variableArity() + .returnType("T") + .build()}}, + { + "if", + std::vector{ + // Signature: if (condition, then) -> output: + // boolean, T -> T + facebook::velox::exec::FunctionSignatureBuilder() + .typeVariable("T") + .argumentType("boolean") + .argumentType("T") + .returnType("T") + .build(), + // Signature: if (condition, then, else) -> output: + // boolean, T, T -> T + facebook::velox::exec::FunctionSignatureBuilder() + .typeVariable("T") + .argumentType("boolean") + .argumentType("T") + .argumentType("T") + .returnType("T") + .build()}, + }, + { + "switch", + std::vector{ + // Signature: Switch (condition, then) -> output: + // boolean, T -> T + // This is only used to bind to a randomly selected type for the + // output, then while generating arguments, an override is used + // to generate inputs that can create variation of multiple + // cases and may or may not include a final else clause. + facebook::velox::exec::FunctionSignatureBuilder() + .typeVariable("T") + .argumentType("boolean") + .argumentType("T") + .returnType("T") + .build()}, + }, + { + "cast", + /// TODO: Add supported Cast signatures to CastTypedExpr and expose + /// them to fuzzer instead of hard-coding signatures here. + getSignaturesForCast(), + }, +}; + +static std::unordered_set splitNames(const std::string& names) { + // Parse, lower case and trim it. 
+ std::vector nameList; + folly::split(',', names, nameList); + std::unordered_set nameSet; + + for (const auto& it : nameList) { + auto str = folly::trimWhitespace(it).toString(); + folly::toLowerAscii(str); + nameSet.insert(str); + } + return nameSet; +} + +static std::pair splitSignature( + const std::string& signature) { + const auto parenPos = signature.find("("); + + if (parenPos != std::string::npos) { + return {signature.substr(0, parenPos), signature.substr(parenPos)}; + } + + return {signature, ""}; +} + +// Parse the comma separated list of function names, and use it to filter the +// input signatures. +static void filterSignatures( + facebook::velox::FunctionSignatureMap& input, + const std::string& onlyFunctions, + const std::unordered_set& skipFunctions) { + if (!onlyFunctions.empty()) { + // Parse, lower case and trim it. + auto nameSet = splitNames(onlyFunctions); + + // Use the generated set to filter the input signatures. + for (auto it = input.begin(); it != input.end();) { + if (!nameSet.count(it->first)) { + it = input.erase(it); + } else + it++; + } + } + + for (auto skip : skipFunctions) { + // 'skip' can be function name or signature. + const auto [skipName, skipSignature] = splitSignature(skip); + + if (skipSignature.empty()) { + input.erase(skipName); + } else { + auto it = input.find(skipName); + if (it != input.end()) { + // Compiler refuses to reference 'skipSignature' from the lambda as + // is. 
+ const auto& signatureToRemove = skipSignature; + + auto removeIt = std::find_if( + it->second.begin(), it->second.end(), [&](const auto& signature) { + return signature->toString() == signatureToRemove; + }); + VELOX_CHECK( + removeIt != it->second.end(), "Skip signature not found: {}", skip); + it->second.erase(removeIt); + } + } + } +} + +static void appendSpecialForms( + facebook::velox::FunctionSignatureMap& signatureMap, + const std::string& specialForms) { + auto specialFormNames = splitNames(specialForms); + for (const auto& [name, signatures] : kSpecialForms) { + if (specialFormNames.count(name) == 0) { + LOG(INFO) << "Skipping special form: " << name; + continue; + } + std::vector rawSignatures; + for (const auto& signature : signatures) { + rawSignatures.push_back(signature.get()); + } + signatureMap.insert({name, std::move(rawSignatures)}); + } +} + /// Returns if `functionName` with the given `argTypes` is deterministic. /// Returns true if the function was not found or determinism cannot be /// established. @@ -78,8 +348,8 @@ bool isDeterministic( const std::string& functionName, const std::vector& argTypes) { // We know that the 'cast', 'and', and 'or' special forms are deterministic. - // Hard-code them here because they are not real functions and hence cannot be - // resolved by the code below. + // Hard-code them here because they are not real functions and hence cannot + // be resolved by the code below. if (functionName == "and" || functionName == "or" || functionName == "coalesce" || functionName == "if" || functionName == "switch" || functionName == "cast") { @@ -93,9 +363,9 @@ bool isDeterministic( } // Vector functions are a bit more complicated. We need to fetch the list of - // available signatures and check if any of them bind given the current input - // arg types. If it binds (if there's a match), we fetch the function and - // return the isDeterministic bool. 
+ // available signatures and check if any of them bind given the current + // input arg types. If it binds (if there's a match), we fetch the function + // and return the isDeterministic bool. try { if (auto vectorFunctionSignatures = exec::getVectorFunctionSignatures(functionName)) { @@ -110,12 +380,12 @@ bool isDeterministic( } } } - // TODO: Some stateful functions can only be built when constant arguments are - // passed, making the getVectorFunction() call above to throw. We only have a - // few of these functions, so for now we assume they are deterministic so they - // are picked for Fuzz testing. Once we make the isDeterministic() flag static - // (and hence we won't need to build the function object in here) we can clean - // up this code. + // TODO: Some stateful functions can only be built when constant arguments + // are passed, making the getVectorFunction() call above to throw. We only + // have a few of these functions, so for now we assume they are + // deterministic so they are picked for Fuzz testing. Once we make the + // isDeterministic() flag static (and hence we won't need to build the + // function object in here) we can clean up this code. catch (const std::exception& e) { LOG(WARNING) << "Unable to determine if '" << functionName << "' is deterministic or not. Assuming it is."; @@ -178,8 +448,8 @@ bool containTypeName( return false; } -// Determine whether the signature has an argument or return type that contains -// typeName. typeName should be in lower case. +// Determine whether the signature has an argument or return type that +// contains typeName. typeName should be in lower case. bool useTypeName( const exec::FunctionSignature& signature, const std::string& typeName) { @@ -237,7 +507,8 @@ BufferPtr extractNonNullIndices(const RowVectorPtr& data) { } /// Wraps child vectors of the specified 'rowVector' in dictionary using -/// specified 'indices'. Returns new RowVector created from the wrapped vectors. +/// specified 'indices'. 
Returns new RowVector created from the wrapped +/// vectors. RowVectorPtr wrapChildren( const BufferPtr& indices, const RowVectorPtr& rowVector) { @@ -283,7 +554,7 @@ uint32_t levelOfNesting(const TypePtr& type) { } // namespace ExpressionFuzzer::ExpressionFuzzer( - const FunctionSignatureMap& signatureMap, + FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, const std::optional& options) @@ -293,6 +564,10 @@ ExpressionFuzzer::ExpressionFuzzer( VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); + appendSpecialForms(signatureMap, options_.specialForms); + filterSignatures( + signatureMap, options_.useOnlyFunctions, options_.skipFunctions); + size_t totalFunctions = 0; size_t totalFunctionSignatures = 0; size_t supportedFunctionSignatures = 0; diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 9a4d7e7b30aa2..eec022cc0ea62 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -73,10 +73,27 @@ class ExpressionFuzzer { // Chance of adding a null constant to the plan, or null value in a vector // (expressed as double from 0 to 1). double nullRatio = 0.1; + + // If specified, Fuzzer will only choose functions from this comma separated + // list of function names (e.g: --only \"split\" or --only + // \"substr,ltrim\")." + std::string useOnlyFunctions = ""; + + // Comma-separated list of special forms to use in generated expression. + // Supported special forms: and, or, coalesce, if, switch, cast.") + std::string specialForms = "and,or,cast,coalesce,if,switch"; + + // This list can include a mix of function names and function signatures. + // Use function name to exclude all signatures of a given function from + // testing. Use function signature to exclude only a specific signature. 
+ // ex skipFunctions{ + // "width_bucket", + // "array_sort(array(T),constant function(T,T,bigint)) -> array(T)"} + std::unordered_set skipFunctions; }; ExpressionFuzzer( - const FunctionSignatureMap& signatureMap, + FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, const std::optional& options = std::nullopt); @@ -317,7 +334,7 @@ class ExpressionFuzzer { static const inline std::string kTypeParameterName = "T"; - Options options_; + const Options options_; std::vector signatures_; std::vector signatureTemplates_; diff --git a/velox/expression/tests/ExpressionFuzzerTest.cpp b/velox/expression/tests/ExpressionFuzzerTest.cpp index eee027676fc37..6b5a049dcfadc 100644 --- a/velox/expression/tests/ExpressionFuzzerTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerTest.cpp @@ -27,19 +27,6 @@ DEFINE_int64( "Initial seed for random number generator used to reproduce previous " "results (0 means start with random seed)."); -DEFINE_string( - only, - "", - "If specified, Fuzzer will only choose functions from " - "this comma separated list of function names " - "(e.g: --only \"split\" or --only \"substr,ltrim\")."); - -DEFINE_string( - special_forms, - "and,or,cast,coalesce,if,switch", - "Comma-separated list of special forms to use in generated expression. " - "Supported special forms: and, or, coalesce, if, switch, cast."); - int main(int argc, char** argv) { facebook::velox::functions::prestosql::registerAllScalarFunctions(); @@ -67,6 +54,5 @@ int main(int argc, char** argv) { "array_sort(array(T),constant function(T,T,bigint)) -> array(T)", }; size_t initialSeed = FLAGS_seed == 0 ? 
std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run( - FLAGS_only, initialSeed, skipFunctions, FLAGS_special_forms); + return FuzzerRunner::run(initialSeed, skipFunctions); } diff --git a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp index 76fcb51e0cb4b..6c5e8a214ce40 100644 --- a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp @@ -56,6 +56,13 @@ class ExpressionFuzzerUnitTest : public testing::Test { std::make_shared(VectorFuzzer::Options{}, pool.get())}; }; +namespace { +auto makeOptionsWithMaxLevelNesting(int32_t value) { + ExpressionFuzzer::Options options; + options.maxLevelOfNesting = value; + return options; +} +} // namespace TEST_F(ExpressionFuzzerUnitTest, restrictedLevelOfNesting) { velox::functions::prestosql::registerAllScalarFunctions(); std::mt19937 seed{0}; @@ -65,7 +72,7 @@ TEST_F(ExpressionFuzzerUnitTest, restrictedLevelOfNesting) { velox::getFunctionSignatures(), 0, vectorfuzzer, - {{.maxLevelOfNesting = maxLevelOfNesting}}, + makeOptionsWithMaxLevelNesting(maxLevelOfNesting), }; for (int i = 0; i < 5000; ++i) { @@ -104,7 +111,7 @@ TEST_F(ExpressionFuzzerUnitTest, reproduceExpressionWithSeed) { velox::getFunctionSignatures(), 1234567, vectorfuzzer, - {{.maxLevelOfNesting = 5}}}; + makeOptionsWithMaxLevelNesting(5)}; for (auto i = 0; i < 10; ++i) { firstGeneration.push_back( fuzzer.fuzzExpression().expressions[0]->toString()); @@ -130,7 +137,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, - {{.maxLevelOfNesting = maxLevelOfNesting}}}; + makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 5000; ++i) { auto expression = fuzzer.fuzzExpression().expressions[0]; @@ -158,7 +165,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, - 
{{.maxLevelOfNesting = maxLevelOfNesting}}}; + makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 1000; ++i) { auto expression = fuzzer.fuzzExpression().expressions[0]; diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.cpp b/velox/expression/tests/ExpressionFuzzerVerifier.cpp index 9159adcad4aa0..e5c8ab0af6ffe 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/tests/ExpressionFuzzerVerifier.cpp @@ -83,6 +83,19 @@ DEFINE_int32( "enabled)."); // The flags bellow are used to initialize ExpressionFuzzer::options. +DEFINE_string( + only, + "", + "If specified, Fuzzer will only choose functions from " + "this comma separated list of function names " + "(e.g: --only \"split\" or --only \"substr,ltrim\")."); + +DEFINE_string( + special_forms, + "and,or,cast,coalesce,if,switch", + "Comma-separated list of special forms to use in generated expression. " + "Supported special forms: and, or, coalesce, if, switch, cast."); + DEFINE_int32( velox_fuzzer_max_level_of_nesting, 10, @@ -159,7 +172,8 @@ VectorFuzzer::Options getVectorFuzzerOptions() { return opts; } -ExpressionFuzzer::Options getExpressionFuzzerOptions() { +ExpressionFuzzer::Options getExpressionFuzzerOptions( + const std::unordered_set& skipFunctions) { ExpressionFuzzer::Options opts; opts.maxLevelOfNesting = FLAGS_velox_fuzzer_max_level_of_nesting; opts.maxNumVarArgs = FLAGS_max_num_varargs; @@ -170,6 +184,9 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions() { opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; opts.nullRatio = FLAGS_null_ratio; + opts.specialForms = FLAGS_special_forms; + opts.useOnlyFunctions = FLAGS_only; + opts.skipFunctions = skipFunctions; return opts; } @@ -240,7 +257,8 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& 
signatureMap, - size_t initialSeed) + size_t initialSeed, + const std::unordered_set& skipFunctions) : verifier_( &execCtx_, {FLAGS_disable_constant_folding, @@ -253,7 +271,7 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, - getExpressionFuzzerOptions()) { + getExpressionFuzzerOptions(skipFunctions)) { seed(initialSeed); // Init stats and register listener. diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.h b/velox/expression/tests/ExpressionFuzzerVerifier.h index f510644dc2fa2..3526a430eebeb 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.h +++ b/velox/expression/tests/ExpressionFuzzerVerifier.h @@ -30,13 +30,28 @@ DECLARE_int32(velox_fuzzer_max_level_of_nesting); namespace facebook::velox::test { -// A tool utilizes ExpressionFuzzer, VectorFuzzer and ExpressionVerfier to -// generate random expressions and verify the correctness of the results. +// A tool that utilizes ExpressionFuzzer, VectorFuzzer and ExpressionVerifier to +// generate random expressions and verify the correctness of the results. It +// works by: +/// +/// 1. Taking an initial set of available function signatures. +/// 2. Generating a random expression tree based on the available function +/// signatures. +/// 3. Generating a random set of input data (vector), with a variety of +/// encodings and data layouts. +/// 4. Executing the expression using the common and simplified eval paths, and +/// asserting results are the exact same. +/// 5. Rinse and repeat. +/// +/// The tool depends on many flags that are listed on top of +/// ExpressionFuzzerVerifier.cpp. 
+ class ExpressionFuzzerVerifier { public: ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, - size_t initialSeed); + size_t initialSeed, + const std::unordered_set& skipFunctions = {}); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/tests/FuzzerRunner.cpp b/velox/expression/tests/FuzzerRunner.cpp index ade9c3caf893d..9727e0d7d52f8 100644 --- a/velox/expression/tests/FuzzerRunner.cpp +++ b/velox/expression/tests/FuzzerRunner.cpp @@ -13,193 +13,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "velox/expression/tests/FuzzerRunner.h" - -namespace { - -static const std::vector kIntegralTypes{ - "tinyint", - "smallint", - "integer", - "bigint", - "boolean"}; -static const std::vector kFloatingPointTypes{"real", "double"}; - -facebook::velox::exec::FunctionSignaturePtr makeCastSignature( - const std::string& fromType, - const std::string& toType) { - return facebook::velox::exec::FunctionSignatureBuilder() - .argumentType(fromType) - .returnType(toType) - .build(); -} - -void addCastFromIntegralSignatures( - const std::string& toType, - std::vector& signatures) { - for (const auto& fromType : kIntegralTypes) { - signatures.push_back(makeCastSignature(fromType, toType)); - } -} -void addCastFromFloatingPointSignatures( - const std::string& toType, - std::vector& signatures) { - for (const auto& fromType : kFloatingPointTypes) { - signatures.push_back(makeCastSignature(fromType, toType)); - } -} - -void addCastFromVarcharSignature( - const std::string& toType, - std::vector& signatures) { - signatures.push_back(makeCastSignature("varchar", toType)); -} - -void addCastFromTimestampSignature( - const std::string& toType, - std::vector& signatures) { - signatures.push_back(makeCastSignature("timestamp", toType)); -} - -void addCastFromDateSignature( - const std::string& 
toType, - std::vector& signatures) { - signatures.push_back(makeCastSignature("date", toType)); -} - -std::vector -getSignaturesForCast() { - std::vector signatures; - - // To integral types. - for (const auto& toType : kIntegralTypes) { - addCastFromIntegralSignatures(toType, signatures); - addCastFromFloatingPointSignatures(toType, signatures); - addCastFromVarcharSignature(toType, signatures); - } - - // To floating-point types. - for (const auto& toType : kFloatingPointTypes) { - addCastFromIntegralSignatures(toType, signatures); - addCastFromFloatingPointSignatures(toType, signatures); - addCastFromVarcharSignature(toType, signatures); - } - - // To varchar type. - addCastFromIntegralSignatures("varchar", signatures); - addCastFromFloatingPointSignatures("varchar", signatures); - addCastFromVarcharSignature("varchar", signatures); - addCastFromDateSignature("varchar", signatures); - addCastFromTimestampSignature("varchar", signatures); - - // To timestamp type. - addCastFromVarcharSignature("timestamp", signatures); - addCastFromDateSignature("timestamp", signatures); - - // To date type. - addCastFromVarcharSignature("date", signatures); - addCastFromTimestampSignature("date", signatures); - - // For each supported translation pair T --> U, add signatures of array(T) --> - // array(U), map(varchar, T) --> map(varchar, U), row(T) --> row(U). 
- auto size = signatures.size(); - for (auto i = 0; i < size; ++i) { - auto from = signatures[i]->argumentTypes()[0].baseName(); - auto to = signatures[i]->returnType().baseName(); - - signatures.push_back(makeCastSignature( - fmt::format("array({})", from), fmt::format("array({})", to))); - - signatures.push_back(makeCastSignature( - fmt::format("map(varchar, {})", from), - fmt::format("map(varchar, {})", to))); +#include "velox/expression/tests/FuzzerRunner.h" - signatures.push_back(makeCastSignature( - fmt::format("row({})", from), fmt::format("row({})", to))); - } - return signatures; +// static +int FuzzerRunner::run( + size_t seed, + const std::unordered_set& skipFunctions) { + runFromGtest(seed, skipFunctions); + return RUN_ALL_TESTS(); } -} // namespace - // static -const std::unordered_map< - std::string, - std::vector> - FuzzerRunner::kSpecialForms = { - {"and", - std::vector{ - // Signature: and (condition,...) -> output: - // boolean, boolean,.. -> boolean - facebook::velox::exec::FunctionSignatureBuilder() - .argumentType("boolean") - .argumentType("boolean") - .variableArity() - .returnType("boolean") - .build()}}, - {"or", - std::vector{ - // Signature: or (condition,...) -> output: - // boolean, boolean,.. -> boolean - facebook::velox::exec::FunctionSignatureBuilder() - .argumentType("boolean") - .argumentType("boolean") - .variableArity() - .returnType("boolean") - .build()}}, - {"coalesce", - std::vector{ - // Signature: coalesce (input,...) -> output: - // T, T,.. 
-> T - facebook::velox::exec::FunctionSignatureBuilder() - .typeVariable("T") - .argumentType("T") - .argumentType("T") - .variableArity() - .returnType("T") - .build()}}, - { - "if", - std::vector{ - // Signature: if (condition, then) -> output: - // boolean, T -> T - facebook::velox::exec::FunctionSignatureBuilder() - .typeVariable("T") - .argumentType("boolean") - .argumentType("T") - .returnType("T") - .build(), - // Signature: if (condition, then, else) -> output: - // boolean, T, T -> T - facebook::velox::exec::FunctionSignatureBuilder() - .typeVariable("T") - .argumentType("boolean") - .argumentType("T") - .argumentType("T") - .returnType("T") - .build()}, - }, - { - "switch", - std::vector{ - // Signature: Switch (condition, then) -> output: - // boolean, T -> T - // This is only used to bind to a randomly selected type for the - // output, then while generating arguments, an override is used - // to generate inputs that can create variation of multiple - // cases and may or may not include a final else clause. - facebook::velox::exec::FunctionSignatureBuilder() - .typeVariable("T") - .argumentType("boolean") - .argumentType("T") - .returnType("T") - .build()}, - }, - { - "cast", - /// TODO: Add supported Cast signatures to CastTypedExpr and expose - /// them to fuzzer instead of hard-coding signatures here. 
- getSignaturesForCast(), - }, -}; +void FuzzerRunner::runFromGtest( + size_t seed, + const std::unordered_set& skipFunctions) { + auto signatures = facebook::velox::getFunctionSignatures(); + facebook::velox::test::ExpressionFuzzerVerifier( + signatures, seed, skipFunctions) + .go(); +} diff --git a/velox/expression/tests/FuzzerRunner.h b/velox/expression/tests/FuzzerRunner.h index 985d1035c24a8..6f2936e5d23ca 100644 --- a/velox/expression/tests/FuzzerRunner.h +++ b/velox/expression/tests/FuzzerRunner.h @@ -24,163 +24,15 @@ #include "velox/expression/tests/ExpressionFuzzerVerifier.h" #include "velox/functions/FunctionRegistry.h" -/// FuzzerRunner leverages ExpressionFuzzer and VectorFuzzer to automatically -/// generate and execute expression tests. It works by: -/// -/// 1. Taking an initial set of available function signatures. -/// 2. Generating a random expression tree based on the available function -/// signatures. -/// 3. Generating a random set of input data (vector), with a variety of -/// encodings and data layouts. -/// 4. Executing the expression using the common and simplified eval paths, and -/// asserting results are the exact same. -/// 5. Rinse and repeat. -/// -/// The common usage pattern is as following: -/// -/// $ ./velox_expression_fuzzer_test --steps 10000 -/// -/// The important flags that control Fuzzer's behavior are: -/// -/// --steps: how many iterations to run. -/// --duration_sec: alternatively, for how many seconds it should run (takes -/// precedence over --steps). -/// --seed: pass a deterministic seed to reproduce the behavior (each iteration -/// will print a seed as part of the logs). -/// --v=1: verbose logging; print a lot more details about the execution. -/// --only: restrict the functions to fuzz. -/// --batch_size: size of input vector batches generated. 
-/// -/// e.g: -/// -/// $ ./velox_expression_fuzzer_test \ -/// --steps 10000 \ -/// --seed 123 \ -/// --v=1 \ -/// --only "substr,trim" +/// FuzzerRunner leverages ExpressionFuzzerVerifier to create a unit test. class FuzzerRunner { - static std::unordered_set splitNames(const std::string& names) { - // Parse, lower case and trim it. - std::vector nameList; - folly::split(',', names, nameList); - std::unordered_set nameSet; - - for (const auto& it : nameList) { - auto str = folly::trimWhitespace(it).toString(); - folly::toLowerAscii(str); - nameSet.insert(str); - } - return nameSet; - } - - static std::pair splitSignature( - const std::string& signature) { - const auto parenPos = signature.find("("); - - if (parenPos != std::string::npos) { - return {signature.substr(0, parenPos), signature.substr(parenPos)}; - } - - return {signature, ""}; - } - - // Parse the comma separated list of function names, and use it to filter the - // input signatures. - static facebook::velox::FunctionSignatureMap filterSignatures( - const facebook::velox::FunctionSignatureMap& input, - const std::string& onlyFunctions, - const std::unordered_set& skipFunctions) { - if (onlyFunctions.empty()) { - if (skipFunctions.empty()) { - return input; - } - facebook::velox::FunctionSignatureMap output(input); - for (auto skip : skipFunctions) { - // 'skip' can be function name or signature. - const auto [skipName, skipSignature] = splitSignature(skip); - - if (skipSignature.empty()) { - output.erase(skipName); - } else { - auto it = output.find(skipName); - if (it != output.end()) { - // Compiler refuses to reference 'skipSignature' from the lambda as - // is. 
- const auto& signatureToRemove = skipSignature; - - auto removeIt = std::find_if( - it->second.begin(), - it->second.end(), - [&](const auto& signature) { - return signature->toString() == signatureToRemove; - }); - VELOX_CHECK( - removeIt != it->second.end(), - "Skip signature not found: {}", - skip); - it->second.erase(removeIt); - } - } - } - return output; - } - - // Parse, lower case and trim it. - auto nameSet = splitNames(onlyFunctions); - - // Use the generated set to filter the input signatures. - facebook::velox::FunctionSignatureMap output; - for (const auto& it : input) { - if (nameSet.count(it.first) > 0) { - output.insert(it); - } - } - return output; - } - - static const std::unordered_map< - std::string, - std::vector> - kSpecialForms; - - static void appendSpecialForms( - const std::string& specialForms, - facebook::velox::FunctionSignatureMap& signatureMap) { - auto specialFormNames = splitNames(specialForms); - for (const auto& [name, signatures] : kSpecialForms) { - if (specialFormNames.count(name) == 0) { - LOG(INFO) << "Skipping special form: " << name; - continue; - } - std::vector - rawSignatures; - for (const auto& signature : signatures) { - rawSignatures.push_back(signature.get()); - } - signatureMap.insert({name, std::move(rawSignatures)}); - } - } - public: static int run( - const std::string& onlyFunctions, size_t seed, - const std::unordered_set& skipFunctions, - const std::string& specialForms) { - runFromGtest(onlyFunctions, seed, skipFunctions, specialForms); - return RUN_ALL_TESTS(); - } + const std::unordered_set& skipFunctions); static void runFromGtest( - const std::string& onlyFunctions, size_t seed, - const std::unordered_set& skipFunctions, - const std::string& specialForms) { - auto signatures = facebook::velox::getFunctionSignatures(); - appendSpecialForms(specialForms, signatures); - facebook::velox::test::ExpressionFuzzerVerifier( - filterSignatures(signatures, onlyFunctions, skipFunctions), seed) - .go(); - } + const 
std::unordered_set& skipFunctions); }; diff --git a/velox/expression/tests/SparkExpressionFuzzerTest.cpp b/velox/expression/tests/SparkExpressionFuzzerTest.cpp index 84254b66b1d76..e8357abe2f5c4 100644 --- a/velox/expression/tests/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/tests/SparkExpressionFuzzerTest.cpp @@ -30,19 +30,6 @@ DEFINE_int64( "Initial seed for random number generator " "(use it to reproduce previous results)."); -DEFINE_string( - only, - "", - "If specified, Fuzzer will only choose functions from " - "this comma separated list of function names " - "(e.g: --only \"split\" or --only \"substr,ltrim\")."); - -DEFINE_string( - special_forms, - "and,or", - "Comma-separated list of special forms to use in generated expression. " - "Supported special forms: and, or, coalesce, if, switch, cast."); - int main(int argc, char** argv) { facebook::velox::functions::sparksql::registerFunctions(""); @@ -63,6 +50,5 @@ int main(int argc, char** argv) { "replace", "might_contain", "unix_timestamp"}; - return FuzzerRunner::run( - FLAGS_only, FLAGS_seed, skipFunctions, FLAGS_special_forms); + return FuzzerRunner::run(FLAGS_seed, skipFunctions); }