From 5408f946ec3581a44cde34e7263b7387bf3f17e3 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Thu, 11 Apr 2024 14:29:21 +0800 Subject: [PATCH] Support decimal in fuzzer test --- velox/docs/develop/testing/fuzzer.rst | 2 + velox/expression/ReverseSignatureBinder.cpp | 20 ----- velox/expression/ReverseSignatureBinder.h | 4 - velox/expression/tests/ExpressionFuzzer.cpp | 83 +++++++------------ velox/expression/tests/ExpressionFuzzer.h | 10 +-- velox/expression/tests/FuzzerRunner.cpp | 6 ++ .../tests/utils/ArgumentTypeFuzzer.cpp | 9 +- velox/vector/fuzzer/VectorFuzzer.cpp | 24 ------ velox/vector/fuzzer/VectorFuzzer.h | 2 - 9 files changed, 49 insertions(+), 111 deletions(-) diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 9ceade966d405..55dcca70b23df 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -219,6 +219,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index 495920e6bf1ff..274d2a06480f4 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } tryBindSucceeded_ = SignatureBinderBase::tryBind(signature_.returnType(), returnType_); return tryBindSucceeded_; diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 087adc4dbe771..3da800a5edbb6 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - // Return whether there is a constraint on an integer variable in type - // signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; // True if 'tryBind' has been called and succeeded. False otherwise. diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 7cd4f5aec42ba..9ec67bd4b414b 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -39,13 +39,6 @@ int32_t rand(FuzzerGenerator& rng) { class DecimalArgGeneratorBase : public ArgGenerator { public: - TypePtr generateReturnType( - const exec::FunctionSignature& /*signature*/, - FuzzerGenerator& /*rng*/) override { - auto [p, s] = inputs_.begin()->first; - return DECIMAL(p, s); - } - std::vector generateArgs( const exec::FunctionSignature& /*signature*/, const TypePtr& returnType, @@ -80,7 +73,7 @@ class DecimalArgGeneratorBase : public ArgGenerator { } } } - + struct Inputs { TypePtr a; TypePtr b; @@ -163,19 +156,6 @@ class DivideArgGenerator : public DecimalArgGeneratorBase { class FloorAndRoundArgGenerator : public ArgGenerator { public: - TypePtr generateReturnType( - const exec::FunctionSignature& signature, - FuzzerGenerator& rng) override { - if (signature.argumentTypes().size() == 1) { - auto p = 1 + rand(rng) % 38; - return DECIMAL(p, 0); - } else { - auto p = 2 + rand(rng) % 37; - auto s = rand(rng) % p; - return DECIMAL(p, s); - } - } - std::vector generateArgs( const exec::FunctionSignature& signature, const TypePtr& returnType, @@ -624,16 +604,20 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = useTypeName(signature, "array") || + useTypeName(signature, "map") || useTypeName(signature, "row"); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting lambda functions, or functions using timestamp with time zone + // types. return !( useTypeName(signature, "opaque") || - // useTypeName(signature, "long_decimal") || - // useTypeName(signature, "short_decimal") || - // useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -751,10 +735,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -793,29 +781,15 @@ ExpressionFuzzer::ExpressionFuzzer( ArgumentTypeFuzzer typeFuzzer{*signature, localRng}; auto returnType = typeFuzzer.fuzzReturnType(); bool ok = typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs); - if (!ok) { - auto it = argGenerators_.find(function.first); - if (it != argGenerators_.end()) { - returnType = it->second->generateReturnType(*signature, localRng); - argTypes = - it->second->generateArgs(*signature, returnType, localRng); - VELOX_CHECK( - !argTypes.empty(), - "Failed to generate arguments for {} with return type {}", - function.first, - returnType->toString()); - } else { - VELOX_FAIL("Failed to generate arguments"); - } - } else { + if (ok) { argTypes = typeFuzzer.argumentTypes(); + if (!isDeterministic(function.first, argTypes)) { + LOG(WARNING) << "Skipping non-deterministic function: " + << function.first << signature->toString(); + continue; + } } } - if (!isDeterministic(function.first, argTypes)) { - LOG(WARNING) << "Skipping non-deterministic function: " - << function.first << signature->toString(); - continue; - } if (!signature->variables().empty()) { std::unordered_set typeVariables; @@ -894,8 +868,10 @@ ExpressionFuzzer::ExpressionFuzzer( sortSignatureTemplates(signatureTemplates_); for (const auto& it : signatureTemplates_) { - auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; + const auto returnType = + exec::sanitizeName(it.signature->returnType().baseName()); + const auto* returnTypeKey = &returnType; + if (it.typeVariables.find(returnType) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; @@ -969,7 +945,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& funcName) { int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName); } } @@ -1243,7 +1219,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 7d69832e28b9d..bfae96a92bbe6 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -38,12 +38,6 @@ class ArgGenerator { const exec::FunctionSignature& signature, const TypePtr& returnType, FuzzerGenerator& rng) = 0; - - /// Returns a randomly generated valid return type for a given signature. - /// TODO Remove. This API should not be needed. - virtual TypePtr generateReturnType( - const exec::FunctionSignature& signature, - FuzzerGenerator& rng) = 0; }; // A tool that can be used to generate random expressions. @@ -67,6 +61,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; diff --git a/velox/expression/tests/FuzzerRunner.cpp b/velox/expression/tests/FuzzerRunner.cpp index ac741944dfa61..e947f147c6853 100644 --- a/velox/expression/tests/FuzzerRunner.cpp +++ b/velox/expression/tests/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 2ea3a0222f8e2..7441f081e0b6b 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -157,8 +157,6 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { const auto& formalArgs = signature_.argumentTypes(); auto formalArgsCnt = formalArgs.size(); - std::unordered_map integerBindings; - if (returnType_) { exec::ReverseSignatureBinder binder{signature_, returnType_}; if (!binder.tryBind()) { @@ -166,6 +164,13 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { } bindings_ = binder.bindings(); integerBindings_ = binder.integerBindings(); + for (const auto& [name, _] : integerBindings_) { + // If the variable used by result type is constrained, argument types can + // not be generated without following the constraints. + if (variables().count(name) && variables().at(name).constraint() != "") { + return false; + } + } } else { for (const auto& [name, _] : signature_.variables()) { bindings_.insert({name, nullptr}); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index f64edce06b226..bb4578d42b203 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -1040,32 +1040,8 @@ const std::vector defaultScalarTypes() { }; return kScalarTypes; } - -std::pair randPrecisionScale( - FuzzerGenerator& rng, - int8_t maxPrecision) { - // Generate precision in range [1, Decimal type max precision] - auto precision = 1 + rand(rng) % maxPrecision; - // Generate scale in range [0, precision] - auto scale = rand(rng) % (precision + 1); - return {precision, scale}; -} - } // namespace -TypePtr randDecimalType(FuzzerGenerator& rng) { - // Short or Long? - if (rand(rng)) { - auto [precision, scale] = - randPrecisionScale(rng, ShortDecimalType::kMaxPrecision); - return DECIMAL(precision, scale); - } else { - auto [precision, scale] = - randPrecisionScale(rng, LongDecimalType::kMaxPrecision); - return DECIMAL(precision, scale); - } -} - TypePtr randType(FuzzerGenerator& rng, int maxDepth) { return randType(rng, defaultScalarTypes(), maxDepth); } diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 9b7fdcb457ef6..663fb5471f697 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -379,6 +379,4 @@ RowTypePtr randRowType( const std::vector& scalarTypes, int maxDepth = 5); -TypePtr randDecimalType(FuzzerGenerator& rng); - } // namespace facebook::velox