From 8c9c123ffde3192613d23966f8828dfbcc2e19a9 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Thu, 11 Apr 2024 14:29:21 +0800 Subject: [PATCH] Support decimal in fuzzer test --- velox/docs/develop/testing/fuzzer.rst | 2 + velox/expression/ReverseSignatureBinder.cpp | 20 --- velox/expression/ReverseSignatureBinder.h | 4 - velox/expression/tests/ExpressionFuzzer.cpp | 156 +++++++++--------- velox/expression/tests/ExpressionFuzzer.h | 10 +- velox/expression/tests/FuzzerRunner.cpp | 6 + .../tests/utils/ArgumentTypeFuzzer.cpp | 9 +- velox/vector/fuzzer/VectorFuzzer.cpp | 24 --- velox/vector/fuzzer/VectorFuzzer.h | 2 - 9 files changed, 101 insertions(+), 132 deletions(-) diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 9ceade966d405..55dcca70b23df 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -219,6 +219,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index 495920e6bf1ff..274d2a06480f4 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } tryBindSucceeded_ = SignatureBinderBase::tryBind(signature_.returnType(), returnType_); return tryBindSucceeded_; diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 087adc4dbe771..3da800a5edbb6 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - // Return whether there is a constraint on an integer variable in type - // signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; // True if 'tryBind' has been called and succeeded. False otherwise. diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 7cd4f5aec42ba..3c16aea4866e1 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -39,52 +39,38 @@ int32_t rand(FuzzerGenerator& rng) { class DecimalArgGeneratorBase : public ArgGenerator { public: - TypePtr generateReturnType( - const exec::FunctionSignature& /*signature*/, - FuzzerGenerator& /*rng*/) override { - auto [p, s] = inputs_.begin()->first; - return DECIMAL(p, s); - } - std::vector generateArgs( const exec::FunctionSignature& /*signature*/, const TypePtr& returnType, FuzzerGenerator& rng) override { auto inputs = findInputs(returnType, rng); - if (inputs.a == nullptr || inputs.b == nullptr) { - return {}; + for (const auto& input : inputs) { + if (input == nullptr) { + return {}; + } } - - return {std::move(inputs.a), std::move(inputs.b)}; + return inputs; } protected: // Compute result type for all possible pairs of decimal input types. Store // the results in 'inputs_' maps keyed by return type. void initialize() { - std::vector allTypes; - for (auto p = 1; p < 38; ++p) { - for (auto s = 0; s <= p; ++s) { - allTypes.push_back(DECIMAL(p, s)); - } - } - - for (auto& a : allTypes) { - for (auto& b : allTypes) { + // By default, the result type is considered to be calculated from two input + // decimal types. + for (const auto& a : getAllTypes()) { + for (const auto& b : getAllTypes()) { auto [p1, s1] = getDecimalPrecisionScale(*a); auto [p2, s2] = getDecimalPrecisionScale(*b); - if (auto returnType = toReturnType(p1, s1, p2, s2)) { + if (auto returnType = toReturnType(4, p1, s1, p2, s2)) { inputs_[returnType.value()].push_back({a, b}); } } } } - struct Inputs { - TypePtr a; - TypePtr b; - }; + using Inputs = std::vector; // Return randomly selected pair of input types that produce the specified // result type. @@ -100,10 +86,24 @@ class DecimalArgGeneratorBase : public ArgGenerator { return it->second[index]; } + const std::vector& getAllTypes() const { + const auto generateAllTypes = []() { + std::vector allTypes; + for (auto p = 1; p < 38; ++p) { + for (auto s = 0; s <= p; ++s) { + allTypes.push_back(DECIMAL(p, s)); + } + } + return allTypes; + }; + + static std::vector allTypes = generateAllTypes(); + return allTypes; + } + // Given precisions and scales of the inputs, return precision and scale of // the result. - virtual std::optional> - toReturnType(int p1, int s1, int p2, int s2) = 0; + virtual std::optional> toReturnType(int count, ...) = 0; std::map, std::vector> inputs_; }; @@ -115,8 +115,16 @@ class PlusMinusArgGenerator : public DecimalArgGeneratorBase { } protected: - std::optional> - toReturnType(int p1, int s1, int p2, int s2) override { + std::optional> toReturnType(int count, ...) override { + VELOX_CHECK_EQ(count, 4); + va_list args; + va_start(args, count); + int p1 = va_arg(args, int); + int s1 = va_arg(args, int); + int p2 = va_arg(args, int); + int s2 = va_arg(args, int); + va_end(args); + auto s = std::max(s1, s2); auto p = std::min(38, std::max(p1 - s1, p2 - s2) + 1 + s); return {{p, s}}; @@ -130,8 +138,16 @@ class MultiplyArgGenerator : public DecimalArgGeneratorBase { } protected: - std::optional> - toReturnType(int p1, int s1, int p2, int s2) override { + std::optional> toReturnType(int count, ...) override { + VELOX_CHECK_EQ(count, 4); + va_list args; + va_start(args, count); + int p1 = va_arg(args, int); + int s1 = va_arg(args, int); + int p2 = va_arg(args, int); + int s2 = va_arg(args, int); + va_end(args); + if (s1 + s2 > 38) { return std::nullopt; } @@ -149,8 +165,16 @@ class DivideArgGenerator : public DecimalArgGeneratorBase { } protected: - std::optional> - toReturnType(int p1, int s1, int p2, int s2) override { + std::optional> toReturnType(int count, ...) override { + VELOX_CHECK_EQ(count, 4); + va_list args; + va_start(args, count); + int p1 = va_arg(args, int); + int s1 = va_arg(args, int); + int p2 = va_arg(args, int); + int s2 = va_arg(args, int); + va_end(args); + if (s1 + s2 > 38) { return std::nullopt; } @@ -163,19 +187,6 @@ class DivideArgGenerator : public DecimalArgGeneratorBase { class FloorAndRoundArgGenerator : public ArgGenerator { public: - TypePtr generateReturnType( - const exec::FunctionSignature& signature, - FuzzerGenerator& rng) override { - if (signature.argumentTypes().size() == 1) { - auto p = 1 + rand(rng) % 38; - return DECIMAL(p, 0); - } else { - auto p = 2 + rand(rng) % 37; - auto s = rand(rng) % p; - return DECIMAL(p, s); - } - } - std::vector generateArgs( const exec::FunctionSignature& signature, const TypePtr& returnType, @@ -624,16 +635,20 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = useTypeName(signature, "array") || + useTypeName(signature, "map") || useTypeName(signature, "row"); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting lambda functions, or functions using timestamp with time zone + // types. return !( useTypeName(signature, "opaque") || - // useTypeName(signature, "long_decimal") || - // useTypeName(signature, "short_decimal") || - // useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -751,10 +766,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -793,27 +812,13 @@ ExpressionFuzzer::ExpressionFuzzer( ArgumentTypeFuzzer typeFuzzer{*signature, localRng}; auto returnType = typeFuzzer.fuzzReturnType(); bool ok = typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs); - if (!ok) { - auto it = argGenerators_.find(function.first); - if (it != argGenerators_.end()) { - returnType = it->second->generateReturnType(*signature, localRng); - argTypes = - it->second->generateArgs(*signature, returnType, localRng); - VELOX_CHECK( - !argTypes.empty(), - "Failed to generate arguments for {} with return type {}", - function.first, - returnType->toString()); - } else { - VELOX_FAIL("Failed to generate arguments"); - } - } else { + if (ok) { argTypes = typeFuzzer.argumentTypes(); } } if (!isDeterministic(function.first, argTypes)) { LOG(WARNING) << "Skipping non-deterministic function: " - << function.first << signature->toString(); + << function.first << signature->toString(); continue; } @@ -894,8 +899,10 @@ ExpressionFuzzer::ExpressionFuzzer( sortSignatureTemplates(signatureTemplates_); for (const auto& it : signatureTemplates_) { - auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; + const auto returnType = + exec::sanitizeName(it.signature->returnType().baseName()); + const auto* returnTypeKey = &returnType; + if (it.typeVariables.find(returnType) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; @@ -969,7 +976,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& funcName) { int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName); } } @@ -1243,7 +1250,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 7d69832e28b9d..bfae96a92bbe6 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -38,12 +38,6 @@ class ArgGenerator { const exec::FunctionSignature& signature, const TypePtr& returnType, FuzzerGenerator& rng) = 0; - - /// Returns a randomly generated valid return type for a given signature. - /// TODO Remove. This API should not be needed. - virtual TypePtr generateReturnType( - const exec::FunctionSignature& signature, - FuzzerGenerator& rng) = 0; }; // A tool that can be used to generate random expressions. @@ -67,6 +61,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; diff --git a/velox/expression/tests/FuzzerRunner.cpp b/velox/expression/tests/FuzzerRunner.cpp index ac741944dfa61..e947f147c6853 100644 --- a/velox/expression/tests/FuzzerRunner.cpp +++ b/velox/expression/tests/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 2ea3a0222f8e2..7441f081e0b6b 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -157,8 +157,6 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { const auto& formalArgs = signature_.argumentTypes(); auto formalArgsCnt = formalArgs.size(); - std::unordered_map integerBindings; - if (returnType_) { exec::ReverseSignatureBinder binder{signature_, returnType_}; if (!binder.tryBind()) { @@ -166,6 +164,13 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { } bindings_ = binder.bindings(); integerBindings_ = binder.integerBindings(); + for (const auto& [name, _] : integerBindings_) { + // If the variable used by result type is constrained, argument types can + // not be generated without following the constraints. + if (variables().count(name) && variables().at(name).constraint() != "") { + return false; + } + } } else { for (const auto& [name, _] : signature_.variables()) { bindings_.insert({name, nullptr}); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index f64edce06b226..bb4578d42b203 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -1040,32 +1040,8 @@ const std::vector defaultScalarTypes() { }; return kScalarTypes; } - -std::pair randPrecisionScale( - FuzzerGenerator& rng, - int8_t maxPrecision) { - // Generate precision in range [1, Decimal type max precision] - auto precision = 1 + rand(rng) % maxPrecision; - // Generate scale in range [0, precision] - auto scale = rand(rng) % (precision + 1); - return {precision, scale}; -} - } // namespace -TypePtr randDecimalType(FuzzerGenerator& rng) { - // Short or Long? - if (rand(rng)) { - auto [precision, scale] = - randPrecisionScale(rng, ShortDecimalType::kMaxPrecision); - return DECIMAL(precision, scale); - } else { - auto [precision, scale] = - randPrecisionScale(rng, LongDecimalType::kMaxPrecision); - return DECIMAL(precision, scale); - } -} - TypePtr randType(FuzzerGenerator& rng, int maxDepth) { return randType(rng, defaultScalarTypes(), maxDepth); } diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 9b7fdcb457ef6..663fb5471f697 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -379,6 +379,4 @@ RowTypePtr randRowType( const std::vector& scalarTypes, int maxDepth = 5); -TypePtr randDecimalType(FuzzerGenerator& rng); - } // namespace facebook::velox