diff --git a/velox/expression/FunctionSignature.h b/velox/expression/FunctionSignature.h index 3266d74db2fe..8b3f063e4eb8 100644 --- a/velox/expression/FunctionSignature.h +++ b/velox/expression/FunctionSignature.h @@ -180,6 +180,11 @@ class FunctionSignature { return constantArguments_; } + bool hasConstantArgument() const { + return std::any_of( + constantArguments_.begin(), constantArguments_.end(), folly::identity); + } + bool variableArity() const { return variableArity_; } diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 9134897a8f4a..f7372353b158 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -34,6 +34,42 @@ namespace facebook::velox::test { namespace { using exec::SignatureBinder; +using exec::SignatureBinderBase; + +class FullSignatureBinder : public SignatureBinderBase { + public: + FullSignatureBinder( + const exec::FunctionSignature& signature, + const std::vector& argTypes, + const TypePtr& returnType) + : SignatureBinderBase(signature) { + if (signature_.argumentTypes().size() != argTypes.size()) { + return; + } + + for (auto i = 0; i < argTypes.size(); ++i) { + if (!SignatureBinderBase::tryBind( + signature_.argumentTypes()[i], argTypes[i])) { + return; + } + } + + if (!SignatureBinderBase::tryBind(signature_.returnType(), returnType)) { + return; + } + + bound_ = true; + } + + // Returns true if argument types and return type specified in the constructor + // match specified signature. + bool tryBind() { + return bound_; + } + + private: + bool bound_{false}; +}; /// Returns if `functionName` with the given `argTypes` is deterministic. /// Returns true if the function was not found or determinism cannot be @@ -164,7 +200,7 @@ bool isSupportedSignature( // Not supporting lambda functions, or functions using decimal and // timestamp with time zone types. return !( - useTypeName(signature, "opaque") || useTypeName(signature, "function") || + useTypeName(signature, "opaque") || useTypeName(signature, "long_decimal") || useTypeName(signature, "short_decimal") || useTypeName(signature, "decimal") || @@ -578,9 +614,69 @@ std::vector ExpressionFuzzer::generateArgs( return generateArgs(input.args, input.constantArgs, numVarArgs); } +core::TypedExprPtr ExpressionFuzzer::generateArgFunction(const TypePtr& arg) { + const auto& functionType = arg->asFunction(); + + std::vector args; + std::vector names; + std::vector inputs; + args.reserve(arg->size() - 1); + names.reserve(arg->size() - 1); + inputs.reserve(arg->size() - 1); + + for (auto i = 0; i < arg->size() - 1; ++i) { + args.push_back(arg->childAt(i)); + names.push_back(fmt::format("__a{}", i)); + inputs.push_back(std::make_shared( + args.back(), names.back())); + } + + const auto& returnType = functionType.children().back(); + + const auto baseType = typeToBaseName(returnType); + const auto& baseList = typeToExpressionList_[baseType]; + const auto& templateList = typeToExpressionList_[kTypeParameterName]; + + std::vector eligible; + for (const auto& functionName : baseList) { + if (auto* signature = + findConcreteSignature(args, returnType, functionName)) { + eligible.push_back(functionName); + } else if ( + auto* signatureTemplate = + findSignatureTemplate(args, returnType, baseType, functionName)) { + eligible.push_back(functionName); + } + } + + for (const auto& functionName : templateList) { + if (auto* signatureTemplate = + findSignatureTemplate(args, returnType, baseType, functionName)) { + eligible.push_back(functionName); + } + } + + if (eligible.empty()) { + return std::make_shared( + ROW(std::move(names), std::move(args)), + generateArgConstant(returnType)); + } + + const auto idx = rand32(0, eligible.size() - 1); + const auto name = eligible[idx]; + + return std::make_shared( + ROW(std::move(names), std::move(args)), + std::make_shared(returnType, inputs, name)); +} + core::TypedExprPtr ExpressionFuzzer::generateArg( const TypePtr& arg, bool isConstant) { + if (arg->isFunction()) { + return generateArgFunction(arg); + } + if (isConstant) { return generateArgConstant(arg); } else { @@ -768,6 +864,46 @@ const CallableSignature* ExpressionFuzzer::chooseRandomConcreteSignature( return eligible[idx]; } +const CallableSignature* ExpressionFuzzer::findConcreteSignature( + const std::vector& argTypes, + const TypePtr& returnType, + const std::string& functionName) { + if (expressionToSignature_.find(functionName) == + expressionToSignature_.end()) { + return nullptr; + } + auto baseType = typeToBaseName(returnType); + auto it = expressionToSignature_[functionName].find(baseType); + if (it == expressionToSignature_[functionName].end()) { + return nullptr; + } + + for (auto signature : it->second) { + if (!signature->returnType->equivalent(*returnType)) { + continue; + } + + if (signature->args.size() != argTypes.size()) { + continue; + } + + bool argTypesMatch = true; + for (auto i = 0; i < argTypes.size(); ++i) { + if (signature->constantArgs[i] || + !signature->args[i]->equivalent(*argTypes[i])) { + argTypesMatch = false; + break; + } + } + + if (argTypesMatch) { + return signature; + } + } + + return nullptr; +} + core::TypedExprPtr ExpressionFuzzer::generateExpressionFromConcreteSignatures( const TypePtr& returnType, const std::string& functionName) { @@ -812,6 +948,37 @@ const SignatureTemplate* ExpressionFuzzer::chooseRandomSignatureTemplate( return eligible[idx]; } +const SignatureTemplate* ExpressionFuzzer::findSignatureTemplate( + const std::vector& argTypes, + const TypePtr& returnType, + const std::string& typeName, + const std::string& functionName) { + std::vector eligible; + if (expressionToTemplatedSignature_.find(functionName) == + expressionToTemplatedSignature_.end()) { + return nullptr; + } + auto it = expressionToTemplatedSignature_[functionName].find(typeName); + if (it == expressionToTemplatedSignature_[functionName].end()) { + return nullptr; + } + + for (auto signatureTemplate : it->second) { + // Skip signatures with constant arguments. + if (signatureTemplate->signature->hasConstantArgument()) { + continue; + } + + FullSignatureBinder binder{ + *signatureTemplate->signature, argTypes, returnType}; + if (binder.tryBind()) { + return signatureTemplate; + } + } + + return nullptr; +} + core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate( const TypePtr& returnType, const std::string& functionName) { diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 4f2b7dce56ff..9a4d7e7b30aa 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -191,6 +191,16 @@ class ExpressionFuzzer { core::TypedExprPtr generateArg(const TypePtr& arg); + // Given lambda argument type, generate matching LambdaTypedExpr. + // + // The 'arg' specifies inputs types and result type for the lambda. This + // method finds all matching signatures and signature templates, picks one + // randomly and generates LambdaTypedExpr. If no matching signatures or + // signature templates found, this method returns LambdaTypedExpr that + // represents a constant lambda, i.e lambda that returns the same value for + // all input. The constant value is generated using 'generateArgConstant'. + core::TypedExprPtr generateArgFunction(const TypePtr& arg); + std::vector generateArgs(const CallableSignature& input); std::vector generateArgs( @@ -239,6 +249,13 @@ class ExpressionFuzzer { const TypePtr& returnType, const std::string& functionName); + /// Returns a signature with matching input types and return type. Returns + /// nullptr if matching signature doesn't exist. + const CallableSignature* findConcreteSignature( + const std::vector& argTypes, + const TypePtr& returnType, + const std::string& functionName); + /// Generate an expression by randomly selecting a concrete function /// signature that returns 'returnType' among all signatures that the /// function named 'functionName' supports. @@ -254,6 +271,14 @@ class ExpressionFuzzer { const std::string& typeName, const std::string& functionName); + /// Returns a signature template with matching input types and return type. + /// Returns nullptr if matching signature template doesn't exist. + const SignatureTemplate* findSignatureTemplate( + const std::vector& argTypes, + const TypePtr& returnType, + const std::string& typeName, + const std::string& functionName); + /// Generate an expression by randomly selecting a function signature /// template that returns 'returnType' among all signature templates that /// the function named 'functionName' supports. diff --git a/velox/expression/tests/ExpressionFuzzerTest.cpp b/velox/expression/tests/ExpressionFuzzerTest.cpp index 61df1dba943a..eee027676fc3 100644 --- a/velox/expression/tests/ExpressionFuzzerTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerTest.cpp @@ -52,6 +52,9 @@ int main(int argc, char** argv) { // TODO: List of the functions that at some point crash or fail and need to // be fixed before we can enable. + // This list can include a mix of function names and function signatures. + // Use function name to exclude all signatures of a given function from + // testing. Use function signature to exclude only a specific signature. std::unordered_set skipFunctions = { // Fuzzer and the underlying engine are confused about cardinality(HLL) // (since HLL is a user defined type), and end up trying to use @@ -60,6 +63,8 @@ int main(int argc, char** argv) { "cardinality", "element_at", "width_bucket", + // Fuzzer cannot generate valid 'comparator' lambda. + "array_sort(array(T),constant function(T,T,bigint)) -> array(T)", }; size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; return FuzzerRunner::run( diff --git a/velox/expression/tests/FuzzerRunner.h b/velox/expression/tests/FuzzerRunner.h index 65cedaf5498e..985d1035c24a 100644 --- a/velox/expression/tests/FuzzerRunner.h +++ b/velox/expression/tests/FuzzerRunner.h @@ -74,6 +74,17 @@ class FuzzerRunner { return nameSet; } + static std::pair splitSignature( + const std::string& signature) { + const auto parenPos = signature.find("("); + + if (parenPos != std::string::npos) { + return {signature.substr(0, parenPos), signature.substr(parenPos)}; + } + + return {signature, ""}; + } + // Parse the comma separated list of function names, and use it to filter the // input signatures. static facebook::velox::FunctionSignatureMap filterSignatures( @@ -85,10 +96,32 @@ class FuzzerRunner { return input; } facebook::velox::FunctionSignatureMap output(input); - for (auto s : skipFunctions) { - auto str = s; - folly::toLowerAscii(str); - output.erase(str); + for (auto skip : skipFunctions) { + // 'skip' can be function name or signature. + const auto [skipName, skipSignature] = splitSignature(skip); + + if (skipSignature.empty()) { + output.erase(skipName); + } else { + auto it = output.find(skipName); + if (it != output.end()) { + // Compiler refuses to reference 'skipSignature' from the lambda as + // is. + const auto& signatureToRemove = skipSignature; + + auto removeIt = std::find_if( + it->second.begin(), + it->second.end(), + [&](const auto& signature) { + return signature->toString() == signatureToRemove; + }); + VELOX_CHECK( + removeIt != it->second.end(), + "Skip signature not found: {}", + skip); + it->second.erase(removeIt); + } + } } return output; } diff --git a/velox/functions/lib/Re2Functions.cpp b/velox/functions/lib/Re2Functions.cpp index 59bfd8451105..7523ed6e7270 100644 --- a/velox/functions/lib/Re2Functions.cpp +++ b/velox/functions/lib/Re2Functions.cpp @@ -581,7 +581,7 @@ class LikeGeneric final : public VectorFunction { auto [it, inserted] = compiledRegularExpressions_.emplace( key, std::make_unique(toStringPiece(regex), opt)); - VELOX_CHECK_LE( + VELOX_USER_CHECK_LE( compiledRegularExpressions_.size(), kMaxCompiledRegexes, "Max number of regex reached"); diff --git a/velox/functions/lib/tests/Re2FunctionsTest.cpp b/velox/functions/lib/tests/Re2FunctionsTest.cpp index 57376d3f44e6..5285a7e3aaa7 100644 --- a/velox/functions/lib/tests/Re2FunctionsTest.cpp +++ b/velox/functions/lib/tests/Re2FunctionsTest.cpp @@ -1104,20 +1104,24 @@ TEST_F(Re2FunctionsTest, likeRegexLimit) { } VELOX_ASSERT_THROW( - evaluate("like(c0 , c1)", makeRowVector({input, pattern})), + evaluate("like(c0, c1)", makeRowVector({input, pattern})), "Max number of regex reached"); - // Make sure try does not suppress that faluire. - VELOX_ASSERT_THROW( - evaluate("try(like(c0 , c1))", makeRowVector({input, pattern})), - "Max number of regex reached"); + // First 20 rows should return false, the rest raise and error and become + // null. + result = evaluate("try(like(c0, c1))", makeRowVector({input, pattern})); + auto expected = makeFlatVector( + 26, + [](auto /*row*/) { return false; }, + [](auto row) { return row >= 20; }); + assertEqualVectors(expected, result); // All are complex but the same, should pass. for (int i = 0; i < 26; i++) { flatPattern->set(i, "b%[0-9]+.*{}.*{}.*[0-9]+"); } - assertEqualVectors( - BaseVector::createConstant(BOOLEAN(), false, 26, pool()), result); + result = evaluate("like(c0, c1)", makeRowVector({input, pattern})); + assertEqualVectors(makeConstant(false, 26), result); } TEST_F(Re2FunctionsTest, invalidEscapeChar) {