From e5594ba61f453e9ae2a54eaec6bc89cdeaa24e8b Mon Sep 17 00:00:00 2001 From: rui-mo Date: Tue, 2 Apr 2024 13:28:20 +0800 Subject: [PATCH] Extend expression fuzzer test to support decimal --- .github/workflows/scheduled.yml | 3 + velox/docs/develop/scalar-functions.rst | 7 + velox/docs/develop/testing/fuzzer.rst | 3 + velox/expression/ReverseSignatureBinder.cpp | 20 -- velox/expression/ReverseSignatureBinder.h | 4 - velox/expression/tests/ArgumentGenerator.h | 36 +++ .../tests/ArgumentTypeFuzzerTest.cpp | 253 +++++++++++++++++- velox/expression/tests/CMakeLists.txt | 10 +- velox/expression/tests/ExpressionFuzzer.cpp | 72 +++-- velox/expression/tests/ExpressionFuzzer.h | 41 ++- .../expression/tests/ExpressionFuzzerTest.cpp | 16 +- .../tests/ExpressionFuzzerUnitTest.cpp | 4 + .../tests/ExpressionFuzzerVerifier.cpp | 5 +- .../tests/ExpressionFuzzerVerifier.h | 4 +- velox/expression/tests/FuzzerRunner.cpp | 19 +- velox/expression/tests/FuzzerRunner.h | 9 +- .../tests/SparkExpressionFuzzerTest.cpp | 20 +- .../tests/utils/ArgumentTypeFuzzer.cpp | 182 ++++++++++++- .../tests/utils/ArgumentTypeFuzzer.h | 21 +- .../functions/prestosql/DecimalFunctions.cpp | 23 +- .../functions/prestosql/fuzzer/CMakeLists.txt | 3 + .../fuzzer/ExtremeArgumentGenerator.cpp | 60 +++++ .../fuzzer/ExtremeArgumentGenerator.h | 36 +++ velox/functions/sparksql/MakeTimestamp.cpp | 4 +- .../sparksql/UnscaledValueFunction.cpp | 4 +- .../functions/sparksql/fuzzer/CMakeLists.txt | 5 + .../fuzzer/MakeTimestampArgumentGenerator.cpp | 64 +++++ .../fuzzer/MakeTimestampArgumentGenerator.h | 37 +++ .../fuzzer/UnscaledValueArgumentGenerator.cpp | 46 ++++ .../fuzzer/UnscaledValueArgumentGenerator.h | 36 +++ velox/type/SimpleFunctionApi.h | 3 + 31 files changed, 966 insertions(+), 84 deletions(-) create mode 100644 velox/expression/tests/ArgumentGenerator.h create mode 100644 velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp create mode 100644 velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h create mode 100644 velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp create mode 100644 velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h create mode 100644 velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp create mode 100644 velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index daad6cbb6113d..ac602187ec0b3 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -293,6 +293,7 @@ jobs: --seed ${RANDOM} \ --enable_variadic_signatures \ --velox_fuzzer_enable_complex_types \ + --velox_fuzzer_enable_decimal_type \ --lazy_vector_generation_ratio 0.2 \ --velox_fuzzer_enable_column_reuse \ --velox_fuzzer_enable_expression_reuse \ @@ -346,6 +347,7 @@ jobs: --duration_sec 3600 \ --enable_variadic_signatures \ --velox_fuzzer_enable_complex_types \ + --velox_fuzzer_enable_decimal_type \ --velox_fuzzer_enable_column_reuse \ --velox_fuzzer_enable_expression_reuse \ --max_expression_trees_per_step 2 \ @@ -465,6 +467,7 @@ jobs: --enable_variadic_signatures \ --lazy_vector_generation_ratio 0.2 \ --velox_fuzzer_enable_column_reuse \ + --velox_fuzzer_enable_decimal_type \ --velox_fuzzer_enable_expression_reuse \ --max_expression_trees_per_step 2 \ --retry_with_try \ diff --git a/velox/docs/develop/scalar-functions.rst b/velox/docs/develop/scalar-functions.rst index d9063787781bc..62a0435540c46 100644 --- a/velox/docs/develop/scalar-functions.rst +++ b/velox/docs/develop/scalar-functions.rst @@ -998,6 +998,13 @@ element of the array and returns a new array of the results. The signature of a function that handles DECIMAL types can additionally take variables and constraints to represent the precision and scale values. +The variables of input decimal types store the input precisions and scales. +Their names begin with an incrementing prefix starting from 'a', and followed +by '_precision' or '_scale'. Variables of output decimal types store the output +precision and scale. Their names begin with 'r', and followed by '_precision' +or '_scale'. When there is only one input decimal type, and the output type +holds the same precision and scale with the input type, the variables could be +named as 'precision' and 'scale'. The constraints are evaluated using a type calculator built from Flex and Bison tools. The decimal arithmetic addition function has the following signature: diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 9ceade966d405..b2f555572ada5 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -219,6 +219,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. @@ -246,6 +248,7 @@ An example set of arguments to run the expression fuzzer with all features enabl --enable_variadic_signatures --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_complex_types +--velox_fuzzer_enable_decimal_type --velox_fuzzer_enable_expression_reuse --velox_fuzzer_enable_column_reuse --retry_with_try diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index b21715fef36d3..10319a88403e1 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } return SignatureBinderBase::tryBind(signature_.returnType(), returnType_); } diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 02b92d217cf41..d23dc547dc77c 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -44,10 +44,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - /// Return whether there is a constraint on an integer variable in type - /// signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; }; diff --git a/velox/expression/tests/ArgumentGenerator.h b/velox/expression/tests/ArgumentGenerator.h new file mode 100644 index 0000000000000..46ee4d9b58d16 --- /dev/null +++ b/velox/expression/tests/ArgumentGenerator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/core/ITypedExpr.h" +#include "velox/expression/tests/utils/FuzzerToolkit.h" + +namespace facebook::velox::test { + +class ExpressionFuzzer; + +class ArgumentGenerator { + public: + virtual ~ArgumentGenerator() = default; + + // Generates function arguments of the specified signature. + virtual std::vector generate( + ExpressionFuzzer* expressionFuzzer, + const CallableSignature& input, + int32_t maxNumVarArgs) = 0; +}; + +} // namespace facebook::velox::test diff --git a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp index e139ae0c4f7a1..25e849a35b48e 100644 --- a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp @@ -59,6 +59,43 @@ class ArgumentTypeFuzzerTest : public testing::Test { } } + void testFuzzingDecimalSuccess( + const std::shared_ptr& signature, + int32_t expectedArguments, + const std::function&, const TypePtr&)>& + returnTypeVerifier) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{*signature, seed}; + ASSERT_TRUE(fuzzer.fuzzArgumentTypes(kMaxVariadicArgs)); + + auto& argumentTypes = fuzzer.argumentTypes(); + for (const auto& argType : argumentTypes) { + SCOPED_TRACE(fmt::format("argType: {}", argType->toString())); + } + ASSERT_GE(argumentTypes.size(), expectedArguments); + + auto& argumentSignatures = signature->argumentTypes(); + int i; + for (i = 0; i < expectedArguments; ++i) { + ASSERT_TRUE(argumentTypes[i]->isDecimal()) + << "at " << i + << ": Expected DECIMAL. Got: " << argumentTypes[i]->toString(); + } + + if (i < argumentTypes.size()) { + ASSERT_TRUE(signature->variableArity()); + ASSERT_LE( + argumentTypes.size() - argumentSignatures.size(), kMaxVariadicArgs); + for (int j = i; j < argumentTypes.size(); ++j) { + ASSERT_TRUE(argumentTypes[j]->equivalent(*argumentTypes[i - 1])); + } + } + + const auto returnType = fuzzer.fuzzReturnType(); + SCOPED_TRACE(fmt::format("returnType: {}", returnType->toString())); + returnTypeVerifier(argumentTypes, returnType); + } + void testFuzzingFailure( const std::shared_ptr& signature, const TypePtr& returnType) { @@ -66,6 +103,18 @@ class ArgumentTypeFuzzerTest : public testing::Test { ArgumentTypeFuzzer fuzzer{*signature, returnType, seed}; ASSERT_FALSE(fuzzer.fuzzArgumentTypes(kMaxVariadicArgs)); } + + void testFuzzingReturnType( + const std::shared_ptr& signature, + const std::vector& argumentTypes, + const TypePtr& expectedReturnType) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{*signature, seed, argumentTypes}; + const auto returnType = fuzzer.returnType(); + ASSERT_TRUE(returnType->equivalent(*expectedReturnType)) + << ": Expected: " << expectedReturnType->toString() + << ". Got: " << returnType->toString(); + } }; TEST_F(ArgumentTypeFuzzerTest, concreteSignature) { @@ -222,9 +271,60 @@ TEST_F(ArgumentTypeFuzzerTest, any) { ASSERT_TRUE(argumentTypes[0] != nullptr); } -TEST_F(ArgumentTypeFuzzerTest, unsupported) { - // Constraints on the return type is not supported. - auto signature = +TEST_F(ArgumentTypeFuzzerTest, decimal) { + auto signature = exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("a_precision") + .returnType("boolean") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(a_precision, a_scale)") + .build(); + + std::function&, const TypePtr&)> verifier = + [](const std::vector& argumentTypes, const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::BOOLEAN); + }; + testFuzzingDecimalSuccess(signature, 3, verifier); + testFuzzingReturnType( + signature, {DECIMAL(22, 5), DECIMAL(22, 5)}, BOOLEAN()); + + signature = + exec::FunctionSignatureBuilder() + .integerVariable("a_precision") + .integerVariable("a_scale") + .integerVariable("b_precision") + .integerVariable("b_scale") + .integerVariable( + "r_precision", + "min(38, max(a_precision - a_scale, b_precision - b_scale) + max(a_scale, b_scale) + 1)") + .integerVariable("r_scale", "max(a_scale, b_scale)") + .returnType("DECIMAL(r_precision, r_scale)") + .argumentType("DECIMAL(a_precision, a_scale)") + .argumentType("DECIMAL(b_precision, b_scale)") + .build(); + + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_TRUE(returnType->isDecimal()); + const auto [aPrecision, aScale] = + getDecimalPrecisionScale(*argumentTypes[0]); + const auto [bPrecision, bScale] = + getDecimalPrecisionScale(*argumentTypes[1]); + const auto [rPrecision, rScale] = getDecimalPrecisionScale(*returnType); + ASSERT_EQ( + rPrecision, + std::min( + 38, + std::max(aPrecision - aScale, bPrecision - bScale) + + std::max(aScale, bScale) + 1)); + ASSERT_EQ(rScale, std::max(aScale, bScale)); + }; + testFuzzingDecimalSuccess(signature, 2, verifier); + testFuzzingReturnType( + signature, {DECIMAL(10, 5), DECIMAL(20, 10)}, DECIMAL(21, 10)); + + signature = exec::FunctionSignatureBuilder() .integerVariable("a_scale") .integerVariable("b_scale") @@ -239,7 +339,152 @@ TEST_F(ArgumentTypeFuzzerTest, unsupported) { .argumentType("decimal(b_precision, b_scale)") .build(); - testFuzzingFailure(signature, DECIMAL(13, 6)); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::ROW); + const auto [aPrecision, aScale] = + getDecimalPrecisionScale(*argumentTypes[0]); + const auto [bPrecision, bScale] = + getDecimalPrecisionScale(*argumentTypes[1]); + const auto arrayType = std::dynamic_pointer_cast( + asRowType(returnType)->childAt(0)); + const auto [rPrecision, rScale] = + getDecimalPrecisionScale(*(arrayType->elementType())); + ASSERT_EQ( + rPrecision, + std::min( + 38, + std::max(aPrecision - aScale, bPrecision - bScale) + + std::max(aScale, bScale) + 1)); + ASSERT_EQ(rScale, std::max(aScale, bScale)); + }; + testFuzzingDecimalSuccess(signature, 2, verifier); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("i1") + .integerVariable("i2") + .integerVariable("i5") + .integerVariable("i6") + .integerVariable( + "i3", "min(38, max(i1 - i5, i2 - i6) + max(i5, i6) + 1)") + .integerVariable("i7", "max(i5, i6)") + .returnType("decimal(i3,i7)") + .argumentType("decimal(i1,i5)") + .argumentType("decimal(i2,i6)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_TRUE(returnType->isDecimal()); + const auto [aPrecision, aScale] = + getDecimalPrecisionScale(*argumentTypes[0]); + const auto [bPrecision, bScale] = + getDecimalPrecisionScale(*argumentTypes[1]); + const auto [rPrecision, rScale] = getDecimalPrecisionScale(*returnType); + ASSERT_EQ( + rPrecision, + std::min( + 38, + std::max(aPrecision - aScale, bPrecision - bScale) + + std::max(aScale, bScale) + 1)); + ASSERT_EQ(rScale, std::max(aScale, bScale)); + }; + testFuzzingDecimalSuccess(signature, 2, verifier); + testFuzzingReturnType( + signature, {DECIMAL(38, 0), DECIMAL(32, 2)}, DECIMAL(38, 2)); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("i1") + .integerVariable("i5") + .returnType("boolean") + .argumentType("decimal(i1,i5)") + .argumentType("decimal(i1,i5)") + .argumentType("decimal(i1,i5)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::BOOLEAN); + }; + testFuzzingDecimalSuccess(signature, 3, verifier); + testFuzzingReturnType( + signature, {DECIMAL(19, 3), DECIMAL(19, 3)}, BOOLEAN()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("precision") + .integerVariable("scale") + .returnType("DECIMAL(precision, scale)") + .argumentType("DECIMAL(precision, scale)") + .variableArity() + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_TRUE(returnType->isDecimal()); + const auto [aPrecision, aScale] = + getDecimalPrecisionScale(*argumentTypes[0]); + const auto [rPrecision, rScale] = getDecimalPrecisionScale(*returnType); + ASSERT_EQ(rPrecision, aPrecision); + ASSERT_EQ(rScale, aScale); + }; + testFuzzingDecimalSuccess(signature, 1, verifier); + testFuzzingReturnType( + signature, + {DECIMAL(12, 5), DECIMAL(12, 5), DECIMAL(12, 5)}, + DECIMAL(12, 5)); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("precision", "min(max(6, precision), 18)") + .returnType("timestamp") + .argumentType("decimal(precision, 6)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::TIMESTAMP); + const auto [precision, scale] = getDecimalPrecisionScale(*argumentTypes[0]); + ASSERT_EQ(scale, 6); + }; + testFuzzingDecimalSuccess(signature, 1, verifier); + testFuzzingReturnType(signature, {DECIMAL(12, 6)}, TIMESTAMP()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("scale") + .returnType("timestamp") + .argumentType("decimal(38, scale)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::TIMESTAMP); + const auto [precision, scale] = getDecimalPrecisionScale(*argumentTypes[0]); + ASSERT_EQ(precision, 38); + }; + testFuzzingDecimalSuccess(signature, 1, verifier); + testFuzzingReturnType(signature, {DECIMAL(38, 6)}, TIMESTAMP()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("i5", "min(20, i5)") + .returnType("boolean") + .argumentType("decimal(20, i5)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::BOOLEAN); + const auto [precision, scale] = getDecimalPrecisionScale(*argumentTypes[0]); + ASSERT_EQ(precision, 20); + }; + testFuzzingDecimalSuccess(signature, 1, verifier); + testFuzzingReturnType(signature, {DECIMAL(20, 12)}, BOOLEAN()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("i1", "max(8, i1)") + .returnType("boolean") + .argumentType("decimal(i1, 8)") + .build(); + verifier = [](const std::vector& argumentTypes, + const TypePtr& returnType) { + ASSERT_EQ(returnType->kind(), TypeKind::BOOLEAN); + const auto [precision, scale] = getDecimalPrecisionScale(*argumentTypes[0]); + ASSERT_EQ(scale, 8); + }; + testFuzzingDecimalSuccess(signature, 1, verifier); + testFuzzingReturnType(signature, {DECIMAL(20, 8)}, BOOLEAN()); } TEST_F(ArgumentTypeFuzzerTest, lambda) { diff --git a/velox/expression/tests/CMakeLists.txt b/velox/expression/tests/CMakeLists.txt index 6958e22f9276d..a7c0491e4255b 100644 --- a/velox/expression/tests/CMakeLists.txt +++ b/velox/expression/tests/CMakeLists.txt @@ -153,10 +153,12 @@ target_link_libraries( add_executable(velox_expression_fuzzer_test ExpressionFuzzerTest.cpp) -target_link_libraries(velox_expression_fuzzer_test velox_expression_fuzzer - velox_functions_prestosql gtest gtest_main) +target_link_libraries( + velox_expression_fuzzer_test velox_expression_fuzzer_utility + velox_expression_fuzzer velox_functions_prestosql gtest gtest_main) add_executable(spark_expression_fuzzer_test SparkExpressionFuzzerTest.cpp) -target_link_libraries(spark_expression_fuzzer_test velox_expression_fuzzer - velox_functions_spark gtest gtest_main) +target_link_libraries( + spark_expression_fuzzer_test spark_expression_fuzzer_utility + velox_expression_fuzzer velox_functions_spark gtest gtest_main) diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index f8bdf6c59c874..77a5fd0945b04 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -435,16 +435,21 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = + (useTypeName(signature, "array") || useTypeName(signature, "map") || + useTypeName(signature, "row")); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting lambda functions, or functions using timestamp with time zone + // types. return !( useTypeName(signature, "opaque") || - useTypeName(signature, "long_decimal") || - useTypeName(signature, "short_decimal") || - useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -526,10 +531,13 @@ ExpressionFuzzer::ExpressionFuzzer( FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, + const std::unordered_map>& + customArgumentGenerators, const std::optional& options) : options_(options.value_or(Options())), vectorFuzzer_(vectorFuzzer), - state{rng_, std::max(1, options_.maxLevelOfNesting)} { + state{rng_, std::max(1, options_.maxLevelOfNesting)}, + customArgumentGenerators_(customArgumentGenerators) { VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); @@ -553,10 +561,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -680,8 +692,10 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& it : signatureTemplates_) { auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; - if (it.typeVariables.find(returnType) != it.typeVariables.end()) { + const auto sanitizedName = exec::sanitizeName(returnType); + + const auto* returnTypeKey = &sanitizedName; + if (it.typeVariables.find(sanitizedName) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; } @@ -752,9 +766,10 @@ int ExpressionFuzzer::getTickets(const std::string& funcName) { void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& type, const std::string& funcName) { + const auto sanitizedName = exec::sanitizeName(type); int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[sanitizedName].push_back(funcName); } } @@ -1028,7 +1043,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } @@ -1045,6 +1061,10 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( std::vector ExpressionFuzzer::getArgsForCallable( const CallableSignature& callable) { + if (customArgumentGenerators_.count(callable.name)) { + return customArgumentGenerators_[callable.name]->generate( + this, callable, options_.maxNumVarArgs); + } auto funcIt = funcArgOverrides_.find(callable.name); if (funcIt == funcArgOverrides_.end()) { return generateArgs(callable); @@ -1054,11 +1074,31 @@ std::vector ExpressionFuzzer::getArgsForCallable( core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable( const CallableSignature& callable, - const TypePtr& type) { + const TypePtr& type, + const exec::FunctionSignature* signature) { auto args = getArgsForCallable(callable); + // Generate a CallTypedExpr with type because callable.returnType may not have // the required field names. - return std::make_shared(type, args, callable.name); + auto outputType = type; + // If signature is provided, for a decimal function (especially a nested one), + // as argument precisions and scales are randomly generated, + // callable.returnType does not follow the required constraints, and the + // matched result type needs to be recalculated from the argument types. Use + // ArgumentTypeFuzzer to generate a constrained type to avoid breaking the + // constraints between input types and output types. + if (signature) { + std::vector argTypes; + argTypes.reserve(args.size()); + for (const auto& arg : args) { + argTypes.emplace_back(arg->type()); + } + ArgumentTypeFuzzer fuzzer{*signature, rng_, argTypes}; + if (auto constrainedType = fuzzer.returnType()) { + outputType = constrainedType; + } + } + return std::make_shared(outputType, args, callable.name); } const CallableSignature* ExpressionFuzzer::chooseRandomConcreteSignature( @@ -1245,7 +1285,7 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate( .constantArgs = constantArguments}; markSelected(chosen->name); - return getCallExprFromCallable(callable, returnType); + return getCallExprFromCallable(callable, returnType, chosen->signature); } core::TypedExprPtr ExpressionFuzzer::generateCastExpression( diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index fcc7bee02a27a..6c01d1026b8c2 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -19,6 +19,7 @@ #include "velox/core/ITypedExpr.h" #include "velox/core/QueryCtx.h" #include "velox/expression/Expr.h" +#include "velox/expression/tests/ArgumentGenerator.h" #include "velox/expression/tests/ExpressionVerifier.h" #include "velox/expression/tests/utils/FuzzerToolkit.h" #include "velox/functions/FunctionRegistry.h" @@ -48,6 +49,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; @@ -103,6 +108,8 @@ class ExpressionFuzzer { FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, + const std::unordered_map>& + customArgumentGenerators, const std::optional& options = std::nullopt); template @@ -191,6 +198,19 @@ class ExpressionFuzzer { RowTypePtr fuzzRowReturnType(size_t size, char prefix = 'p'); + core::TypedExprPtr generateArg(const TypePtr& arg); + + core::TypedExprPtr generateArg(const TypePtr& arg, bool isConstant); + + std::vector generateArgs(const CallableSignature& input); + + core::TypedExprPtr generateArgColumn(const TypePtr& arg); + + core::TypedExprPtr generateArgConstant(const TypePtr& arg); + + // Returns random integer between min and max inclusive. + int32_t rand32(int32_t min, int32_t max); + private: // Either generates a new expression of the required return type or if // already generated expressions of the same return type exist then there is @@ -214,12 +234,6 @@ class ExpressionFuzzer { void appendConjunctSignatures(); - core::TypedExprPtr generateArgConstant(const TypePtr& arg); - - core::TypedExprPtr generateArgColumn(const TypePtr& arg); - - core::TypedExprPtr generateArg(const TypePtr& arg); - // Given lambda argument type, generate matching LambdaTypedExpr. // // The 'arg' specifies inputs types and result type for the lambda. This @@ -230,15 +244,11 @@ class ExpressionFuzzer { // all input. The constant value is generated using 'generateArgConstant'. core::TypedExprPtr generateArgFunction(const TypePtr& arg); - std::vector generateArgs(const CallableSignature& input); - std::vector generateArgs( const std::vector& argTypes, const std::vector& constantArgs, uint32_t numVarArgs = 0); - core::TypedExprPtr generateArg(const TypePtr& arg, bool isConstant); - // Return a vector of expressions for each argument of callable in order. std::vector getArgsForCallable( const CallableSignature& callable); @@ -254,7 +264,8 @@ class ExpressionFuzzer { core::TypedExprPtr getCallExprFromCallable( const CallableSignature& callable, - const TypePtr& type); + const TypePtr& type, + const exec::FunctionSignature* signature = nullptr); /// Return a random signature mapped to functionName in /// expressionToSignature_ whose return type can match returnType. Return @@ -326,9 +337,6 @@ class ExpressionFuzzer { state.expressionStats_[funcName]++; } - // Returns random integer between min and max inclusive. - int32_t rand32(int32_t min, int32_t max); - static const inline std::string kTypeParameterName = "T"; const Options options_; @@ -415,6 +423,11 @@ class ExpressionFuzzer { int32_t remainingLevelOfNesting_; } state; + + // Maps from function name to a custom arguments generator. + std::unordered_map> + customArgumentGenerators_; + friend class ExpressionFuzzerUnitTest; }; diff --git a/velox/expression/tests/ExpressionFuzzerTest.cpp b/velox/expression/tests/ExpressionFuzzerTest.cpp index 1c6a675bfdf73..68cddccf86a00 100644 --- a/velox/expression/tests/ExpressionFuzzerTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerTest.cpp @@ -19,6 +19,7 @@ #include #include "velox/expression/tests/FuzzerRunner.h" +#include "velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" DEFINE_int64( @@ -65,6 +66,19 @@ int main(int argc, char** argv) { "regexp_extract_all", "regexp_like", }; + + const std::unordered_map< + std::string, + std::shared_ptr> + customArgumentGenerators = { + {"greatest", + std::make_shared< + facebook::velox::functions::test::ExtremeArgumentGenerator>()}, + {"least", + std::make_shared< + facebook::velox::functions::test::ExtremeArgumentGenerator>()}}; + size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run(initialSeed, skipFunctions, {{}}); + return FuzzerRunner::run( + initialSeed, skipFunctions, {{}}, customArgumentGenerators); } diff --git a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp index 9fd3a09b3a047..043e45a35fd03 100644 --- a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp @@ -77,6 +77,7 @@ TEST_F(ExpressionFuzzerUnitTest, restrictedLevelOfNesting) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting), }; @@ -116,6 +117,7 @@ TEST_F(ExpressionFuzzerUnitTest, reproduceExpressionWithSeed) { velox::getFunctionSignatures(), 1234567, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(5)}; for (auto i = 0; i < 10; ++i) { firstGeneration.push_back( @@ -142,6 +144,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 5000; ++i) { @@ -170,6 +173,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 1000; ++i) { diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.cpp b/velox/expression/tests/ExpressionFuzzerVerifier.cpp index 8e36fd739f21c..94ce099a72f41 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/tests/ExpressionFuzzerVerifier.cpp @@ -80,7 +80,9 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const ExpressionFuzzerVerifier::Options& options) + const ExpressionFuzzerVerifier::Options& options, + const std::unordered_map>& + customArgumentGenerators) : options_(options), queryCtx_(std::make_shared( nullptr, @@ -98,6 +100,7 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, + customArgumentGenerators, options.expressionFuzzerOptions) { seed(initialSeed); diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.h b/velox/expression/tests/ExpressionFuzzerVerifier.h index 2f85b5d52bc71..a9e9b6392fa16 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.h +++ b/velox/expression/tests/ExpressionFuzzerVerifier.h @@ -51,7 +51,9 @@ class ExpressionFuzzerVerifier { ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const Options& options); + const Options& options, + const std::unordered_map>& + customArgumentGenerators); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/tests/FuzzerRunner.cpp b/velox/expression/tests/FuzzerRunner.cpp index ac741944dfa61..c05333f8f7f93 100644 --- a/velox/expression/tests/FuzzerRunner.cpp +++ b/velox/expression/tests/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; @@ -204,8 +210,10 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions( int FuzzerRunner::run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { - runFromGtest(seed, skipFunctions, queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators) { + runFromGtest(seed, skipFunctions, queryConfigs, customArgumentGenerators); return RUN_ALL_TESTS(); } @@ -213,13 +221,16 @@ int FuzzerRunner::run( void FuzzerRunner::runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators) { memory::MemoryManager::testingSetInstance({}); auto signatures = facebook::velox::getFunctionSignatures(); ExpressionFuzzerVerifier( signatures, seed, - getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs)) + getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs), + customArgumentGenerators) .go(); } } // namespace facebook::velox::test diff --git a/velox/expression/tests/FuzzerRunner.h b/velox/expression/tests/FuzzerRunner.h index cbf3d5ac290a9..efb09b262b461 100644 --- a/velox/expression/tests/FuzzerRunner.h +++ b/velox/expression/tests/FuzzerRunner.h @@ -22,6 +22,7 @@ #include #include +#include "velox/expression/tests/ArgumentGenerator.h" #include "velox/expression/tests/ExpressionFuzzerVerifier.h" #include "velox/functions/FunctionRegistry.h" @@ -33,12 +34,16 @@ class FuzzerRunner { static int run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators); static void runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators); }; } // namespace facebook::velox::test diff --git a/velox/expression/tests/SparkExpressionFuzzerTest.cpp b/velox/expression/tests/SparkExpressionFuzzerTest.cpp index c9531632f4137..68e34c81d129c 100644 --- a/velox/expression/tests/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/tests/SparkExpressionFuzzerTest.cpp @@ -24,6 +24,8 @@ #include "velox/expression/tests/FuzzerRunner.h" #include "velox/functions/sparksql/Register.h" +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h" +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h" DEFINE_int64( seed, @@ -58,7 +60,21 @@ int main(int argc, char** argv) { // Required by spark_partition_id function. std::unordered_map queryConfigs = { - {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}}; + {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}, + {facebook::velox::core::QueryConfig::kSessionTimezone, + "America/Los_Angeles"}}; - return FuzzerRunner::run(FLAGS_seed, skipFunctions, queryConfigs); + const std::unordered_map< + std::string, + std::shared_ptr> + customArgumentGenerators = { + {"unscaled_value", + std::make_shared()}, + {"make_timestamp", + std::make_shared()}}; + + return FuzzerRunner::run( + FLAGS_seed, skipFunctions, queryConfigs, customArgumentGenerators); } diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 44c8a0a69bf3d..4ad6c0e635bc7 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -21,13 +21,17 @@ #include "velox/expression/ReverseSignatureBinder.h" #include "velox/expression/SignatureBinder.h" +#include "velox/expression/type_calculation/TypeCalculation.h" +#include "velox/type/SimpleFunctionApi.h" #include "velox/type/Type.h" #include "velox/vector/fuzzer/VectorFuzzer.h" namespace facebook::velox::test { std::string typeToBaseName(const TypePtr& type) { - return boost::algorithm::to_lower_copy(std::string{type->kindName()}); + return type->isDecimal() + ? "decimal" + : boost::algorithm::to_lower_copy(std::string{type->kindName()}); } std::optional baseNameToTypeKind(const std::string& typeName) { @@ -35,6 +39,173 @@ std::optional baseNameToTypeKind(const std::string& typeName) { return tryMapNameToTypeKind(kindName); } +ArgumentTypeFuzzer::ArgumentTypeFuzzer( + const exec::FunctionSignature& signature, + std::mt19937& rng, + const std::vector& argumentTypes) + : ArgumentTypeFuzzer(signature, nullptr, rng) { + if (argumentTypes.empty()) { + return; + } + // Checks if any variable is integer constrained, and get the decimal name + // style. + bool integerConstrained = false; + char decimalNameStyle = 0; + for (const auto& [variableName, variableInfo] : signature.variables()) { + if (variableInfo.isIntegerParameter()) { + // If constraints are empty, the integer variable is also regarded to be + // constrained as variables are shared across argument and return types. + integerConstrained = true; + if (variableName == "precision" || variableName == "scale") { + decimalNameStyle = 'a'; + break; + } + if (variableName.find("precision") != std::string::npos || + variableName.find("scale") != std::string::npos) { + decimalNameStyle = 'b'; + break; + } + if (variableName.find("i") != std::string::npos) { + decimalNameStyle = 'c'; + break; + } + } + } + + // To handle the constraints between input types and output types of a decimal + // function, extracts the input precisions and scales from decimal arguments, + // and bind them to integer variables. + column_index_t decimalColIndex = 1; + for (column_index_t i = 0; i < argumentTypes.size(); ++i) { + const auto argType = argumentTypes[i]; + if (argType->isDecimal()) { + const auto [p, s] = getDecimalPrecisionScale(*argType); + switch (decimalNameStyle) { + case 'a': { + integerVariablesBindings_["precision"] = p; + integerVariablesBindings_["scale"] = s; + break; + } + case 'b': { + const auto column = std::string(1, 'a' + i); + integerVariablesBindings_[column + "_precision"] = p; + integerVariablesBindings_[column + "_scale"] = s; + break; + } + case 'c': { + integerVariablesBindings_["i" + std::to_string(decimalColIndex)] = p; + integerVariablesBindings_ + ["i" + std::to_string(decimalColIndex + kIntegerPairSize)] = s; + decimalColIndex++; + break; + } + default: + VELOX_UNSUPPORTED( + "Unsupported decimal name style {}.", decimalNameStyle); + } + } + } + // Calculates the matched return type through the argument types with argument + // type fuzzer, which evaluates the constraints internally. + if (integerConstrained && argumentTypes.size() > 0) { + returnType_ = fuzzReturnType(); + } +} + +void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() { + const auto genPrecision = [&]() { + return boost::random::uniform_int_distribution( + 1, LongDecimalType::kMaxPrecision)(rng_); + }; + const auto genScaleWithPrecision = [&](uint32_t precision) { + return boost::random::uniform_int_distribution( + 0, precision)(rng_); + }; + const auto genScale = [&]() { + return boost::random::uniform_int_distribution( + 0, LongDecimalType::kMaxPrecision)(rng_); + }; + const auto genPrecisionWithScale = [&](uint32_t scale) { + return boost::random::uniform_int_distribution( + std::max(scale, (uint32_t)1), LongDecimalType::kMaxPrecision)(rng_); + }; + + // Assign a random value for all integer values. + for (const auto& [variableName, variableInfo] : variables()) { + if (!variableInfo.isIntegerParameter() || + integerVariablesBindings_.count(variableName)) { + continue; + } + + // When decimal function is registered as vector function, the variable name + // contains 'precision' or 'scale' like 'a_precision' and 'a_scale' as + // docs/develop/scalar-functions.rst illustrates. + if (auto pos = variableName.find("precision"); pos != std::string::npos) { + // Scale corresponds to the generated random precision. + // 1 <= precision <= 38, and 0 <= scale <= precision. + const auto precision = genPrecision(); + integerVariablesBindings_[variableName] = precision; + const auto scaleName = variableName.substr(0, pos) + "scale"; + if (variables().count(scaleName)) { + integerVariablesBindings_[scaleName] = genScaleWithPrecision(precision); + } + continue; + } + if (auto pos = variableName.find("scale"); pos != std::string::npos) { + // Precision corresponds to the generated random scale. + // 0 <= scale <= 38, and max(1, scale) <= precision <= 38. + const auto scale = genScale(); + integerVariablesBindings_[variableName] = scale; + const auto precisionName = variableName.substr(0, pos) + "precision"; + if (variables().count(precisionName)) { + integerVariablesBindings_[precisionName] = genPrecisionWithScale(scale); + } + continue; + } + + // When decimal function is registered as simple function, the variable name + // contains 'i' like 'i1' as method name() of IntegerVariable returns. + if (auto pos = variableName.find("i"); pos != std::string::npos) { + VELOX_USER_CHECK_GE(variableName.size(), 2); + const auto index = + std::stoi(variableName.substr(pos + 1, variableName.size())); + if (index <= kIntegerPairSize) { + const auto precision = genPrecision(); + integerVariablesBindings_[variableName] = precision; + const auto scaleIndex = index + kIntegerPairSize; + const auto scaleName = "i" + std::to_string(scaleIndex); + if (variables().count(scaleName)) { + integerVariablesBindings_[scaleName] = + genScaleWithPrecision(precision); + } + } else { + const auto scale = genScale(); + integerVariablesBindings_[variableName] = scale; + const auto precisionIndex = index - kIntegerPairSize; + const auto precisionName = "i" + std::to_string(precisionIndex); + if (variables().count(precisionName)) { + integerVariablesBindings_[precisionName] = + genPrecisionWithScale(scale); + } + } + continue; + } + + integerVariablesBindings_[variableName] = + boost::random::uniform_int_distribution()(rng_); + } + + // Handle constraints. + for (const auto& [variableName, variableInfo] : variables()) { + const auto constraint = variableInfo.constraint(); + if (constraint == "") { + continue; + } + const auto calculation = fmt::format("{}={}", variableName, constraint); + expression::calculation::evaluate(calculation, integerVariablesBindings_); + } +} + void ArgumentTypeFuzzer::determineUnboundedTypeVariables() { for (auto& [variableName, variableInfo] : variables()) { if (!variableInfo.isTypeParameter()) { @@ -80,6 +251,7 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { } } + determineUnboundedIntegerVariables(); determineUnboundedTypeVariables(); for (auto i = 0; i < formalArgsCnt; i++) { TypePtr actualArg; @@ -87,7 +259,7 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { actualArg = randType(); } else { actualArg = exec::SignatureBinder::tryResolveType( - formalArgs[i], variables(), bindings_); + formalArgs[i], variables(), bindings_, integerVariablesBindings_); VELOX_CHECK(actualArg != nullptr); } argumentTypes_.push_back(actualArg); @@ -113,13 +285,17 @@ TypePtr ArgumentTypeFuzzer::fuzzReturnType() { nullptr, "Only fuzzing uninitialized return type is allowed."); + determineUnboundedIntegerVariables(); determineUnboundedTypeVariables(); if (signature_.returnType().baseName() == "any") { returnType_ = randType(); return returnType_; } else { returnType_ = exec::SignatureBinder::tryResolveType( - signature_.returnType(), variables(), bindings_); + signature_.returnType(), + variables(), + bindings_, + integerVariablesBindings_); VELOX_CHECK_NE(returnType_, nullptr); return returnType_; } diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.h b/velox/expression/tests/utils/ArgumentTypeFuzzer.h index 540a353221bfd..8250f07a4a1e1 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.h +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.h @@ -28,13 +28,15 @@ namespace facebook::velox::test { /// arguments types. Optionally, allows to specify a desired return type. If /// specified, the return type acts as a constraint on the possible set of /// argument types. If no return type is specified, it also allows generate a -/// random type that can bind to the function's return type. +/// random type that can bind to the function's return type. Optionally, allows +/// to specify argument types. When specified, integer bindings are constructed, +/// and a return type that can bind to the integer constraints is generated. class ArgumentTypeFuzzer { public: ArgumentTypeFuzzer( const exec::FunctionSignature& signature, - std::mt19937& rng) - : ArgumentTypeFuzzer(signature, nullptr, rng) {} + std::mt19937& rng, + const std::vector& argumentTypes = {}); ArgumentTypeFuzzer( const exec::FunctionSignature& signature, @@ -54,6 +56,12 @@ class ArgumentTypeFuzzer { return argumentTypes_; } + /// Return the generated return type. This function could return nullptr if + /// return type has not been set. + const TypePtr& returnType() const { + return returnType_; + } + /// Return a random type that can bind to the function signature's return /// type and set returnType_ to this type. This function can only be called /// when returnType_ is uninitialized. @@ -65,6 +73,10 @@ class ArgumentTypeFuzzer { return signature_.variables(); } + /// Bind each integer variable that is not determined to a randomly generated + /// value. + void determineUnboundedIntegerVariables(); + /// Bind each type variable that is not determined by the return type to a /// randomly generated type. void determineUnboundedTypeVariables(); @@ -83,6 +95,9 @@ class ArgumentTypeFuzzer { /// Bindings between type variables and their actual types. std::unordered_map bindings_; + /// Bindings between integer variables and their values. + std::unordered_map integerVariablesBindings_; + /// RNG to generate random types for unbounded type variables when necessary. std::mt19937& rng_; }; diff --git a/velox/functions/prestosql/DecimalFunctions.cpp b/velox/functions/prestosql/DecimalFunctions.cpp index 747163a9fc36b..d4ec90ca26696 100644 --- a/velox/functions/prestosql/DecimalFunctions.cpp +++ b/velox/functions/prestosql/DecimalFunctions.cpp @@ -152,6 +152,25 @@ template struct DecimalMultiplyFunction { VELOX_DEFINE_FUNCTION_TYPES(TExec); + template + void initialize( + const std::vector& inputTypes, + const core::QueryConfig& /*config*/, + A* /*a*/, + B* /*b*/) { + const auto aType = inputTypes[0]; + const auto bType = inputTypes[1]; + const auto [aPrecision, aScale] = getDecimalPrecisionScale(*aType); + const auto [bPrecision, bScale] = getDecimalPrecisionScale(*bType); + const auto rPrecision = std::min(38, aPrecision + bPrecision); + const auto rScale = aScale + bScale; + VELOX_USER_CHECK_LE( + rScale, + rPrecision, + "DECIMAL scale must be in range [0, {}].", + rPrecision); + } + template void call(R& out, const A& a, const B& b) { out = checkedMultiply(checkedMultiply(R(a), R(b)), R(1)); @@ -364,7 +383,9 @@ void registerDecimalMultiply(const std::string& prefix) { exec::SignatureVariable( S3::name(), fmt::format( - "{a_scale} + {b_scale}", + "min({a_scale} + {b_scale}, min(38, {a_precision} + {b_precision}))", + fmt::arg("a_precision", P1::name()), + fmt::arg("b_precision", P2::name()), fmt::arg("a_scale", S1::name()), fmt::arg("b_scale", S2::name())), exec::ParameterType::kIntegerParameter), diff --git a/velox/functions/prestosql/fuzzer/CMakeLists.txt b/velox/functions/prestosql/fuzzer/CMakeLists.txt index 392d206c17222..515ed3c607958 100644 --- a/velox/functions/prestosql/fuzzer/CMakeLists.txt +++ b/velox/functions/prestosql/fuzzer/CMakeLists.txt @@ -37,3 +37,6 @@ target_link_libraries( velox_functions_prestosql gtest gtest_main) + +add_library(velox_expression_fuzzer_utility ExtremeArgumentGenerator.cpp) +target_link_libraries(velox_expression_fuzzer_utility velox_common_base) diff --git a/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp new file mode 100644 index 0000000000000..eafcf28db6e4f --- /dev/null +++ b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h" +#include "velox/expression/tests/ExpressionFuzzer.h" + +namespace facebook::velox::functions::test { + +std::vector ExtremeArgumentGenerator::generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) { + const auto argTypes = input.args; + VELOX_CHECK_GE( + argTypes.size(), + 1, + "At least one input is expected from the template signature."); + if (!argTypes[0]->isDecimal()) { + return expressionFuzzer->generateArgs(input); + } + + auto numVarArgs = + !input.variableArity ? 0 : expressionFuzzer->rand32(0, maxNumVarArgs); + std::vector inputExpressions; + inputExpressions.reserve(argTypes.size() + numVarArgs); + inputExpressions.emplace_back( + expressionFuzzer->generateArg(argTypes.at(0), input.constantArgs.at(0))); + + // Append varargs to the argument list. + for (int i = 0; i < numVarArgs; i++) { + core::TypedExprPtr argExpr; + // The varargs need to be generated following the result type of the first + // argument. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. + const auto argType = inputExpressions[0]->type(); + if (expressionFuzzer->rand32(0, 1) == 0) { + argExpr = expressionFuzzer->generateArgConstant(argType); + } else { + argExpr = expressionFuzzer->generateArgColumn(argType); + } + inputExpressions.emplace_back(argExpr); + } + return inputExpressions; +} + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h new file mode 100644 index 0000000000000..f4293898f7068 --- /dev/null +++ b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/tests/ArgumentGenerator.h" + +namespace facebook::velox::functions::test { + +namespace facebook::velox::test { +class ExpressionFuzzer; +} + +/// Generates custom arguments for the "greatest" and "least" functions: decimal +/// varargs need to be constant or column. +class ExtremeArgumentGenerator : public velox::test::ArgumentGenerator { + public: + std::vector generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) override; +}; + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/sparksql/MakeTimestamp.cpp b/velox/functions/sparksql/MakeTimestamp.cpp index 4466482195a1b..00e7c3195c053 100644 --- a/velox/functions/sparksql/MakeTimestamp.cpp +++ b/velox/functions/sparksql/MakeTimestamp.cpp @@ -149,7 +149,7 @@ class MakeTimestampFunction : public exec::VectorFunction { static std::vector> signatures() { return { exec::FunctionSignatureBuilder() - .integerVariable("precision") + .integerVariable("precision", "min(max(6, precision), 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") @@ -159,7 +159,7 @@ class MakeTimestampFunction : public exec::VectorFunction { .argumentType("decimal(precision, 6)") .build(), exec::FunctionSignatureBuilder() - .integerVariable("precision") + .integerVariable("precision", "min(max(6, precision), 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") diff --git a/velox/functions/sparksql/UnscaledValueFunction.cpp b/velox/functions/sparksql/UnscaledValueFunction.cpp index 7833db779d359..bbf2c82e5058b 100644 --- a/velox/functions/sparksql/UnscaledValueFunction.cpp +++ b/velox/functions/sparksql/UnscaledValueFunction.cpp @@ -49,8 +49,8 @@ class UnscaledValueFunction final : public exec::VectorFunction { std::vector> unscaledValueSignatures() { return {exec::FunctionSignatureBuilder() - .integerVariable("precision") - .integerVariable("scale") + .integerVariable("precision", "min(precision, 18)") + .integerVariable("scale", "min(min(precision, 18), scale)") .returnType("bigint") .argumentType("DECIMAL(precision, scale)") .build()}; diff --git a/velox/functions/sparksql/fuzzer/CMakeLists.txt b/velox/functions/sparksql/fuzzer/CMakeLists.txt index 62f8eb1cda8c5..07887b134d31f 100644 --- a/velox/functions/sparksql/fuzzer/CMakeLists.txt +++ b/velox/functions/sparksql/fuzzer/CMakeLists.txt @@ -24,3 +24,8 @@ target_link_libraries( velox_vector_test_lib gtest gtest_main) + +add_library(spark_expression_fuzzer_utility MakeTimestampArgumentGenerator.cpp + UnscaledValueArgumentGenerator.cpp) + +target_link_libraries(spark_expression_fuzzer_utility velox_common_base) diff --git a/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp new file mode 100644 index 0000000000000..bd11283280c2a --- /dev/null +++ b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h" +#include "velox/expression/tests/ExpressionFuzzer.h" + +namespace facebook::velox::functions::sparksql::test { + +std::vector MakeTimestampArgumentGenerator::generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) { + VELOX_CHECK_GE( + input.args.size(), + 6, + "At least six inputs are expected from the template signature."); + std::vector inputExpressions; + inputExpressions.reserve(6); + for (int index = 0; index < 5; ++index) { + inputExpressions.emplace_back( + expressionFuzzer->generateArg(input.args[index])); + } + + // The required result type of the sixth argument is a short decimal type with + // scale being 6. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. + core::TypedExprPtr argExpr; + if (expressionFuzzer->rand32(0, 1) == 0) { + argExpr = expressionFuzzer->generateArgConstant(input.args[5]); + } else { + argExpr = expressionFuzzer->generateArgColumn(input.args[5]); + } + inputExpressions.emplace_back(argExpr); + + if (input.args.size() == 7) { + // The 7th. argument cannot be randomly generated as it should be a valid + // timezone string. + std::vector timezoneSet = { + "Asia/Kolkata", + "America/Los_Angeles", + "Canada/Atlantic", + "+08:00", + "-10:00"}; + inputExpressions.emplace_back(std::make_shared( + VARCHAR(), variant(timezoneSet[expressionFuzzer->rand32(0, 4)]))); + } + return inputExpressions; +} + +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h new file mode 100644 index 0000000000000..c601817abe187 --- /dev/null +++ b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/tests/ArgumentGenerator.h" + +namespace facebook::velox::functions::sparksql::test { + +namespace facebook::velox::test { +class ExpressionFuzzer; +} + +/// Generates custom arguments for the "make_timestamp" function: 1) decimal +/// argument needs to be constant or column. 2) timezone argument needs to be +/// valid. +class MakeTimestampArgumentGenerator : public velox::test::ArgumentGenerator { + public: + std::vector generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) override; +}; + +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp new file mode 100644 index 0000000000000..97e767cdd6710 --- /dev/null +++ b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h" +#include "velox/expression/tests/ExpressionFuzzer.h" + +namespace facebook::velox::functions::sparksql::test { + +std::vector UnscaledValueArgumentGenerator::generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) { + VELOX_CHECK_EQ( + input.args.size(), + 1, + "Only one input is expected from the template signature."); + + // The required result type of input argument is a short decimal type. But + // when nested expression is generated, that cannot be guaranteed as argument + // precisions and scales cannot be inferred from the result type through a + // decimal function signature. Given this limitation, generate constant or + // column only. + std::vector inputExpressions; + core::TypedExprPtr argExpr; + if (expressionFuzzer->rand32(0, 1) == 0) { + argExpr = expressionFuzzer->generateArgConstant(input.args[0]); + } else { + argExpr = expressionFuzzer->generateArgColumn(input.args[0]); + } + inputExpressions.emplace_back(argExpr); + return inputExpressions; +} + +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h new file mode 100644 index 0000000000000..7e99d3976e0cc --- /dev/null +++ b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/tests/ArgumentGenerator.h" + +namespace facebook::velox::functions::sparksql::test { + +namespace facebook::velox::test { +class ExpressionFuzzer; +} + +/// Generates custom arguments for the "unscaled_value" function: decimal +/// argument needs to be constant or column. +class UnscaledValueArgumentGenerator : public velox::test::ArgumentGenerator { + public: + std::vector generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) override; +}; + +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/type/SimpleFunctionApi.h b/velox/type/SimpleFunctionApi.h index 70848cfc79531..4ad4e3d734e3d 100644 --- a/velox/type/SimpleFunctionApi.h +++ b/velox/type/SimpleFunctionApi.h @@ -63,6 +63,9 @@ using S1 = IntegerVariable<5>; using S2 = IntegerVariable<6>; using S3 = IntegerVariable<7>; using S4 = IntegerVariable<8>; +// The pair size of precisions and scales can be represented with integer +// variables. +const uint8_t kIntegerPairSize = 4; template struct ShortDecimal {