From 3d3d40f2121f7afe42aad5c0becb57844ba045bc Mon Sep 17 00:00:00 2001 From: rui-mo Date: Tue, 26 Mar 2024 16:07:47 +0800 Subject: [PATCH] Fix --- velox/expression/ConstantExpr.cpp | 1 - .../tests/ArgumentTypeFuzzerTest.cpp | 20 ++++- velox/expression/tests/ExpressionFuzzer.cpp | 89 +++++++++++-------- velox/expression/tests/ExpressionFuzzer.h | 8 ++ .../tests/utils/ArgumentTypeFuzzer.cpp | 32 +++++-- 5 files changed, 100 insertions(+), 50 deletions(-) diff --git a/velox/expression/ConstantExpr.cpp b/velox/expression/ConstantExpr.cpp index 381d93741e45e..28c43048be338 100644 --- a/velox/expression/ConstantExpr.cpp +++ b/velox/expression/ConstantExpr.cpp @@ -162,7 +162,6 @@ void appendSqlLiteral( case TypeKind::TINYINT: case TypeKind::SMALLINT: case TypeKind::BIGINT: - case TypeKind::HUGEINT: case TypeKind::TIMESTAMP: case TypeKind::REAL: case TypeKind::DOUBLE: diff --git a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp index bbaabac61cec1..ba3396d1f386d 100644 --- a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp @@ -68,14 +68,14 @@ class ArgumentTypeFuzzerTest : public testing::Test { ASSERT_TRUE(fuzzer.fuzzArgumentTypes(kMaxVariadicArgs)); auto& argumentTypes = fuzzer.argumentTypes(); - ASSERT_LE(argumentTypes.size(), expectedArguments); + ASSERT_GE(argumentTypes.size(), expectedArguments); auto& argumentSignatures = signature->argumentTypes(); int i; for (i = 0; i < expectedArguments; ++i) { ASSERT_TRUE(argumentTypes[i]->isDecimal()) << "at " << i - << ": Expected decimal. Got: " << argumentTypes[i]->toString(); + << ": Expected DECIMAL. Got: " << argumentTypes[i]->toString(); } if (i < argumentTypes.size()) { @@ -327,6 +327,22 @@ TEST_F(ArgumentTypeFuzzerTest, decimal) { .argumentType("decimal(i1,i5)") .build(); testFuzzingDecimalSuccess(signature, 3, TypeKind::BOOLEAN); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("precision") + .integerVariable("scale") + .returnType("DECIMAL(precision, scale)") + .argumentType("DECIMAL(precision, scale)") + .variableArity() + .build(); + testFuzzingDecimalSuccess(signature, 1); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("precision", "min(max(6, precision), 18)") + .returnType("timestamp") + .argumentType("decimal(precision, 6)") + .build(); + testFuzzingDecimalSuccess(signature, 1, TypeKind::TIMESTAMP); } TEST_F(ArgumentTypeFuzzerTest, lambda) { diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 4ff197d120bde..02ff7b5597aec 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -989,10 +989,14 @@ std::vector ExpressionFuzzer::generateExtremeFunctionArgs( // Append varargs to the argument list. for (int i = 0; i < numVarArgs; i++) { - size_t argClass = rand32(0, 1); core::TypedExprPtr argExpr; + // The varargs need to be generated following the result type of the first + // argument. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. const auto argType = inputExpressions[0]->type(); - if (argClass == kArgConstant) { + if (rand32(0, 1) == kArgConstant) { argExpr = generateArgConstant(argType); } else { argExpr = generateArgColumn(argType); @@ -1015,12 +1019,13 @@ std::vector ExpressionFuzzer::generateMakeTimestampArgs( inputExpressions.emplace_back(generateArg(input.args[index])); } - // It cannot be ensured that the generated expression follows the required - // return type, so only constant or column can be generated as the decimal - // argument. - size_t argClass = rand32(0, 1); + // The required result type of the sixth argument is a short decimal type with + // scale being 6. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. core::TypedExprPtr argExpr; - if (argClass == kArgConstant) { + if (rand32(0, 1) == kArgConstant) { argExpr = generateArgConstant(input.args[5]); } else { argExpr = generateArgColumn(input.args[5]); @@ -1028,15 +1033,16 @@ std::vector ExpressionFuzzer::generateMakeTimestampArgs( inputExpressions.emplace_back(argExpr); if (input.args.size() == 7) { + // The 7th. argument cannot be randomly generated as it should be a valid + // timezone string. std::vector timezoneSet = { "Asia/Kolkata", "America/Los_Angeles", "Canada/Atlantic", "+08:00", "-10:00"}; - size_t zoneIndex = rand32(0, 4); inputExpressions.emplace_back(std::make_shared( - VARCHAR(), variant(timezoneSet[zoneIndex]))); + VARCHAR(), variant(timezoneSet[rand32(0, 4)]))); } return inputExpressions; } @@ -1082,13 +1088,14 @@ std::vector ExpressionFuzzer::generateUnscaledValueArgs( 1, "Only one input is expected from the template signature."); - // It cannot be ensured that the generated expression follows the required - // return type, so only constant or column can be generated as the decimal - // argument. + // The required result type of input argument is a short decimal type. But + // when nested expression is generated, that cannot be guaranteed as argument + // precisions and scales cannot be inferred from the result type through a + // decimal function signature. Given this limitation, generate constant or + // column only. std::vector inputExpressions; - size_t argClass = rand32(0, 1); core::TypedExprPtr argExpr; - if (argClass == kArgConstant) { + if (rand32(0, 1) == kArgConstant) { argExpr = generateArgConstant(input.args[0]); } else { argExpr = generateArgColumn(input.args[0]); @@ -1204,33 +1211,35 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType( if (signature == nullptr) { return nullptr; } - // When function is unnested, the types of args are decided by fuzzer argument - // types. For nested function, they are decided by the return types of - // children functions. To handle the constraints between input types and - // output types of a decimal function, extract the input precisions and scales - // from decimal arguments, and bind them to integer variables. // Checks if any variable is integer constrained, and get the decimal name // style. bool integerConstrained = false; char decimalNameStyle = 0; for (const auto& [variableName, variableInfo] : signature->variables()) { - const auto constraint = variableInfo.constraint(); if (variableInfo.isIntegerParameter()) { + // If constraints are empty, the integer variable is also regarded to be + // constrained as variables are shared across argument and return types. + integerConstrained = true; if (variableName == "precision" || variableName == "scale") { - decimalNameStyle = 'v'; - } else if ( - variableName.find("precision") != std::string::npos || + decimalNameStyle = 'a'; + break; + } + if (variableName.find("precision") != std::string::npos || variableName.find("scale") != std::string::npos) { - decimalNameStyle = 'p'; - } else if (variableName.find("i") != std::string::npos) { - decimalNameStyle = 'i'; + decimalNameStyle = 'b'; + break; + } + if (variableName.find("i") != std::string::npos) { + decimalNameStyle = 'c'; + break; } - integerConstrained = true; - break; } } + // To handle the constraints between input types and output types of a decimal + // function, extracts the input precisions and scales from decimal arguments, + // and bind them to integer variables. std::unordered_map decimalVariablesBindings; column_index_t decimalColIndex = 1; for (column_index_t i = 0; i < args.size(); ++i) { @@ -1238,18 +1247,18 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType( if (argType->isDecimal()) { const auto [p, s] = getDecimalPrecisionScale(*argType); switch (decimalNameStyle) { - case 'v': { + case 'a': { decimalVariablesBindings["precision"] = p; decimalVariablesBindings["scale"] = s; break; } - case 'p': { + case 'b': { const auto column = std::string(1, 'a' + i); decimalVariablesBindings[column + "_precision"] = p; decimalVariablesBindings[column + "_scale"] = s; break; } - case 'i': { + case 'c': { decimalVariablesBindings["i" + std::to_string(decimalColIndex)] = p; decimalVariablesBindings ["i" + std::to_string(decimalColIndex + kIntegerPairSize)] = s; @@ -1257,14 +1266,15 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType( break; } default: - VELOX_UNSUPPORTED("Unsupported decimal name style."); + VELOX_UNSUPPORTED( + "Unsupported decimal name style {}.", decimalNameStyle); } } } + // Calculates the matched return type through the argument types with argument + // type fuzzer, which evaluates the constraints internally. if (integerConstrained && decimalVariablesBindings.size() > 0 && signature) { - // Compute a correct output type according to constraints with the integer - // variables bindings. ArgumentTypeFuzzer fuzzer{*signature, rng_, decimalVariablesBindings}; return fuzzer.fuzzReturnType(); } @@ -1277,10 +1287,13 @@ core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable( const exec::FunctionSignature* signature) { auto args = getArgsForCallable(callable); - // If a constrained output type is generated, use it to avoid breaking the - // constraints between input types and output types. Otherwise, generate a - // CallTypedExpr with type because callable.returnType may not have the - // required field names. + // For a decimal function (especially a nested one), as argument precisions + // and scales are randomly generated, callable.returnType does not follow the + // required constraints, and the matched result type needs to be recalculated + // from the argument types. If a constrained output type can be generated, use + // it to avoid breaking the constraints between input types and output types. + // Otherwise, generate a CallTypedExpr with type because callable.returnType + // may not have the required field names. const auto constrainedType = getConstrainedOutputType(args, signature); return std::make_shared( constrainedType ? constrainedType : type, args, callable.name); diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 508999633d972..31f412ad3fba1 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -253,9 +253,13 @@ class ExpressionFuzzer { std::vector generateEmptyApproxSetArgs( const CallableSignature& input); + /// Specialization for the "greatest" and "least" functions: decimal varargs + /// need to be constant or column. std::vector generateExtremeFunctionArgs( const CallableSignature& input); + /// Specialization for the "make_timestamp" function: 1) decimal argument + /// needs to be constant or column. 2) timezone argument needs to be valid. std::vector generateMakeTimestampArgs( const CallableSignature& input); @@ -273,6 +277,8 @@ class ExpressionFuzzer { std::vector generateSwitchArgs( const CallableSignature& input); + /// Specialization for the "unscaled_value" function: decimal argument needs + /// to be constant or column. std::vector generateUnscaledValueArgs( const CallableSignature& input); @@ -280,6 +286,8 @@ class ExpressionFuzzer { std::vector getArgsForCallable( const CallableSignature& callable); + /// Given the argument types, calculates the return type of a decimal function + /// by evaluating constraints. TypePtr getConstrainedOutputType( const std::vector& args, const exec::FunctionSignature* signature); diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 93ed98cac6666..838c5ab50f760 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -47,26 +47,39 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() { continue; } + // When variable name is 'precision' and result type is decimal, input type + // is regarded the same with the result type. if (variableName == "precision" && returnType_ && returnType_->isDecimal()) { const auto [precision, scale] = getDecimalPrecisionScale(*returnType_); integerVariablesBindings_["precision"] = precision; integerVariablesBindings_["scale"] = scale; - } else if (auto pos = variableName.find("precision"); - pos != std::string::npos) { - // Handle decimal precisions and scales. + continue; + } + + // When decimal function is registered as vector function, the variable name + // contains 'precision' like 'a_precision'. + if (auto pos = variableName.find("precision"); pos != std::string::npos) { + // Generate a random precision, and corresponding scale should not exceed + // the precision. const auto precision = boost::random::uniform_int_distribution(1, 38)(rng_); integerVariablesBindings_[variableName] = precision; const auto colName = variableName.substr(0, pos); - // Corresponding scale should not exceed the generated precision. integerVariablesBindings_[colName + "scale"] = boost::random::uniform_int_distribution(0, precision)(rng_); - } else if (auto pos = variableName.find("i"); pos != std::string::npos) { + continue; + } + + // When decimal function is registered as simple function, the variable name + // contains 'i' like 'i1'. + if (auto pos = variableName.find("i"); pos != std::string::npos) { VELOX_USER_CHECK_GE(variableName.size(), 2); const auto index = std::stoi(variableName.substr(pos + 1, variableName.size())); if (index <= kIntegerPairSize) { + // Generate a random precision, and corresponding scale should not + // exceed the precision. const auto precision = boost::random::uniform_int_distribution(1, 38)(rng_); integerVariablesBindings_[variableName] = precision; @@ -76,10 +89,11 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() { boost::random::uniform_int_distribution( 0, precision)(rng_); } - } else { - integerVariablesBindings_[variableName] = - boost::random::uniform_int_distribution()(rng_); + continue; } + + integerVariablesBindings_[variableName] = + boost::random::uniform_int_distribution()(rng_); } // Handle constraints. @@ -88,7 +102,7 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() { if (constraint == "") { continue; } - auto calculation = fmt::format("{}={}", variableName, constraint); + const auto calculation = fmt::format("{}={}", variableName, constraint); expression::calculation::evaluate(calculation, integerVariablesBindings_); } }