Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Mar 26, 2024
1 parent 9c8cc1b commit 3d3d40f
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 50 deletions.
1 change: 0 additions & 1 deletion velox/expression/ConstantExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ void appendSqlLiteral(
case TypeKind::TINYINT:
case TypeKind::SMALLINT:
case TypeKind::BIGINT:
case TypeKind::HUGEINT:
case TypeKind::TIMESTAMP:
case TypeKind::REAL:
case TypeKind::DOUBLE:
Expand Down
20 changes: 18 additions & 2 deletions velox/expression/tests/ArgumentTypeFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ class ArgumentTypeFuzzerTest : public testing::Test {
ASSERT_TRUE(fuzzer.fuzzArgumentTypes(kMaxVariadicArgs));

auto& argumentTypes = fuzzer.argumentTypes();
ASSERT_LE(argumentTypes.size(), expectedArguments);
ASSERT_GE(argumentTypes.size(), expectedArguments);

auto& argumentSignatures = signature->argumentTypes();
int i;
for (i = 0; i < expectedArguments; ++i) {
ASSERT_TRUE(argumentTypes[i]->isDecimal())
<< "at " << i
<< ": Expected decimal. Got: " << argumentTypes[i]->toString();
<< ": Expected DECIMAL. Got: " << argumentTypes[i]->toString();
}

if (i < argumentTypes.size()) {
Expand Down Expand Up @@ -327,6 +327,22 @@ TEST_F(ArgumentTypeFuzzerTest, decimal) {
.argumentType("decimal(i1,i5)")
.build();
testFuzzingDecimalSuccess(signature, 3, TypeKind::BOOLEAN);

signature = exec::FunctionSignatureBuilder()
.integerVariable("precision")
.integerVariable("scale")
.returnType("DECIMAL(precision, scale)")
.argumentType("DECIMAL(precision, scale)")
.variableArity()
.build();
testFuzzingDecimalSuccess(signature, 1);

signature = exec::FunctionSignatureBuilder()
.integerVariable("precision", "min(max(6, precision), 18)")
.returnType("timestamp")
.argumentType("decimal(precision, 6)")
.build();
testFuzzingDecimalSuccess(signature, 1, TypeKind::TIMESTAMP);
}

TEST_F(ArgumentTypeFuzzerTest, lambda) {
Expand Down
89 changes: 51 additions & 38 deletions velox/expression/tests/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -989,10 +989,14 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateExtremeFunctionArgs(

// Append varargs to the argument list.
for (int i = 0; i < numVarArgs; i++) {
size_t argClass = rand32(0, 1);
core::TypedExprPtr argExpr;
// The varargs need to be generated following the result type of the first
// argument. But when nested expression is generated, that cannot be
// guaranteed as argument precisions and scales cannot be inferred from the
// result type through a decimal function signature. Given this limitation,
// generate constant or column only.
const auto argType = inputExpressions[0]->type();
if (argClass == kArgConstant) {
if (rand32(0, 1) == kArgConstant) {
argExpr = generateArgConstant(argType);
} else {
argExpr = generateArgColumn(argType);
Expand All @@ -1015,28 +1019,30 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateMakeTimestampArgs(
inputExpressions.emplace_back(generateArg(input.args[index]));
}

// It cannot be ensured that the generated expression follows the required
// return type, so only constant or column can be generated as the decimal
// argument.
size_t argClass = rand32(0, 1);
// The required result type of the sixth argument is a short decimal type with
// scale being 6. But when nested expression is generated, that cannot be
// guaranteed as argument precisions and scales cannot be inferred from the
// result type through a decimal function signature. Given this limitation,
// generate constant or column only.
core::TypedExprPtr argExpr;
if (argClass == kArgConstant) {
if (rand32(0, 1) == kArgConstant) {
argExpr = generateArgConstant(input.args[5]);
} else {
argExpr = generateArgColumn(input.args[5]);
}
inputExpressions.emplace_back(argExpr);

if (input.args.size() == 7) {
// The 7th. argument cannot be randomly generated as it should be a valid
// timezone string.
std::vector<std::string> timezoneSet = {
"Asia/Kolkata",
"America/Los_Angeles",
"Canada/Atlantic",
"+08:00",
"-10:00"};
size_t zoneIndex = rand32(0, 4);
inputExpressions.emplace_back(std::make_shared<core::ConstantTypedExpr>(
VARCHAR(), variant(timezoneSet[zoneIndex])));
VARCHAR(), variant(timezoneSet[rand32(0, 4)])));
}
return inputExpressions;
}
Expand Down Expand Up @@ -1082,13 +1088,14 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateUnscaledValueArgs(
1,
"Only one input is expected from the template signature.");

// It cannot be ensured that the generated expression follows the required
// return type, so only constant or column can be generated as the decimal
// argument.
// The required result type of input argument is a short decimal type. But
// when nested expression is generated, that cannot be guaranteed as argument
// precisions and scales cannot be inferred from the result type through a
// decimal function signature. Given this limitation, generate constant or
// column only.
std::vector<core::TypedExprPtr> inputExpressions;
size_t argClass = rand32(0, 1);
core::TypedExprPtr argExpr;
if (argClass == kArgConstant) {
if (rand32(0, 1) == kArgConstant) {
argExpr = generateArgConstant(input.args[0]);
} else {
argExpr = generateArgColumn(input.args[0]);
Expand Down Expand Up @@ -1204,67 +1211,70 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType(
if (signature == nullptr) {
return nullptr;
}
// When function is unnested, the types of args are decided by fuzzer argument
// types. For nested function, they are decided by the return types of
// children functions. To handle the constraints between input types and
// output types of a decimal function, extract the input precisions and scales
// from decimal arguments, and bind them to integer variables.

// Checks if any variable is integer constrained, and get the decimal name
// style.
bool integerConstrained = false;
char decimalNameStyle = 0;
for (const auto& [variableName, variableInfo] : signature->variables()) {
const auto constraint = variableInfo.constraint();
if (variableInfo.isIntegerParameter()) {
// If constraints are empty, the integer variable is also regarded to be
// constrained as variables are shared across argument and return types.
integerConstrained = true;
if (variableName == "precision" || variableName == "scale") {
decimalNameStyle = 'v';
} else if (
variableName.find("precision") != std::string::npos ||
decimalNameStyle = 'a';
break;
}
if (variableName.find("precision") != std::string::npos ||
variableName.find("scale") != std::string::npos) {
decimalNameStyle = 'p';
} else if (variableName.find("i") != std::string::npos) {
decimalNameStyle = 'i';
decimalNameStyle = 'b';
break;
}
if (variableName.find("i") != std::string::npos) {
decimalNameStyle = 'c';
break;
}
integerConstrained = true;
break;
}
}

// To handle the constraints between input types and output types of a decimal
// function, extracts the input precisions and scales from decimal arguments,
// and bind them to integer variables.
std::unordered_map<std::string, int> decimalVariablesBindings;
column_index_t decimalColIndex = 1;
for (column_index_t i = 0; i < args.size(); ++i) {
const auto argType = args[i]->type();
if (argType->isDecimal()) {
const auto [p, s] = getDecimalPrecisionScale(*argType);
switch (decimalNameStyle) {
case 'v': {
case 'a': {
decimalVariablesBindings["precision"] = p;
decimalVariablesBindings["scale"] = s;
break;
}
case 'p': {
case 'b': {
const auto column = std::string(1, 'a' + i);
decimalVariablesBindings[column + "_precision"] = p;
decimalVariablesBindings[column + "_scale"] = s;
break;
}
case 'i': {
case 'c': {
decimalVariablesBindings["i" + std::to_string(decimalColIndex)] = p;
decimalVariablesBindings
["i" + std::to_string(decimalColIndex + kIntegerPairSize)] = s;
decimalColIndex++;
break;
}
default:
VELOX_UNSUPPORTED("Unsupported decimal name style.");
VELOX_UNSUPPORTED(
"Unsupported decimal name style {}.", decimalNameStyle);
}
}
}

// Calculates the matched return type through the argument types with argument
// type fuzzer, which evaluates the constraints internally.
if (integerConstrained && decimalVariablesBindings.size() > 0 && signature) {
// Compute a correct output type according to constraints with the integer
// variables bindings.
ArgumentTypeFuzzer fuzzer{*signature, rng_, decimalVariablesBindings};
return fuzzer.fuzzReturnType();
}
Expand All @@ -1277,10 +1287,13 @@ core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable(
const exec::FunctionSignature* signature) {
auto args = getArgsForCallable(callable);

// If a constrained output type is generated, use it to avoid breaking the
// constraints between input types and output types. Otherwise, generate a
// CallTypedExpr with type because callable.returnType may not have the
// required field names.
// For a decimal function (especially a nested one), as argument precisions
// and scales are randomly generated, callable.returnType does not follow the
// required constraints, and the matched result type needs to be recalculated
// from the argument types. If a constrained output type can be generated, use
// it to avoid breaking the constraints between input types and output types.
// Otherwise, generate a CallTypedExpr with type because callable.returnType
// may not have the required field names.
const auto constrainedType = getConstrainedOutputType(args, signature);
return std::make_shared<core::CallTypedExpr>(
constrainedType ? constrainedType : type, args, callable.name);
Expand Down
8 changes: 8 additions & 0 deletions velox/expression/tests/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,13 @@ class ExpressionFuzzer {
std::vector<core::TypedExprPtr> generateEmptyApproxSetArgs(
const CallableSignature& input);

/// Specialization for the "greatest" and "least" functions: decimal varargs
/// need to be constant or column.
std::vector<core::TypedExprPtr> generateExtremeFunctionArgs(
const CallableSignature& input);

/// Specialization for the "make_timestamp" function: 1) decimal argument
/// needs to be constant or column. 2) timezone argument needs to be valid.
std::vector<core::TypedExprPtr> generateMakeTimestampArgs(
const CallableSignature& input);

Expand All @@ -273,13 +277,17 @@ class ExpressionFuzzer {
std::vector<core::TypedExprPtr> generateSwitchArgs(
const CallableSignature& input);

/// Specialization for the "unscaled_value" function: decimal argument needs
/// to be constant or column.
std::vector<core::TypedExprPtr> generateUnscaledValueArgs(
const CallableSignature& input);

// Return a vector of expressions for each argument of callable in order.
std::vector<core::TypedExprPtr> getArgsForCallable(
const CallableSignature& callable);

/// Given the argument types, calculates the return type of a decimal function
/// by evaluating constraints.
TypePtr getConstrainedOutputType(
const std::vector<core::TypedExprPtr>& args,
const exec::FunctionSignature* signature);
Expand Down
32 changes: 23 additions & 9 deletions velox/expression/tests/utils/ArgumentTypeFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,39 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
continue;
}

// When variable name is 'precision' and result type is decimal, input type
// is regarded the same with the result type.
if (variableName == "precision" && returnType_ &&
returnType_->isDecimal()) {
const auto [precision, scale] = getDecimalPrecisionScale(*returnType_);
integerVariablesBindings_["precision"] = precision;
integerVariablesBindings_["scale"] = scale;
} else if (auto pos = variableName.find("precision");
pos != std::string::npos) {
// Handle decimal precisions and scales.
continue;
}

// When decimal function is registered as vector function, the variable name
// contains 'precision' like 'a_precision'.
if (auto pos = variableName.find("precision"); pos != std::string::npos) {
// Generate a random precision, and corresponding scale should not exceed
// the precision.
const auto precision =
boost::random::uniform_int_distribution<uint32_t>(1, 38)(rng_);
integerVariablesBindings_[variableName] = precision;
const auto colName = variableName.substr(0, pos);
// Corresponding scale should not exceed the generated precision.
integerVariablesBindings_[colName + "scale"] =
boost::random::uniform_int_distribution<uint32_t>(0, precision)(rng_);
} else if (auto pos = variableName.find("i"); pos != std::string::npos) {
continue;
}

// When decimal function is registered as simple function, the variable name
// contains 'i' like 'i1'.
if (auto pos = variableName.find("i"); pos != std::string::npos) {
VELOX_USER_CHECK_GE(variableName.size(), 2);
const auto index =
std::stoi(variableName.substr(pos + 1, variableName.size()));
if (index <= kIntegerPairSize) {
// Generate a random precision, and corresponding scale should not
// exceed the precision.
const auto precision =
boost::random::uniform_int_distribution<uint32_t>(1, 38)(rng_);
integerVariablesBindings_[variableName] = precision;
Expand All @@ -76,10 +89,11 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
boost::random::uniform_int_distribution<uint32_t>(
0, precision)(rng_);
}
} else {
integerVariablesBindings_[variableName] =
boost::random::uniform_int_distribution<int32_t>()(rng_);
continue;
}

integerVariablesBindings_[variableName] =
boost::random::uniform_int_distribution<int32_t>()(rng_);
}

// Handle constraints.
Expand All @@ -88,7 +102,7 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
if (constraint == "") {
continue;
}
auto calculation = fmt::format("{}={}", variableName, constraint);
const auto calculation = fmt::format("{}={}", variableName, constraint);
expression::calculation::evaluate(calculation, integerVariablesBindings_);
}
}
Expand Down

0 comments on commit 3d3d40f

Please sign in to comment.