Fix

rui-mo · Mar 26, 2024 · 3d3d40f · 3d3d40f
1 parent 9c8cc1b
commit 3d3d40f
Show file tree

Hide file tree

Showing 5 changed files with 100 additions and 50 deletions.
diff --git a/velox/expression/ConstantExpr.cpp b/velox/expression/ConstantExpr.cpp
@@ -162,7 +162,6 @@ void appendSqlLiteral(
     case TypeKind::TINYINT:
     case TypeKind::SMALLINT:
     case TypeKind::BIGINT:
-    case TypeKind::HUGEINT:
     case TypeKind::TIMESTAMP:
     case TypeKind::REAL:
     case TypeKind::DOUBLE:

diff --git a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp
@@ -68,14 +68,14 @@ class ArgumentTypeFuzzerTest : public testing::Test {
     ASSERT_TRUE(fuzzer.fuzzArgumentTypes(kMaxVariadicArgs));
 
     auto& argumentTypes = fuzzer.argumentTypes();
-    ASSERT_LE(argumentTypes.size(), expectedArguments);
+    ASSERT_GE(argumentTypes.size(), expectedArguments);
 
     auto& argumentSignatures = signature->argumentTypes();
     int i;
     for (i = 0; i < expectedArguments; ++i) {
       ASSERT_TRUE(argumentTypes[i]->isDecimal())
           << "at " << i
-          << ": Expected decimal. Got: " << argumentTypes[i]->toString();
+          << ": Expected DECIMAL. Got: " << argumentTypes[i]->toString();
     }
 
     if (i < argumentTypes.size()) {
@@ -327,6 +327,22 @@ TEST_F(ArgumentTypeFuzzerTest, decimal) {
                   .argumentType("decimal(i1,i5)")
                   .build();
   testFuzzingDecimalSuccess(signature, 3, TypeKind::BOOLEAN);
+
+  signature = exec::FunctionSignatureBuilder()
+                  .integerVariable("precision")
+                  .integerVariable("scale")
+                  .returnType("DECIMAL(precision, scale)")
+                  .argumentType("DECIMAL(precision, scale)")
+                  .variableArity()
+                  .build();
+  testFuzzingDecimalSuccess(signature, 1);
+
+  signature = exec::FunctionSignatureBuilder()
+                  .integerVariable("precision", "min(max(6, precision), 18)")
+                  .returnType("timestamp")
+                  .argumentType("decimal(precision, 6)")
+                  .build();
+  testFuzzingDecimalSuccess(signature, 1, TypeKind::TIMESTAMP);
 }
 
 TEST_F(ArgumentTypeFuzzerTest, lambda) {

diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp
@@ -989,10 +989,14 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateExtremeFunctionArgs(
 
   // Append varargs to the argument list.
   for (int i = 0; i < numVarArgs; i++) {
-    size_t argClass = rand32(0, 1);
     core::TypedExprPtr argExpr;
+    // The varargs need to be generated following the result type of the first
+    // argument. But when nested expression is generated, that cannot be
+    // guaranteed as argument precisions and scales cannot be inferred from the
+    // result type through a decimal function signature. Given this limitation,
+    // generate constant or column only.
     const auto argType = inputExpressions[0]->type();
-    if (argClass == kArgConstant) {
+    if (rand32(0, 1) == kArgConstant) {
       argExpr = generateArgConstant(argType);
     } else {
       argExpr = generateArgColumn(argType);
@@ -1015,28 +1019,30 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateMakeTimestampArgs(
     inputExpressions.emplace_back(generateArg(input.args[index]));
   }
 
-  // It cannot be ensured that the generated expression follows the required
-  // return type, so only constant or column can be generated as the decimal
-  // argument.
-  size_t argClass = rand32(0, 1);
+  // The required result type of the sixth argument is a short decimal type with
+  // scale being 6. But when nested expression is generated, that cannot be
+  // guaranteed as argument precisions and scales cannot be inferred from the
+  // result type through a decimal function signature. Given this limitation,
+  // generate constant or column only.
   core::TypedExprPtr argExpr;
-  if (argClass == kArgConstant) {
+  if (rand32(0, 1) == kArgConstant) {
     argExpr = generateArgConstant(input.args[5]);
   } else {
     argExpr = generateArgColumn(input.args[5]);
   }
   inputExpressions.emplace_back(argExpr);
 
   if (input.args.size() == 7) {
+    // The 7th. argument cannot be randomly generated as it should be a valid
+    // timezone string.
     std::vector<std::string> timezoneSet = {
         "Asia/Kolkata",
         "America/Los_Angeles",
         "Canada/Atlantic",
         "+08:00",
         "-10:00"};
-    size_t zoneIndex = rand32(0, 4);
     inputExpressions.emplace_back(std::make_shared<core::ConstantTypedExpr>(
-        VARCHAR(), variant(timezoneSet[zoneIndex])));
+        VARCHAR(), variant(timezoneSet[rand32(0, 4)])));
   }
   return inputExpressions;
 }
@@ -1082,13 +1088,14 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateUnscaledValueArgs(
       1,
       "Only one input is expected from the template signature.");
 
-  // It cannot be ensured that the generated expression follows the required
-  // return type, so only constant or column can be generated as the decimal
-  // argument.
+  // The required result type of input argument is a short decimal type. But
+  // when nested expression is generated, that cannot be guaranteed as argument
+  // precisions and scales cannot be inferred from the result type through a
+  // decimal function signature. Given this limitation, generate constant or
+  // column only.
   std::vector<core::TypedExprPtr> inputExpressions;
-  size_t argClass = rand32(0, 1);
   core::TypedExprPtr argExpr;
-  if (argClass == kArgConstant) {
+  if (rand32(0, 1) == kArgConstant) {
     argExpr = generateArgConstant(input.args[0]);
   } else {
     argExpr = generateArgColumn(input.args[0]);
@@ -1204,67 +1211,70 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType(
   if (signature == nullptr) {
     return nullptr;
   }
-  // When function is unnested, the types of args are decided by fuzzer argument
-  // types. For nested function, they are decided by the return types of
-  // children functions. To handle the constraints between input types and
-  // output types of a decimal function, extract the input precisions and scales
-  // from decimal arguments, and bind them to integer variables.
 
   // Checks if any variable is integer constrained, and get the decimal name
   // style.
   bool integerConstrained = false;
   char decimalNameStyle = 0;
   for (const auto& [variableName, variableInfo] : signature->variables()) {
-    const auto constraint = variableInfo.constraint();
     if (variableInfo.isIntegerParameter()) {
+      // If constraints are empty, the integer variable is also regarded to be
+      // constrained as variables are shared across argument and return types.
+      integerConstrained = true;
       if (variableName == "precision" || variableName == "scale") {
-        decimalNameStyle = 'v';
-      } else if (
-          variableName.find("precision") != std::string::npos ||
+        decimalNameStyle = 'a';
+        break;
+      }
+      if (variableName.find("precision") != std::string::npos ||
           variableName.find("scale") != std::string::npos) {
-        decimalNameStyle = 'p';
-      } else if (variableName.find("i") != std::string::npos) {
-        decimalNameStyle = 'i';
+        decimalNameStyle = 'b';
+        break;
+      }
+      if (variableName.find("i") != std::string::npos) {
+        decimalNameStyle = 'c';
+        break;
       }
-      integerConstrained = true;
-      break;
     }
   }
 
+  // To handle the constraints between input types and output types of a decimal
+  // function, extracts the input precisions and scales from decimal arguments,
+  // and bind them to integer variables.
   std::unordered_map<std::string, int> decimalVariablesBindings;
   column_index_t decimalColIndex = 1;
   for (column_index_t i = 0; i < args.size(); ++i) {
     const auto argType = args[i]->type();
     if (argType->isDecimal()) {
       const auto [p, s] = getDecimalPrecisionScale(*argType);
       switch (decimalNameStyle) {
-        case 'v': {
+        case 'a': {
           decimalVariablesBindings["precision"] = p;
           decimalVariablesBindings["scale"] = s;
           break;
         }
-        case 'p': {
+        case 'b': {
           const auto column = std::string(1, 'a' + i);
           decimalVariablesBindings[column + "_precision"] = p;
           decimalVariablesBindings[column + "_scale"] = s;
           break;
         }
-        case 'i': {
+        case 'c': {
           decimalVariablesBindings["i" + std::to_string(decimalColIndex)] = p;
           decimalVariablesBindings
               ["i" + std::to_string(decimalColIndex + kIntegerPairSize)] = s;
           decimalColIndex++;
           break;
         }
         default:
-          VELOX_UNSUPPORTED("Unsupported decimal name style.");
+          VELOX_UNSUPPORTED(
+              "Unsupported decimal name style {}.", decimalNameStyle);
       }
     }
   }
 
+  // Calculates the matched return type through the argument types with argument
+  // type fuzzer, which evaluates the constraints internally.
   if (integerConstrained && decimalVariablesBindings.size() > 0 && signature) {
-    // Compute a correct output type according to constraints with the integer
-    // variables bindings.
     ArgumentTypeFuzzer fuzzer{*signature, rng_, decimalVariablesBindings};
     return fuzzer.fuzzReturnType();
   }
@@ -1277,10 +1287,13 @@ core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable(
     const exec::FunctionSignature* signature) {
   auto args = getArgsForCallable(callable);
 
-  // If a constrained output type is generated, use it to avoid breaking the
-  // constraints between input types and output types. Otherwise, generate a
-  // CallTypedExpr with type because callable.returnType may not have the
-  // required field names.
+  // For a decimal function (especially a nested one), as argument precisions
+  // and scales are randomly generated, callable.returnType does not follow the
+  // required constraints, and the matched result type needs to be recalculated
+  // from the argument types. If a constrained output type can be generated, use
+  // it to avoid breaking the constraints between input types and output types.
+  // Otherwise, generate a CallTypedExpr with type because callable.returnType
+  // may not have the required field names.
   const auto constrainedType = getConstrainedOutputType(args, signature);
   return std::make_shared<core::CallTypedExpr>(
       constrainedType ? constrainedType : type, args, callable.name);

diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h
@@ -253,9 +253,13 @@ class ExpressionFuzzer {
   std::vector<core::TypedExprPtr> generateEmptyApproxSetArgs(
       const CallableSignature& input);
 
+  /// Specialization for the "greatest" and "least" functions: decimal varargs
+  /// need to be constant or column.
   std::vector<core::TypedExprPtr> generateExtremeFunctionArgs(
       const CallableSignature& input);
 
+  /// Specialization for the "make_timestamp" function: 1) decimal argument
+  /// needs to be constant or column. 2) timezone argument needs to be valid.
   std::vector<core::TypedExprPtr> generateMakeTimestampArgs(
       const CallableSignature& input);
 
@@ -273,13 +277,17 @@ class ExpressionFuzzer {
   std::vector<core::TypedExprPtr> generateSwitchArgs(
       const CallableSignature& input);
 
+  /// Specialization for the "unscaled_value" function: decimal argument needs
+  /// to be constant or column.
   std::vector<core::TypedExprPtr> generateUnscaledValueArgs(
       const CallableSignature& input);
 
   // Return a vector of expressions for each argument of callable in order.
   std::vector<core::TypedExprPtr> getArgsForCallable(
       const CallableSignature& callable);
 
+  /// Given the argument types, calculates the return type of a decimal function
+  /// by evaluating constraints.
   TypePtr getConstrainedOutputType(
       const std::vector<core::TypedExprPtr>& args,
       const exec::FunctionSignature* signature);

diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp
@@ -47,26 +47,39 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
       continue;
     }
 
+    // When variable name is 'precision' and result type is decimal, input type
+    // is regarded the same with the result type.
     if (variableName == "precision" && returnType_ &&
         returnType_->isDecimal()) {
       const auto [precision, scale] = getDecimalPrecisionScale(*returnType_);
       integerVariablesBindings_["precision"] = precision;
       integerVariablesBindings_["scale"] = scale;
-    } else if (auto pos = variableName.find("precision");
-               pos != std::string::npos) {
-      // Handle decimal precisions and scales.
+      continue;
+    }
+
+    // When decimal function is registered as vector function, the variable name
+    // contains 'precision' like 'a_precision'.
+    if (auto pos = variableName.find("precision"); pos != std::string::npos) {
+      // Generate a random precision, and corresponding scale should not exceed
+      // the precision.
       const auto precision =
           boost::random::uniform_int_distribution<uint32_t>(1, 38)(rng_);
       integerVariablesBindings_[variableName] = precision;
       const auto colName = variableName.substr(0, pos);
-      // Corresponding scale should not exceed the generated precision.
       integerVariablesBindings_[colName + "scale"] =
           boost::random::uniform_int_distribution<uint32_t>(0, precision)(rng_);
-    } else if (auto pos = variableName.find("i"); pos != std::string::npos) {
+      continue;
+    }
+
+    // When decimal function is registered as simple function, the variable name
+    // contains 'i' like 'i1'.
+    if (auto pos = variableName.find("i"); pos != std::string::npos) {
       VELOX_USER_CHECK_GE(variableName.size(), 2);
       const auto index =
           std::stoi(variableName.substr(pos + 1, variableName.size()));
       if (index <= kIntegerPairSize) {
+        // Generate a random precision, and corresponding scale should not
+        // exceed the precision.
         const auto precision =
             boost::random::uniform_int_distribution<uint32_t>(1, 38)(rng_);
         integerVariablesBindings_[variableName] = precision;
@@ -76,10 +89,11 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
             boost::random::uniform_int_distribution<uint32_t>(
                 0, precision)(rng_);
       }
-    } else {
-      integerVariablesBindings_[variableName] =
-          boost::random::uniform_int_distribution<int32_t>()(rng_);
+      continue;
     }
+
+    integerVariablesBindings_[variableName] =
+        boost::random::uniform_int_distribution<int32_t>()(rng_);
   }
 
   // Handle constraints.
@@ -88,7 +102,7 @@ void ArgumentTypeFuzzer::determineUnboundedIntegerVariables() {
     if (constraint == "") {
       continue;
     }
-    auto calculation = fmt::format("{}={}", variableName, constraint);
+    const auto calculation = fmt::format("{}={}", variableName, constraint);
     expression::calculation::evaluate(calculation, integerVariablesBindings_);
   }
 }