Skip to content

Commit

Permalink
Support decimal in fuzzer test
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Apr 11, 2024
1 parent ae741ca commit 8c9c123
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 132 deletions.
2 changes: 2 additions & 0 deletions velox/docs/develop/testing/fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer:

* ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false.

* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false.

* ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0.

* ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false.
Expand Down
20 changes: 0 additions & 20 deletions velox/expression/ReverseSignatureBinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,7 @@

namespace facebook::velox::exec {

bool ReverseSignatureBinder::hasConstrainedIntegerVariable(
const TypeSignature& type) const {
if (type.parameters().empty()) {
auto it = variables().find(type.baseName());
return it != variables().end() && it->second.isIntegerParameter() &&
it->second.constraint() != "";
}

const auto& parameters = type.parameters();
for (const auto& parameter : parameters) {
if (hasConstrainedIntegerVariable(parameter)) {
return true;
}
}
return false;
}

bool ReverseSignatureBinder::tryBind() {
if (hasConstrainedIntegerVariable(signature_.returnType())) {
return false;
}
tryBindSucceeded_ =
SignatureBinderBase::tryBind(signature_.returnType(), returnType_);
return tryBindSucceeded_;
Expand Down
4 changes: 0 additions & 4 deletions velox/expression/ReverseSignatureBinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase {
}

private:
// Return whether there is a constraint on an integer variable in type
// signature.
bool hasConstrainedIntegerVariable(const TypeSignature& type) const;

const TypePtr returnType_;

// True if 'tryBind' has been called and succeeded. False otherwise.
Expand Down
156 changes: 82 additions & 74 deletions velox/expression/tests/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,52 +39,38 @@ int32_t rand(FuzzerGenerator& rng) {

class DecimalArgGeneratorBase : public ArgGenerator {
public:
TypePtr generateReturnType(
const exec::FunctionSignature& /*signature*/,
FuzzerGenerator& /*rng*/) override {
auto [p, s] = inputs_.begin()->first;
return DECIMAL(p, s);
}

std::vector<TypePtr> generateArgs(
const exec::FunctionSignature& /*signature*/,
const TypePtr& returnType,
FuzzerGenerator& rng) override {
auto inputs = findInputs(returnType, rng);
if (inputs.a == nullptr || inputs.b == nullptr) {
return {};
for (const auto& input : inputs) {
if (input == nullptr) {
return {};
}
}

return {std::move(inputs.a), std::move(inputs.b)};
return inputs;
}

protected:
// Compute result type for all possible pairs of decimal input types. Store
// the results in 'inputs_' maps keyed by return type.
void initialize() {
std::vector<TypePtr> allTypes;
for (auto p = 1; p < 38; ++p) {
for (auto s = 0; s <= p; ++s) {
allTypes.push_back(DECIMAL(p, s));
}
}

for (auto& a : allTypes) {
for (auto& b : allTypes) {
// By default, the result type is considered to be calculated from two input
// decimal types.
for (const auto& a : getAllTypes()) {
for (const auto& b : getAllTypes()) {
auto [p1, s1] = getDecimalPrecisionScale(*a);
auto [p2, s2] = getDecimalPrecisionScale(*b);

if (auto returnType = toReturnType(p1, s1, p2, s2)) {
if (auto returnType = toReturnType(4, p1, s1, p2, s2)) {
inputs_[returnType.value()].push_back({a, b});
}
}
}
}

struct Inputs {
TypePtr a;
TypePtr b;
};
using Inputs = std::vector<TypePtr>;

// Return randomly selected pair of input types that produce the specified
// result type.
Expand All @@ -100,10 +86,24 @@ class DecimalArgGeneratorBase : public ArgGenerator {
return it->second[index];
}

const std::vector<TypePtr>& getAllTypes() const {
const auto generateAllTypes = []() {
std::vector<TypePtr> allTypes;
for (auto p = 1; p < 38; ++p) {
for (auto s = 0; s <= p; ++s) {
allTypes.push_back(DECIMAL(p, s));
}
}
return allTypes;
};

static std::vector<TypePtr> allTypes = generateAllTypes();
return allTypes;
}

// Given precisions and scales of the inputs, return precision and scale of
// the result.
virtual std::optional<std::pair<int, int>>
toReturnType(int p1, int s1, int p2, int s2) = 0;
virtual std::optional<std::pair<int, int>> toReturnType(int count, ...) = 0;

std::map<std::pair<int, int>, std::vector<Inputs>> inputs_;
};
Expand All @@ -115,8 +115,16 @@ class PlusMinusArgGenerator : public DecimalArgGeneratorBase {
}

protected:
std::optional<std::pair<int, int>>
toReturnType(int p1, int s1, int p2, int s2) override {
std::optional<std::pair<int, int>> toReturnType(int count, ...) override {
VELOX_CHECK_EQ(count, 4);
va_list args;
va_start(args, count);
int p1 = va_arg(args, int);
int s1 = va_arg(args, int);
int p2 = va_arg(args, int);
int s2 = va_arg(args, int);
va_end(args);

auto s = std::max(s1, s2);
auto p = std::min(38, std::max(p1 - s1, p2 - s2) + 1 + s);
return {{p, s}};
Expand All @@ -130,8 +138,16 @@ class MultiplyArgGenerator : public DecimalArgGeneratorBase {
}

protected:
std::optional<std::pair<int, int>>
toReturnType(int p1, int s1, int p2, int s2) override {
std::optional<std::pair<int, int>> toReturnType(int count, ...) override {
VELOX_CHECK_EQ(count, 4);
va_list args;
va_start(args, count);
int p1 = va_arg(args, int);
int s1 = va_arg(args, int);
int p2 = va_arg(args, int);
int s2 = va_arg(args, int);
va_end(args);

if (s1 + s2 > 38) {
return std::nullopt;
}
Expand All @@ -149,8 +165,16 @@ class DivideArgGenerator : public DecimalArgGeneratorBase {
}

protected:
std::optional<std::pair<int, int>>
toReturnType(int p1, int s1, int p2, int s2) override {
std::optional<std::pair<int, int>> toReturnType(int count, ...) override {
VELOX_CHECK_EQ(count, 4);
va_list args;
va_start(args, count);
int p1 = va_arg(args, int);
int s1 = va_arg(args, int);
int p2 = va_arg(args, int);
int s2 = va_arg(args, int);
va_end(args);

if (s1 + s2 > 38) {
return std::nullopt;
}
Expand All @@ -163,19 +187,6 @@ class DivideArgGenerator : public DecimalArgGeneratorBase {

class FloorAndRoundArgGenerator : public ArgGenerator {
public:
TypePtr generateReturnType(
const exec::FunctionSignature& signature,
FuzzerGenerator& rng) override {
if (signature.argumentTypes().size() == 1) {
auto p = 1 + rand(rng) % 38;
return DECIMAL(p, 0);
} else {
auto p = 2 + rand(rng) % 37;
auto s = rand(rng) % p;
return DECIMAL(p, s);
}
}

std::vector<TypePtr> generateArgs(
const exec::FunctionSignature& signature,
const TypePtr& returnType,
Expand Down Expand Up @@ -624,16 +635,20 @@ bool useTypeName(

bool isSupportedSignature(
const exec::FunctionSignature& signature,
bool enableComplexType) {
// Not supporting lambda functions, or functions using decimal and
// timestamp with time zone types.
bool enableComplexType,
bool enableDecimalType) {
// When enableComplexType is disabled, not supporting complex functions.
const bool useComplexType = useTypeName(signature, "array") ||
useTypeName(signature, "map") || useTypeName(signature, "row");
// When enableDecimalType is disabled, not supporting decimal functions. Not
// supporting lambda functions, or functions using timestamp with time zone
// types.
return !(
useTypeName(signature, "opaque") ||
// useTypeName(signature, "long_decimal") ||
// useTypeName(signature, "short_decimal") ||
// useTypeName(signature, "decimal") ||
useTypeName(signature, "timestamp with time zone") ||
useTypeName(signature, "interval day to second") ||
(!enableDecimalType && useTypeName(signature, "decimal")) ||
(!enableComplexType && useComplexType) ||
(enableComplexType && useTypeName(signature, "unknown")));
}

Expand Down Expand Up @@ -751,10 +766,14 @@ ExpressionFuzzer::ExpressionFuzzer(
for (const auto& signature : function.second) {
++totalFunctionSignatures;

if (!isSupportedSignature(*signature, options_.enableComplexTypes)) {
if (!isSupportedSignature(
*signature,
options_.enableComplexTypes,
options_.enableDecimalType)) {
continue;
}
if (!(signature->variables().empty() || options_.enableComplexTypes)) {
if (!(signature->variables().empty() || options_.enableComplexTypes ||
options_.enableDecimalType)) {
LOG(WARNING) << "Skipping unsupported signature: " << function.first
<< signature->toString();
continue;
Expand Down Expand Up @@ -793,27 +812,13 @@ ExpressionFuzzer::ExpressionFuzzer(
ArgumentTypeFuzzer typeFuzzer{*signature, localRng};
auto returnType = typeFuzzer.fuzzReturnType();
bool ok = typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs);
if (!ok) {
auto it = argGenerators_.find(function.first);
if (it != argGenerators_.end()) {
returnType = it->second->generateReturnType(*signature, localRng);
argTypes =
it->second->generateArgs(*signature, returnType, localRng);
VELOX_CHECK(
!argTypes.empty(),
"Failed to generate arguments for {} with return type {}",
function.first,
returnType->toString());
} else {
VELOX_FAIL("Failed to generate arguments");
}
} else {
if (ok) {
argTypes = typeFuzzer.argumentTypes();
}
}
if (!isDeterministic(function.first, argTypes)) {
LOG(WARNING) << "Skipping non-deterministic function: "
<< function.first << signature->toString();
<< function.first << signature->toString();
continue;
}

Expand Down Expand Up @@ -894,8 +899,10 @@ ExpressionFuzzer::ExpressionFuzzer(
sortSignatureTemplates(signatureTemplates_);

for (const auto& it : signatureTemplates_) {
auto& returnType = it.signature->returnType().baseName();
auto* returnTypeKey = &returnType;
const auto returnType =
exec::sanitizeName(it.signature->returnType().baseName());
const auto* returnTypeKey = &returnType;

if (it.typeVariables.find(returnType) != it.typeVariables.end()) {
// Return type is a template variable.
returnTypeKey = &kTypeParameterName;
Expand Down Expand Up @@ -969,7 +976,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes(
const std::string& funcName) {
int tickets = getTickets(funcName);
for (int i = 0; i < tickets; i++) {
typeToExpressionList_[type].push_back(funcName);
typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName);
}
}

Expand Down Expand Up @@ -1243,7 +1250,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
} else {
expression = generateExpressionFromConcreteSignatures(
returnType, chosenFunctionName);
if (!expression && options_.enableComplexTypes) {
if (!expression &&
(options_.enableComplexTypes || options_.enableDecimalType)) {
expression = generateExpressionFromSignatureTemplate(
returnType, chosenFunctionName);
}
Expand Down
10 changes: 4 additions & 6 deletions velox/expression/tests/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@ class ArgGenerator {
const exec::FunctionSignature& signature,
const TypePtr& returnType,
FuzzerGenerator& rng) = 0;

/// Returns a randomly generated valid return type for a given signature.
/// TODO Remove. This API should not be needed.
virtual TypePtr generateReturnType(
const exec::FunctionSignature& signature,
FuzzerGenerator& rng) = 0;
};

// A tool that can be used to generate random expressions.
Expand All @@ -67,6 +61,10 @@ class ExpressionFuzzer {
// types.
bool enableComplexTypes = false;

// Enable testing of function signatures with decimal argument or return
// types.
bool enableDecimalType = false;

// Enable generation of expressions where one input column can be used by
// multiple subexpressions.
bool enableColumnReuse = false;
Expand Down
6 changes: 6 additions & 0 deletions velox/expression/tests/FuzzerRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ DEFINE_bool(
false,
"Enable testing of function signatures with complex argument or return types.");

DEFINE_bool(
velox_fuzzer_enable_decimal_type,
false,
"Enable testing of function signatures with decimal argument or return types.");

DEFINE_bool(
velox_fuzzer_enable_column_reuse,
false,
Expand Down Expand Up @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions(
opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures;
opts.enableDereference = FLAGS_enable_dereference;
opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types;
opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type;
opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse;
opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse;
opts.functionTickets = FLAGS_assign_function_tickets;
Expand Down
9 changes: 7 additions & 2 deletions velox/expression/tests/utils/ArgumentTypeFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,20 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) {
const auto& formalArgs = signature_.argumentTypes();
auto formalArgsCnt = formalArgs.size();

std::unordered_map<std::string, int> integerBindings;

if (returnType_) {
exec::ReverseSignatureBinder binder{signature_, returnType_};
if (!binder.tryBind()) {
return false;
}
bindings_ = binder.bindings();
integerBindings_ = binder.integerBindings();
for (const auto& [name, _] : integerBindings_) {
// If the variable used by result type is constrained, argument types can
// not be generated without following the constraints.
if (variables().count(name) && variables().at(name).constraint() != "") {
return false;
}
}
} else {
for (const auto& [name, _] : signature_.variables()) {
bindings_.insert({name, nullptr});
Expand Down
Loading

0 comments on commit 8c9c123

Please sign in to comment.