Skip to content

Commit

Permalink
Add support for DECIMAL types to Expression Fuzzer
Browse files Browse the repository at this point in the history
  • Loading branch information
mbasmanova authored and rui-mo committed Jun 4, 2024
1 parent 61c3379 commit 8fc2681
Show file tree
Hide file tree
Showing 15 changed files with 249 additions and 61 deletions.
105 changes: 104 additions & 1 deletion .github/workflows/experimental.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
ref: "${{ inputs.ref || github.head_ref }}"

- name: "Install dependencies"
run: cd velox && source ./scripts/setup-ubuntu.sh && install_apt_deps
Expand Down Expand Up @@ -103,6 +103,17 @@ jobs:
name: join
path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test

- name: Upload Presto expression fuzzer
uses: actions/upload-artifact@v3
with:
name: presto_expression_fuzzer
path: velox/_build/debug/velox/expression/fuzzer/velox_expression_fuzzer_test

- name: Upload Spark expression fuzzer
uses: actions/upload-artifact@v3
with:
name: spark_expression_fuzzer
path: velox/_build/debug/velox/expression/fuzzer/spark_expression_fuzzer_test

presto-java-aggregation-fuzzer-run:
runs-on: 8-core
Expand Down Expand Up @@ -250,3 +261,95 @@ jobs:
name: join-fuzzer-failure-artifacts
path: |
/tmp/join_fuzzer_repro
presto-expression-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:

- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download presto fuzzer
uses: actions/download-artifact@v3
with:
name: presto_expression_fuzzer

- name: "Run Presto Fuzzer"
run: |
mkdir -p /tmp/presto_fuzzer_repro/
rm -rfv /tmp/presto_fuzzer_repro/*
chmod -R 777 /tmp/presto_fuzzer_repro
chmod +x velox_expression_fuzzer_test
./velox_expression_fuzzer_test \
--seed ${RANDOM} \
--enable_variadic_signatures \
--velox_fuzzer_enable_complex_types \
--velox_fuzzer_enable_decimal_type \
--lazy_vector_generation_ratio 0.2 \
--velox_fuzzer_enable_column_reuse \
--velox_fuzzer_enable_expression_reuse \
--max_expression_trees_per_step 2 \
--retry_with_try \
--enable_dereference \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=1 \
--repro_persist_path=/tmp/presto_fuzzer_repro \
&& echo -e "\n\nFuzzer run finished successfully."
- name: Archive Presto expression production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: presto-fuzzer-failure-artifacts
path: |
/tmp/presto_fuzzer_repro
spark-expression-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:

- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download spark fuzzer
uses: actions/download-artifact@v3
with:
name: spark_expression_fuzzer

- name: "Run Spark Fuzzer"
run: |
mkdir -p /tmp/spark_fuzzer_repro/
rm -rfv /tmp/spark_fuzzer_repro/*
chmod -R 777 /tmp/spark_fuzzer_repro
chmod +x spark_expression_fuzzer_test
./spark_expression_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=1 \
--repro_persist_path=/tmp/spark_fuzzer_repro \
--velox_fuzzer_enable_decimal_type \
&& echo -e "\n\nSpark Fuzzer run finished successfully."
- name: Archive Spark expression production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: spark-fuzzer-failure-artifacts
path: |
/tmp/spark_fuzzer_repro
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,11 @@ functions:
```
# Test the new function in isolation. Use --only flag to restrict the set of functions
# and run for 60 seconds or longer.
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
# Test the new function in combination with other functions. Do not restrict the set
# of functions and run for 10 minutes (600 seconds) or longer.
velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
```

Here are example PRs:
Expand Down
2 changes: 2 additions & 0 deletions velox/docs/develop/testing/fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer:

* ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false.

* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false.

* ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0.

* ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false.
Expand Down
20 changes: 0 additions & 20 deletions velox/expression/ReverseSignatureBinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,7 @@

namespace facebook::velox::exec {

bool ReverseSignatureBinder::hasConstrainedIntegerVariable(
const TypeSignature& type) const {
if (type.parameters().empty()) {
auto it = variables().find(type.baseName());
return it != variables().end() && it->second.isIntegerParameter() &&
it->second.constraint() != "";
}

const auto& parameters = type.parameters();
for (const auto& parameter : parameters) {
if (hasConstrainedIntegerVariable(parameter)) {
return true;
}
}
return false;
}

bool ReverseSignatureBinder::tryBind() {
if (hasConstrainedIntegerVariable(signature_.returnType())) {
return false;
}
tryBindSucceeded_ =
SignatureBinderBase::tryBind(signature_.returnType(), returnType_);
return tryBindSucceeded_;
Expand Down
4 changes: 0 additions & 4 deletions velox/expression/ReverseSignatureBinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase {
}

private:
// Return whether there is a constraint on an integer variable in type
// signature.
bool hasConstrainedIntegerVariable(const TypeSignature& type) const;

const TypePtr returnType_;

// True if 'tryBind' has been called and succeeded. False otherwise.
Expand Down
7 changes: 7 additions & 0 deletions velox/expression/fuzzer/ArgumentTypeFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) {
if (!binder.tryBind()) {
return false;
}
for (const auto& [name, _] : integerBindings_) {
// If the variable used by result type is constrained, argument types can
// not be generated without following the constraints.
if (variables().count(name) && variables().at(name).constraint() != "") {
return false;
}
}
bindings_ = binder.bindings();
integerBindings_ = binder.integerBindings();
} else {
Expand Down
75 changes: 55 additions & 20 deletions velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@ bool isDeterministic(
return typeAndMetadata->second.deterministic;
}

// functionName must be a special form.
// functionName must be a special form or a function whose determinism cannot
// be established.
LOG(WARNING) << "Unable to determine if '" << functionName
<< "' is deterministic or not. Assuming it is.";
return true;
Expand Down Expand Up @@ -435,16 +436,20 @@ bool useTypeName(

bool isSupportedSignature(
const exec::FunctionSignature& signature,
bool enableComplexType) {
// Not supporting lambda functions, or functions using decimal and
// timestamp with time zone types.
bool enableComplexType,
bool enableDecimalType) {
// When enableComplexType is disabled, not supporting complex functions.
const bool useComplexType = useTypeName(signature, "array") ||
useTypeName(signature, "map") || useTypeName(signature, "row");
// When enableDecimalType is disabled, not supporting decimal functions. Not
// supporting lambda functions, or functions using timestamp with time zone
// types.
return !(
useTypeName(signature, "opaque") ||
useTypeName(signature, "long_decimal") ||
useTypeName(signature, "short_decimal") ||
useTypeName(signature, "decimal") ||
useTypeName(signature, "timestamp with time zone") ||
useTypeName(signature, "interval day to second") ||
(!enableDecimalType && useTypeName(signature, "decimal")) ||
(!enableComplexType && useComplexType) ||
(enableComplexType && useTypeName(signature, "unknown")));
}

Expand Down Expand Up @@ -526,10 +531,13 @@ ExpressionFuzzer::ExpressionFuzzer(
FunctionSignatureMap signatureMap,
size_t initialSeed,
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options)
const std::optional<ExpressionFuzzer::Options>& options,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators)
: options_(options.value_or(Options())),
vectorFuzzer_(vectorFuzzer),
state{rng_, std::max(1, options_.maxLevelOfNesting)} {
state{rng_, std::max(1, options_.maxLevelOfNesting)},
argGenerators_(argGenerators) {
VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided");
seed(initialSeed);

Expand All @@ -553,10 +561,14 @@ ExpressionFuzzer::ExpressionFuzzer(
for (const auto& signature : function.second) {
++totalFunctionSignatures;

if (!isSupportedSignature(*signature, options_.enableComplexTypes)) {
if (!isSupportedSignature(
*signature,
options_.enableComplexTypes,
options_.enableDecimalType)) {
continue;
}
if (!(signature->variables().empty() || options_.enableComplexTypes)) {
if (!(signature->variables().empty() || options_.enableComplexTypes ||
options_.enableDecimalType)) {
LOG(WARNING) << "Skipping unsupported signature: " << function.first
<< signature->toString();
continue;
Expand Down Expand Up @@ -590,11 +602,13 @@ ExpressionFuzzer::ExpressionFuzzer(
continue;
}
} else {
// TODO Remove this code. argTypes are used only to call
// isDeterministic() which can be be figured out without the argTypes.
ArgumentTypeFuzzer typeFuzzer{*signature, localRng};
typeFuzzer.fuzzReturnType();
VELOX_CHECK_EQ(
typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true);
argTypes = typeFuzzer.argumentTypes();
if (typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs)) {
argTypes = typeFuzzer.argumentTypes();
}
}
if (!isDeterministic(function.first, argTypes)) {
LOG(WARNING) << "Skipping non-deterministic function: "
Expand Down Expand Up @@ -679,8 +693,10 @@ ExpressionFuzzer::ExpressionFuzzer(
sortSignatureTemplates(signatureTemplates_);

for (const auto& it : signatureTemplates_) {
auto& returnType = it.signature->returnType().baseName();
auto* returnTypeKey = &returnType;
const auto returnType =
exec::sanitizeName(it.signature->returnType().baseName());
const auto* returnTypeKey = &returnType;

if (it.typeVariables.find(returnType) != it.typeVariables.end()) {
// Return type is a template variable.
returnTypeKey = &kTypeParameterName;
Expand Down Expand Up @@ -754,7 +770,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes(
const std::string& funcName) {
int tickets = getTickets(funcName);
for (int i = 0; i < tickets; i++) {
typeToExpressionList_[type].push_back(funcName);
typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName);
}
}

Expand Down Expand Up @@ -1028,7 +1044,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
} else {
expression = generateExpressionFromConcreteSignatures(
returnType, chosenFunctionName);
if (!expression && options_.enableComplexTypes) {
if (!expression &&
(options_.enableComplexTypes || options_.enableDecimalType)) {
expression = generateExpressionFromSignatureTemplate(
returnType, chosenFunctionName);
}
Expand Down Expand Up @@ -1223,8 +1240,26 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate(

auto chosenSignature = *chosen->signature;
ArgumentTypeFuzzer fuzzer{chosenSignature, returnType, rng_};
VELOX_CHECK_EQ(fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true);
auto& argumentTypes = fuzzer.argumentTypes();

std::vector<TypePtr> argumentTypes;
const auto it = argGenerators_.find(functionName);
if (useTypeName(chosenSignature, "decimal") && it != argGenerators_.end()) {
// For function using decimal type, prioritize to use argument generator if
// provided.
argumentTypes = it->second->generateArgs(chosenSignature, returnType, rng_);
if (argumentTypes.empty()) {
return nullptr;
}
} else {
// Use the argument fuzzer to generate argument types.
VELOX_CHECK(
fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs),
"Cannot generate argument types for {} with return type {}.",
functionName,
returnType->toString());
argumentTypes = fuzzer.argumentTypes();
}

auto constantArguments = chosenSignature.constantArguments();

// ArgumentFuzzer may generate duplicate arguments if the signature's
Expand Down
12 changes: 11 additions & 1 deletion velox/expression/fuzzer/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "velox/core/ITypedExpr.h"
#include "velox/core/QueryCtx.h"
#include "velox/expression/Expr.h"
#include "velox/expression/fuzzer/ArgGenerator.h"
#include "velox/expression/fuzzer/FuzzerToolkit.h"
#include "velox/expression/tests/ExpressionVerifier.h"
#include "velox/functions/FunctionRegistry.h"
Expand Down Expand Up @@ -48,6 +49,10 @@ class ExpressionFuzzer {
// types.
bool enableComplexTypes = false;

// Enable testing of function signatures with decimal argument or return
// types.
bool enableDecimalType = false;

// Enable generation of expressions where one input column can be used by
// multiple subexpressions.
bool enableColumnReuse = false;
Expand Down Expand Up @@ -103,7 +108,9 @@ class ExpressionFuzzer {
FunctionSignatureMap signatureMap,
size_t initialSeed,
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options = std::nullopt);
const std::optional<ExpressionFuzzer::Options>& options = std::nullopt,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators = {});

template <typename TFunc>
void registerFuncOverride(TFunc func, const std::string& name);
Expand Down Expand Up @@ -416,6 +423,9 @@ class ExpressionFuzzer {

} state;
friend class ExpressionFuzzerUnitTest;

// Maps from function name to a specific generator of argument types.
std::unordered_map<std::string, std::shared_ptr<ArgGenerator>> argGenerators_;
};

} // namespace facebook::velox::fuzzer
Loading

0 comments on commit 8fc2681

Please sign in to comment.