Skip to content

Commit

Permalink
Create ExpressionFuzzerVerifier::Options and move all flags to Fuzzer…
Browse files Browse the repository at this point in the history
…Runner (facebookincubator#7634)

Summary:

Fuzzer runner is the component that wraps ExpressionFuzzerVerifier into a unit test.
Instead of ExpressionFuzzerVerifier reading the options from global flags, this diff
adds ExpressionFuzzerVerifier::Options.

Fuzzer runner will read all configs and pass them to ExpressionFuzzerVerifier.
With this change there are no more global configs controlling ExpressionFuzzerVerifier or
ExpressionFuzzer. All config reading happens in FuzzerRunner.cpp only.

Reviewed By: kevinwilfong

Differential Revision: D51417731
  • Loading branch information
laithsakka authored and facebook-github-bot committed Dec 4, 2023
1 parent 345510c commit ebafcf5
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 201 deletions.
2 changes: 2 additions & 0 deletions velox/expression/tests/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ DEFINE_int64(
"Initial seed for random number generator used to reproduce previous "
"results (0 means start with random seed).");

using facebook::velox::test::FuzzerRunner;

int main(int argc, char** argv) {
facebook::velox::functions::prestosql::registerAllScalarFunctions();

Expand Down
229 changes: 34 additions & 195 deletions velox/expression/tests/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,190 +26,12 @@
#include "velox/expression/ReverseSignatureBinder.h"
#include "velox/expression/tests/ExpressionFuzzer.h"

// Run-control flags for the expression fuzzer verifier. Each flag is read
// through its FLAGS_<name> variable elsewhere in this file.

// Step budget: isDone() compares the iteration index against this value when
// --duration_sec is not positive.
DEFINE_int32(steps, 10, "Number of expressions to generate and execute.");

// Time budget: when positive, isDone() compares elapsed seconds against this
// value instead of using --steps.
DEFINE_int32(
duration_sec,
0,
"For how long it should run (in seconds). If zero, "
"it executes exactly --steps iterations and exits.");

// Copied into VectorFuzzer::Options::vectorSize by getVectorFuzzerOptions().
DEFINE_int32(
batch_size,
100,
"The number of elements on each generated vector.");

// go() calls retryWithTry() only when this is set and the verification result
// carries an exception.
DEFINE_bool(
retry_with_try,
false,
"Retry failed expressions by wrapping it using a try() statement.");

// When set, the catch handlers in go() and retryWithTry() call
// computeMinimumSubExpression().
DEFINE_bool(
find_minimal_subexpression,
false,
"Automatically seeks minimum failed subexpression on result mismatch");

// Passed to verifier_ in the ExpressionFuzzerVerifier constructor.
DEFINE_bool(
disable_constant_folding,
false,
"Disable constant-folding in the common evaluation path.");

// Passed to verifier_ in the ExpressionFuzzerVerifier constructor.
DEFINE_string(
repro_persist_path,
"",
"Directory path for persistence of data and SQL when fuzzer fails for "
"future reproduction. Empty string disables this feature.");

// Passed to verifier_ in the ExpressionFuzzerVerifier constructor.
DEFINE_bool(
persist_and_run_once,
false,
"Persist repro info before evaluation and only run one iteration. "
"This is to rerun with the seed number and persist repro info upon a "
"crash failure. Only effective if repro_persist_path is set.");

// Used by generateLazyColumnIds() as the per-column coinToss() argument; no
// column is wrapped when the value is not positive.
DEFINE_double(
lazy_vector_generation_ratio,
0.0,
"Specifies the probability with which columns in the input row "
"vector will be selected to be wrapped in lazy encoding "
"(expressed as double from 0 to 1).");

// go() requires this to be greater than zero and draws the per-step tree
// count from a uniform integer distribution over [1, this value].
DEFINE_int32(
max_expression_trees_per_step,
1,
"This sets an upper limit on the number of expression trees to generate "
"per step. These trees would be executed in the same ExprSet and can "
"re-use already generated columns and subexpressions (if re-use is "
"enabled).");

// The flags below are used to initialize ExpressionFuzzer::Options; see
// getExpressionFuzzerOptions() for the flag-to-field mapping.

// Copied into ExpressionFuzzer::Options::useOnlyFunctions.
DEFINE_string(
only,
"",
"If specified, Fuzzer will only choose functions from "
"this comma separated list of function names "
"(e.g: --only \"split\" or --only \"substr,ltrim\").");

// Copied into ExpressionFuzzer::Options::specialForms.
DEFINE_string(
special_forms,
"and,or,cast,coalesce,if,switch",
"Comma-separated list of special forms to use in generated expression. "
"Supported special forms: and, or, coalesce, if, switch, cast.");

// Copied into ExpressionFuzzer::Options::maxLevelOfNesting.
DEFINE_int32(
velox_fuzzer_max_level_of_nesting,
10,
"Max levels of expression nesting. The default value is 10 and minimum is 1.");

// Copied into ExpressionFuzzer::Options::maxNumVarArgs.
DEFINE_int32(
max_num_varargs,
5,
"The maximum number of variadic arguments fuzzer will generate for "
"functions that accept variadic arguments. Fuzzer will generate up to "
"max_num_varargs arguments for the variadic list in addition to the "
"required arguments by the function.");

// Copied into both VectorFuzzer::Options::nullRatio (getVectorFuzzerOptions)
// and ExpressionFuzzer::Options::nullRatio (getExpressionFuzzerOptions).
DEFINE_double(
null_ratio,
0.1,
"Chance of adding a null constant to the plan, or null value in a vector "
"(expressed as double from 0 to 1).");

// Copied into ExpressionFuzzer::Options::enableVariadicSignatures.
DEFINE_bool(
enable_variadic_signatures,
false,
"Enable testing of function signatures with variadic arguments.");

// Copied into ExpressionFuzzer::Options::enableDereference.
DEFINE_bool(
enable_dereference,
false,
"Allow fuzzer to generate random expressions with dereference and row_constructor functions.");

// Copied into ExpressionFuzzer::Options::enableComplexTypes.
DEFINE_bool(
velox_fuzzer_enable_complex_types,
false,
"Enable testing of function signatures with complex argument or return types.");

// Copied into ExpressionFuzzer::Options::enableColumnReuse.
DEFINE_bool(
velox_fuzzer_enable_column_reuse,
false,
"Enable generation of expressions where one input column can be "
"used by multiple subexpressions");

// Copied into ExpressionFuzzer::Options::enableExpressionReuse.
DEFINE_bool(
velox_fuzzer_enable_expression_reuse,
false,
"Enable generation of expressions that re-uses already generated "
"subexpressions.");

// Copied into ExpressionFuzzer::Options::functionTickets.
DEFINE_string(
assign_function_tickets,
"",
"Comma separated list of function names and their tickets in the format "
"<function_name>=<tickets>. Every ticket represents an opportunity for "
"a function to be chosen from a pool of candidates. By default, "
"every function has one ticket, and the likelihood of a function "
"being picked can be increased by allotting it more tickets. Note "
"that in practice, increasing the number of tickets does not "
"proportionally increase the likelihood of selection, as the selection "
"process involves filtering the pool of candidates by a required "
"return type so not all functions may compete against the same number "
"of functions at every instance. Number of tickets must be a positive "
"integer. Example: eq=3,floor=5");

namespace facebook::velox::test {

namespace {

using exec::SignatureBinder;

/// Builds the VectorFuzzer options used for generated input data. The vector
/// size and null ratio come from --batch_size and --null_ratio; strings are
/// marked variable-length with stringLength set to 100.
VectorFuzzer::Options getVectorFuzzerOptions() {
  VectorFuzzer::Options fuzzerOptions;

  // Flag-driven settings.
  fuzzerOptions.nullRatio = FLAGS_null_ratio;
  fuzzerOptions.vectorSize = FLAGS_batch_size;

  // Fixed string-generation settings.
  fuzzerOptions.stringLength = 100;
  fuzzerOptions.stringVariableLength = true;

  return fuzzerOptions;
}

/// Assembles ExpressionFuzzer::Options from the command-line flags declared in
/// this file.
/// @param skipFunctions Stored as-is in the returned options' 'skipFunctions'
/// field.
/// @return Options with every field below populated from its flag.
ExpressionFuzzer::Options getExpressionFuzzerOptions(
    const std::unordered_set<std::string>& skipFunctions) {
  ExpressionFuzzer::Options fuzzerOptions;

  // Function selection.
  fuzzerOptions.skipFunctions = skipFunctions;
  fuzzerOptions.useOnlyFunctions = FLAGS_only;
  fuzzerOptions.specialForms = FLAGS_special_forms;
  fuzzerOptions.functionTickets = FLAGS_assign_function_tickets;

  // Feature toggles.
  fuzzerOptions.enableExpressionReuse =
      FLAGS_velox_fuzzer_enable_expression_reuse;
  fuzzerOptions.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse;
  fuzzerOptions.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types;
  fuzzerOptions.enableDereference = FLAGS_enable_dereference;
  fuzzerOptions.enableVariadicSignatures = FLAGS_enable_variadic_signatures;

  // Numeric limits and ratios.
  fuzzerOptions.nullRatio = FLAGS_null_ratio;
  fuzzerOptions.maxNumVarArgs = FLAGS_max_num_varargs;
  fuzzerOptions.maxLevelOfNesting = FLAGS_velox_fuzzer_max_level_of_nesting;

  return fuzzerOptions;
}

// Chooses which columns of the input row vector get wrapped in lazy encoding.
// A column is selected when vectorFuzzer.coinToss() succeeds for
// --lazy_vector_generation_ratio; a selected column is recorded as its index
// when a second coinToss(0.8) succeeds and as the negated index otherwise.
// Negative column indices represent lazy vectors that have been preloaded
// before feeding them to the evaluator. Columns are visited in increasing
// order, so the list is sorted on the absolute value of the entries. Returns
// an empty list when the ratio is not positive.
// NOTE(review): negating column 0 still yields 0, so the sign cannot mark
// column 0 as preloaded -- confirm the consumer tolerates this.
std::vector<int> generateLazyColumnIds(
    const RowVectorPtr& rowVector,
    VectorFuzzer& vectorFuzzer) {
  std::vector<int> lazyColumnIds;
  if (!(FLAGS_lazy_vector_generation_ratio > 0)) {
    return lazyColumnIds;
  }

  for (int column = 0; column < rowVector->childrenSize(); ++column) {
    VELOX_CHECK_NOT_NULL(rowVector->childAt(column));
    if (!vectorFuzzer.coinToss(FLAGS_lazy_vector_generation_ratio)) {
      continue;
    }
    const bool keepIndexPositive = vectorFuzzer.coinToss(0.8);
    lazyColumnIds.push_back(keepIndexPositive ? column : -column);
  }
  return lazyColumnIds;
}

/// Returns row numbers for non-null rows among all children in 'data' or null
/// if all rows are null.
BufferPtr extractNonNullIndices(const RowVectorPtr& data) {
Expand Down Expand Up @@ -258,20 +80,21 @@ RowVectorPtr wrapChildren(
ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
const FunctionSignatureMap& signatureMap,
size_t initialSeed,
const std::unordered_set<std::string>& skipFunctions)
: verifier_(
const ExpressionFuzzerVerifier::Options& options)
: options_(options),
verifier_(
&execCtx_,
{FLAGS_disable_constant_folding,
FLAGS_repro_persist_path,
FLAGS_persist_and_run_once}),
{options_.disableConstantFolding,
options_.reproPersistPath,
options_.persistAndRunOnce}),
vectorFuzzer_(std::make_shared<VectorFuzzer>(
getVectorFuzzerOptions(),
options_.vectorFuzzerOptions,
execCtx_.pool())),
expressionFuzzer_(
signatureMap,
initialSeed,
vectorFuzzer_,
getExpressionFuzzerOptions(skipFunctions)) {
options.expressionFuzzerOptions) {
seed(initialSeed);

// Init stats and register listener.
Expand All @@ -284,6 +107,22 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
}
}

/// Chooses which columns of 'rowVector' get wrapped in lazy encoding.
/// A column is selected when vectorFuzzer.coinToss() succeeds for
/// options_.lazyVectorGenerationRatio; a selected column is recorded as its
/// index when a second coinToss(0.8) succeeds and as the negated index
/// otherwise. Columns are visited in increasing order. Returns an empty list
/// when the configured ratio is not positive.
/// NOTE(review): negating column 0 still yields 0, so the sign carries no
/// information for column 0 -- confirm the consumer tolerates this.
std::vector<int> ExpressionFuzzerVerifier::generateLazyColumnIds(
    const RowVectorPtr& rowVector,
    VectorFuzzer& vectorFuzzer) {
  std::vector<int> lazyColumnIds;
  if (!(options_.lazyVectorGenerationRatio > 0)) {
    return lazyColumnIds;
  }

  for (int column = 0; column < rowVector->childrenSize(); ++column) {
    VELOX_CHECK_NOT_NULL(rowVector->childAt(column));
    if (!vectorFuzzer.coinToss(options_.lazyVectorGenerationRatio)) {
      continue;
    }
    const bool keepIndexPositive = vectorFuzzer.coinToss(0.8);
    lazyColumnIds.push_back(keepIndexPositive ? column : -column);
  }
  return lazyColumnIds;
}

/// Draws the next value from rng_ and passes it to seed().
void ExpressionFuzzerVerifier::reSeed() {
  const auto nextSeed = rng_();
  seed(nextSeed);
}
Expand All @@ -297,12 +136,12 @@ void ExpressionFuzzerVerifier::seed(size_t seed) {

/// Reports whether the fuzzing loop should stop. When a positive run duration
/// is configured, the run is done once the seconds elapsed between 'startTime'
/// and std::chrono::system_clock::now() reach that duration; otherwise it is
/// done once iteration index 'i' reaches the configured step count.
/// NOTE(review): this span shows both the FLAGS_-based and the options_-based
/// variants of the same statements, as rendered by the diff view.
/// NOTE(review): system_clock is not guaranteed to be monotonic; consider
/// whether steady_clock would suit the duration check better -- confirm what
/// clock 'startTime' is taken from.
template <typename T>
bool ExpressionFuzzerVerifier::isDone(size_t i, T startTime) const {
if (FLAGS_duration_sec > 0) {
if (options_.durationSeconds > 0) {
std::chrono::duration<double> elapsed =
std::chrono::system_clock::now() - startTime;
return elapsed.count() >= FLAGS_duration_sec;
return elapsed.count() >= options_.durationSeconds;
}
return i >= FLAGS_steps;
return i >= options_.steps;
}

void ExpressionFuzzerVerifier::logStats() {
Expand Down Expand Up @@ -408,7 +247,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
columnsToWrapInLazy)
.result;
} catch (const std::exception& e) {
if (FLAGS_find_minimal_subexpression) {
if (options_.findMinimalSubexpression) {
computeMinimumSubExpression(
{&execCtx_, {false, ""}},
*vectorFuzzer_,
Expand Down Expand Up @@ -439,7 +278,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
false, // canThrow
columnsToWrapInLazy);
} catch (const std::exception& e) {
if (FLAGS_find_minimal_subexpression) {
if (options_.findMinimalSubexpression) {
computeMinimumSubExpression(
{&execCtx_, {false, ""}},
*vectorFuzzer_,
Expand All @@ -454,10 +293,10 @@ void ExpressionFuzzerVerifier::retryWithTry(

void ExpressionFuzzerVerifier::go() {
VELOX_CHECK(
FLAGS_steps > 0 || FLAGS_duration_sec > 0,
options_.steps > 0 || options_.durationSeconds > 0,
"Either --steps or --duration_sec needs to be greater than zero.")
VELOX_CHECK_GT(
FLAGS_max_expression_trees_per_step,
options_.maxExpressionTreesPerStep,
0,
"--max_expression_trees_per_step needs to be greater than zero.")

Expand All @@ -472,7 +311,7 @@ void ExpressionFuzzerVerifier::go() {
// Generate multiple expression trees and input data vectors. They can
// re-use columns and share sub-expressions if the appropriate flag is set.
int numExpressionTrees = boost::random::uniform_int_distribution<int>(
1, FLAGS_max_expression_trees_per_step)(rng_);
1, options_.maxExpressionTreesPerStep)(rng_);
auto [expressions, inputType, selectionStats] =
expressionFuzzer_.fuzzExpressions(numExpressionTrees);

Expand All @@ -497,7 +336,7 @@ void ExpressionFuzzerVerifier::go() {
true, // canThrow
columnsToWrapInLazy);
} catch (const std::exception& e) {
if (FLAGS_find_minimal_subexpression) {
if (options_.findMinimalSubexpression) {
computeMinimumSubExpression(
{&execCtx_, {false, ""}},
*vectorFuzzer_,
Expand All @@ -515,7 +354,7 @@ void ExpressionFuzzerVerifier::go() {
// If both paths threw compatible exceptions, we add a try() function to
// the expression's root and execute it again. This time the expression
// cannot throw.
if (result.exceptionPtr && FLAGS_retry_with_try) {
if (result.exceptionPtr && options_.retryWithTry) {
LOG(INFO)
<< "Both paths failed with compatible exceptions. Retrying expression using try().";
retryWithTry(plans, rowVector, resultVectors, columnsToWrapInLazy);
Expand Down
Loading

0 comments on commit ebafcf5

Please sign in to comment.