Skip to content

Commit

Permalink
make skipFunctions, onlyFunctions, and specialForms as ExpressionFuzz…
Browse files Browse the repository at this point in the history
…er options. (facebookincubator#7615)

Summary:

More refactoring for the expression fuzzer.

1) Push the skipFunctions, onlyFunctions, and specialForms as ExpressionFuzzer options.
2) FuzzerRunner: just a tool that wraps ExpressionFuzzerVerifier into a unit test.
3) Move the comment from FuzzerRunner class to ExpressionFuzzerVerifier since it describes the latter.


Next diff :
4) Move all the flags from ExpressionFuzzerVerifier to FuzzerRunner and pass them through ExpressionFuzzerVerifier::Options .

1. The Spark fuzzer used to support only "and" and "or"; now it uses all of them: "and,or,cast,coalesce,if,switch".

Reviewed By: kevinwilfong

Differential Revision: D51408535
  • Loading branch information
laithsakka authored and facebook-github-bot committed Dec 4, 2023
1 parent 4574284 commit 345510c
Show file tree
Hide file tree
Showing 8 changed files with 321 additions and 343 deletions.
251 changes: 249 additions & 2 deletions velox/expression/tests/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,252 @@
namespace facebook::velox::test {

namespace {

using exec::SignatureBinder;

// Source/target type names treated as "integral" when generating cast
// signatures. Note that "boolean" is deliberately part of this list, so it is
// paired with the same cast partners as the integer types.
static const std::vector<std::string> kIntegralTypes = {
    "tinyint",
    "smallint",
    "integer",
    "bigint",
    "boolean",
};

// Floating-point type names used when generating cast signatures.
static const std::vector<std::string> kFloatingPointTypes = {
    "real",
    "double",
};

// Builds a one-argument function signature that describes a cast taking
// `fromType` and returning `toType`.
facebook::velox::exec::FunctionSignaturePtr makeCastSignature(
    const std::string& fromType,
    const std::string& toType) {
  facebook::velox::exec::FunctionSignatureBuilder builder;
  builder.argumentType(fromType);
  builder.returnType(toType);
  return builder.build();
}

// Appends to `signatures` one cast signature per entry of kIntegralTypes, each
// casting from that type to `toType`.
void addCastFromIntegralSignatures(
    const std::string& toType,
    std::vector<facebook::velox::exec::FunctionSignaturePtr>& signatures) {
  for (const std::string& sourceType : kIntegralTypes) {
    signatures.emplace_back(makeCastSignature(sourceType, toType));
  }
}

// Appends to `signatures` one cast signature per entry of kFloatingPointTypes,
// each casting from that type to `toType`.
void addCastFromFloatingPointSignatures(
    const std::string& toType,
    std::vector<facebook::velox::exec::FunctionSignaturePtr>& signatures) {
  for (const std::string& sourceType : kFloatingPointTypes) {
    signatures.emplace_back(makeCastSignature(sourceType, toType));
  }
}

// Appends the signature of a cast from varchar to `toType`.
void addCastFromVarcharSignature(
    const std::string& toType,
    std::vector<facebook::velox::exec::FunctionSignaturePtr>& signatures) {
  auto fromVarchar = makeCastSignature("varchar", toType);
  signatures.push_back(fromVarchar);
}

// Appends the signature of a cast from timestamp to `toType`.
void addCastFromTimestampSignature(
    const std::string& toType,
    std::vector<facebook::velox::exec::FunctionSignaturePtr>& signatures) {
  auto fromTimestamp = makeCastSignature("timestamp", toType);
  signatures.push_back(fromTimestamp);
}

// Appends the signature of a cast from date to `toType`.
void addCastFromDateSignature(
    const std::string& toType,
    std::vector<facebook::velox::exec::FunctionSignaturePtr>& signatures) {
  auto fromDate = makeCastSignature("date", toType);
  signatures.push_back(fromDate);
}

std::vector<facebook::velox::exec::FunctionSignaturePtr>
getSignaturesForCast() {
std::vector<facebook::velox::exec::FunctionSignaturePtr> signatures;

// To integral types.
for (const auto& toType : kIntegralTypes) {
addCastFromIntegralSignatures(toType, signatures);
addCastFromFloatingPointSignatures(toType, signatures);
addCastFromVarcharSignature(toType, signatures);
}

// To floating-point types.
for (const auto& toType : kFloatingPointTypes) {
addCastFromIntegralSignatures(toType, signatures);
addCastFromFloatingPointSignatures(toType, signatures);
addCastFromVarcharSignature(toType, signatures);
}

// To varchar type.
addCastFromIntegralSignatures("varchar", signatures);
addCastFromFloatingPointSignatures("varchar", signatures);
addCastFromVarcharSignature("varchar", signatures);
addCastFromDateSignature("varchar", signatures);
addCastFromTimestampSignature("varchar", signatures);

// To timestamp type.
addCastFromVarcharSignature("timestamp", signatures);
addCastFromDateSignature("timestamp", signatures);

// To date type.
addCastFromVarcharSignature("date", signatures);
addCastFromTimestampSignature("date", signatures);

// For each supported translation pair T --> U, add signatures of array(T) -->
// array(U), map(varchar, T) --> map(varchar, U), row(T) --> row(U).
auto size = signatures.size();
for (auto i = 0; i < size; ++i) {
auto from = signatures[i]->argumentTypes()[0].baseName();
auto to = signatures[i]->returnType().baseName();

signatures.push_back(makeCastSignature(
fmt::format("array({})", from), fmt::format("array({})", to)));

signatures.push_back(makeCastSignature(
fmt::format("map(varchar, {})", from),
fmt::format("map(varchar, {})", to)));

signatures.push_back(makeCastSignature(
fmt::format("row({})", from), fmt::format("row({})", to)));
}
return signatures;
}

// Signatures of the special forms the fuzzer knows how to generate, keyed by
// the (lower-case) special form name: and, or, coalesce, if, switch, cast.
//
// The map owns the signatures through FunctionSignaturePtr. appendSpecialForms
// copies raw pointers (signature.get()) out of this map, which is why the map
// is a static constant rather than a temporary.
static const std::unordered_map<
std::string,
std::vector<facebook::velox::exec::FunctionSignaturePtr>>
kSpecialForms = {
{"and",
std::vector<facebook::velox::exec::FunctionSignaturePtr>{
// Signature: and (condition,...) -> output:
// boolean, boolean,.. -> boolean
facebook::velox::exec::FunctionSignatureBuilder()
.argumentType("boolean")
.argumentType("boolean")
.variableArity()
.returnType("boolean")
.build()}},
{"or",
std::vector<facebook::velox::exec::FunctionSignaturePtr>{
// Signature: or (condition,...) -> output:
// boolean, boolean,.. -> boolean
facebook::velox::exec::FunctionSignatureBuilder()
.argumentType("boolean")
.argumentType("boolean")
.variableArity()
.returnType("boolean")
.build()}},
{"coalesce",
std::vector<facebook::velox::exec::FunctionSignaturePtr>{
// Signature: coalesce (input,...) -> output:
// T, T,.. -> T
facebook::velox::exec::FunctionSignatureBuilder()
.typeVariable("T")
.argumentType("T")
.argumentType("T")
.variableArity()
.returnType("T")
.build()}},
{
"if",
std::vector<facebook::velox::exec::FunctionSignaturePtr>{
// Signature: if (condition, then) -> output:
// boolean, T -> T
facebook::velox::exec::FunctionSignatureBuilder()
.typeVariable("T")
.argumentType("boolean")
.argumentType("T")
.returnType("T")
.build(),
// Signature: if (condition, then, else) -> output:
// boolean, T, T -> T
facebook::velox::exec::FunctionSignatureBuilder()
.typeVariable("T")
.argumentType("boolean")
.argumentType("T")
.argumentType("T")
.returnType("T")
.build()},
},
{
"switch",
std::vector<facebook::velox::exec::FunctionSignaturePtr>{
// Signature: switch (condition, then) -> output:
// boolean, T -> T
// This is only used to bind to a randomly selected type for the
// output. While generating arguments, an override is then used
// to generate inputs that can create variations with multiple
// cases and that may or may not include a final else clause.
facebook::velox::exec::FunctionSignatureBuilder()
.typeVariable("T")
.argumentType("boolean")
.argumentType("T")
.returnType("T")
.build()},
},
{
"cast",
/// TODO: Add supported Cast signatures to CastTypedExpr and expose
/// them to fuzzer instead of hard-coding signatures here.
// Until then, the list comes from getSignaturesForCast().
getSignaturesForCast(),
},
};

/// Splits a comma separated list of names into a set.
///
/// Each entry is trimmed of surrounding whitespace and converted to lower case
/// (ASCII). Entries that are empty after trimming -- produced by an empty
/// input string, doubled commas, or a trailing comma -- are dropped instead of
/// being recorded as an empty-string name.
///
/// @param names Comma separated list, e.g. "substr, LTRIM".
/// @return The set of normalized, non-empty names.
static std::unordered_set<std::string> splitNames(const std::string& names) {
  std::vector<folly::StringPiece> pieces;
  folly::split(',', names, pieces);

  std::unordered_set<std::string> nameSet;
  for (const auto& piece : pieces) {
    auto name = folly::trimWhitespace(piece).toString();
    if (name.empty()) {
      // Nothing but whitespace between two separators: not a real name.
      continue;
    }
    folly::toLowerAscii(name);
    nameSet.insert(std::move(name));
  }
  return nameSet;
}

// Restricts `input` to the functions the fuzzer is allowed to use.
//
// First every name in `skipFunctions` (lower-cased) is removed. Then, if
// `onlyFunctions` -- a comma separated list of function names -- is not empty,
// every remaining function that is not named in that list is removed as well.
static void filterSignatures(
    facebook::velox::FunctionSignatureMap& input,
    const std::string& onlyFunctions,
    const std::unordered_set<std::string>& skipFunctions) {
  // Drop the explicitly skipped functions.
  for (const auto& skipped : skipFunctions) {
    std::string lowered = skipped;
    folly::toLowerAscii(lowered);
    input.erase(lowered);
  }

  if (!onlyFunctions.empty()) {
    // Trimmed, lower-cased names that are allowed to stay.
    const auto allowed = splitNames(onlyFunctions);

    auto entry = input.begin();
    while (entry != input.end()) {
      if (allowed.count(entry->first) != 0) {
        ++entry;
      } else {
        // erase() returns the iterator following the removed element.
        entry = input.erase(entry);
      }
    }
  }
}

// Adds to `signatureMap` the signatures of every special form in kSpecialForms
// whose name appears in `specialForms` (a comma separated list). Special forms
// that are not requested are logged and left out. The inserted entries hold
// raw, non-owning pointers to the signatures stored in kSpecialForms.
static void appendSpecialForms(
    facebook::velox::FunctionSignatureMap& signatureMap,
    const std::string& specialForms) {
  const auto requested = splitNames(specialForms);
  for (const auto& entry : kSpecialForms) {
    const auto& name = entry.first;
    if (requested.find(name) == requested.end()) {
      LOG(INFO) << "Skipping special form: " << name;
      continue;
    }

    std::vector<const facebook::velox::exec::FunctionSignature*> rawSignatures;
    for (const auto& owned : entry.second) {
      rawSignatures.push_back(owned.get());
    }
    signatureMap.insert({name, std::move(rawSignatures)});
  }
}

/// Returns if `functionName` with the given `argTypes` is deterministic.
/// Returns true if the function was not found or determinism cannot be
/// established.
Expand Down Expand Up @@ -247,7 +490,7 @@ uint32_t levelOfNesting(const TypePtr& type) {
} // namespace

ExpressionFuzzer::ExpressionFuzzer(
const FunctionSignatureMap& signatureMap,
FunctionSignatureMap signatureMap,
size_t initialSeed,
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options)
Expand All @@ -257,6 +500,10 @@ ExpressionFuzzer::ExpressionFuzzer(
VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided");
seed(initialSeed);

appendSpecialForms(signatureMap, options_.specialForms);
filterSignatures(
signatureMap, options_.useOnlyFunctions, options_.skipFunctions);

size_t totalFunctions = 0;
size_t totalFunctionSignatures = 0;
size_t supportedFunctionSignatures = 0;
Expand Down
15 changes: 13 additions & 2 deletions velox/expression/tests/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,21 @@ class ExpressionFuzzer {
// Chance of adding a null constant to the plan, or null value in a vector
// (expressed as double from 0 to 1).
double nullRatio = 0.1;

// If specified, Fuzzer will only choose functions from this comma separated
// list of function names (e.g: --only "split" or --only
// "substr,ltrim").
std::string useOnlyFunctions = "";

// Comma-separated list of special forms to use in generated expression.
// Supported special forms: and, or, coalesce, if, switch, cast.
std::string specialForms = "and,or,cast,coalesce,if,switch";

std::unordered_set<std::string> skipFunctions;
};

ExpressionFuzzer(
const FunctionSignatureMap& signatureMap,
FunctionSignatureMap signatureMap,
size_t initialSeed,
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options = std::nullopt);
Expand Down Expand Up @@ -289,7 +300,7 @@ class ExpressionFuzzer {

const std::string kTypeParameterName = "T";

Options options_;
const Options options_;

std::vector<CallableSignature> signatures_;
std::vector<SignatureTemplate> signatureTemplates_;
Expand Down
16 changes: 1 addition & 15 deletions velox/expression/tests/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,6 @@ DEFINE_int64(
"Initial seed for random number generator used to reproduce previous "
"results (0 means start with random seed).");

DEFINE_string(
only,
"",
"If specified, Fuzzer will only choose functions from "
"this comma separated list of function names "
"(e.g: --only \"split\" or --only \"substr,ltrim\").");

DEFINE_string(
special_forms,
"and,or,cast,coalesce,if,switch",
"Comma-separated list of special forms to use in generated expression. "
"Supported special forms: and, or, coalesce, if, switch, cast.");

int main(int argc, char** argv) {
facebook::velox::functions::prestosql::registerAllScalarFunctions();

Expand All @@ -62,6 +49,5 @@ int main(int argc, char** argv) {
"width_bucket",
};
size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed;
return FuzzerRunner::run(
FLAGS_only, initialSeed, skipFunctions, FLAGS_special_forms);
return FuzzerRunner::run(initialSeed, skipFunctions);
}
24 changes: 21 additions & 3 deletions velox/expression/tests/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ DEFINE_int32(
"enabled).");

// The flags below are used to initialize ExpressionFuzzer::Options.

// --only: copied into Options::useOnlyFunctions by
// getExpressionFuzzerOptions().
DEFINE_string(
only,
"",
"If specified, Fuzzer will only choose functions from "
"this comma separated list of function names "
"(e.g: --only \"split\" or --only \"substr,ltrim\").");

// --special_forms: copied into Options::specialForms by
// getExpressionFuzzerOptions().
DEFINE_string(
special_forms,
"and,or,cast,coalesce,if,switch",
"Comma-separated list of special forms to use in generated expression. "
"Supported special forms: and, or, coalesce, if, switch, cast.");

DEFINE_int32(
velox_fuzzer_max_level_of_nesting,
10,
Expand Down Expand Up @@ -159,7 +172,8 @@ VectorFuzzer::Options getVectorFuzzerOptions() {
return opts;
}

ExpressionFuzzer::Options getExpressionFuzzerOptions() {
ExpressionFuzzer::Options getExpressionFuzzerOptions(
const std::unordered_set<std::string>& skipFunctions) {
ExpressionFuzzer::Options opts;
opts.maxLevelOfNesting = FLAGS_velox_fuzzer_max_level_of_nesting;
opts.maxNumVarArgs = FLAGS_max_num_varargs;
Expand All @@ -170,6 +184,9 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions() {
opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse;
opts.functionTickets = FLAGS_assign_function_tickets;
opts.nullRatio = FLAGS_null_ratio;
opts.specialForms = FLAGS_special_forms;
opts.useOnlyFunctions = FLAGS_only;
opts.skipFunctions = skipFunctions;
return opts;
}

Expand Down Expand Up @@ -240,7 +257,8 @@ RowVectorPtr wrapChildren(

ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
const FunctionSignatureMap& signatureMap,
size_t initialSeed)
size_t initialSeed,
const std::unordered_set<std::string>& skipFunctions)
: verifier_(
&execCtx_,
{FLAGS_disable_constant_folding,
Expand All @@ -253,7 +271,7 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
signatureMap,
initialSeed,
vectorFuzzer_,
getExpressionFuzzerOptions()) {
getExpressionFuzzerOptions(skipFunctions)) {
seed(initialSeed);

// Init stats and register listener.
Expand Down
Loading

0 comments on commit 345510c

Please sign in to comment.