Skip to content

Commit

Permalink
Add support for lambda functions to ExpressionFuzzer (#7834)
Browse files Browse the repository at this point in the history
Summary:
The tricky part of supporting lambda functions is finding a way to generate
lambdas given both input types and the result type, i.e. generate
LambdaTypedExpr to match given FunctionType.  This implementation is very
basic. It looks for a matching function signature or signature template and if
none found uses a lambda expression that returns constant value.

For example, lambda for filter(array, function(T) -> boolean) can be
x -> is_null(x) or x -> cast(x as boolean) or x -> true.

This very simple implementation is quite effective. It helped discover a number
of bugs: #7821 #7824 #7828 #7829 #7831 #7851 #7861

This new logic cannot generate valid lambda for 'comparator' argument of
array_sort. Hence, added support for skipping individual function signatures
and used it to exclude this particular signature for array_sort.

Ran the Fuzzer multiple times for 10 minutes each and collected custom stats on
the lambda functions tested and lambda expressions generated. 20 functions
were tested using 154 different lambda expressions. More details in
https://gist.github.com/mbasmanova/1530e3cdfe2f6f4fef35a03f07ee4a39

```
velox_expression_fuzzer_test --logtostderr --velox_fuzzer_enable_complex_types --enable_variadic_signatures --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse --max_expression_trees_per_step 2 --retry_with_try --enable_dereference --duration_sec 1200
```

Pull Request resolved: #7834

Test Plan:
Imported from GitHub, without a `Test Plan:` line.

```
$ buck2 run stylus/xstream/query/functions/test:fuzzer_test

$ buck2 run stylus/xstream/query/functions/test:f3_fuzzer_test
```

Reviewed By: xiaoxmeng

Differential Revision: D51756700

Pulled By: mbasmanova

fbshipit-source-id: 4a60fd29e1e670f905e3dc8b5a2180e4440796e2
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Dec 5, 2023
1 parent 156abe3 commit 975ca3a
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 13 deletions.
5 changes: 5 additions & 0 deletions velox/expression/FunctionSignature.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ class FunctionSignature {
return constantArguments_;
}

bool hasConstantArgument() const {
return std::any_of(
constantArguments_.begin(), constantArguments_.end(), folly::identity);
}

bool variableArity() const {
return variableArity_;
}
Expand Down
169 changes: 168 additions & 1 deletion velox/expression/tests/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,42 @@ namespace facebook::velox::test {
namespace {

using exec::SignatureBinder;
using exec::SignatureBinderBase;

class FullSignatureBinder : public SignatureBinderBase {
public:
FullSignatureBinder(
const exec::FunctionSignature& signature,
const std::vector<TypePtr>& argTypes,
const TypePtr& returnType)
: SignatureBinderBase(signature) {
if (signature_.argumentTypes().size() != argTypes.size()) {
return;
}

for (auto i = 0; i < argTypes.size(); ++i) {
if (!SignatureBinderBase::tryBind(
signature_.argumentTypes()[i], argTypes[i])) {
return;
}
}

if (!SignatureBinderBase::tryBind(signature_.returnType(), returnType)) {
return;
}

bound_ = true;
}

// Returns true if argument types and return type specified in the constructor
// match specified signature.
bool tryBind() {
return bound_;
}

private:
bool bound_{false};
};

/// Returns if `functionName` with the given `argTypes` is deterministic.
/// Returns true if the function was not found or determinism cannot be
Expand Down Expand Up @@ -164,7 +200,7 @@ bool isSupportedSignature(
// Not supporting lambda functions, or functions using decimal and
// timestamp with time zone types.
return !(
useTypeName(signature, "opaque") || useTypeName(signature, "function") ||
useTypeName(signature, "opaque") ||
useTypeName(signature, "long_decimal") ||
useTypeName(signature, "short_decimal") ||
useTypeName(signature, "decimal") ||
Expand Down Expand Up @@ -578,9 +614,69 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateArgs(
return generateArgs(input.args, input.constantArgs, numVarArgs);
}

core::TypedExprPtr ExpressionFuzzer::generateArgFunction(const TypePtr& arg) {
const auto& functionType = arg->asFunction();

std::vector<TypePtr> args;
std::vector<std::string> names;
std::vector<core::TypedExprPtr> inputs;
args.reserve(arg->size() - 1);
names.reserve(arg->size() - 1);
inputs.reserve(arg->size() - 1);

for (auto i = 0; i < arg->size() - 1; ++i) {
args.push_back(arg->childAt(i));
names.push_back(fmt::format("__a{}", i));
inputs.push_back(std::make_shared<core::FieldAccessTypedExpr>(
args.back(), names.back()));
}

const auto& returnType = functionType.children().back();

const auto baseType = typeToBaseName(returnType);
const auto& baseList = typeToExpressionList_[baseType];
const auto& templateList = typeToExpressionList_[kTypeParameterName];

std::vector<std::string> eligible;
for (const auto& functionName : baseList) {
if (auto* signature =
findConcreteSignature(args, returnType, functionName)) {
eligible.push_back(functionName);
} else if (
auto* signatureTemplate =
findSignatureTemplate(args, returnType, baseType, functionName)) {
eligible.push_back(functionName);
}
}

for (const auto& functionName : templateList) {
if (auto* signatureTemplate =
findSignatureTemplate(args, returnType, baseType, functionName)) {
eligible.push_back(functionName);
}
}

if (eligible.empty()) {
return std::make_shared<core::LambdaTypedExpr>(
ROW(std::move(names), std::move(args)),
generateArgConstant(returnType));
}

const auto idx = rand32(0, eligible.size() - 1);
const auto name = eligible[idx];

return std::make_shared<core::LambdaTypedExpr>(
ROW(std::move(names), std::move(args)),
std::make_shared<core::CallTypedExpr>(returnType, inputs, name));
}

core::TypedExprPtr ExpressionFuzzer::generateArg(
const TypePtr& arg,
bool isConstant) {
if (arg->isFunction()) {
return generateArgFunction(arg);
}

if (isConstant) {
return generateArgConstant(arg);
} else {
Expand Down Expand Up @@ -768,6 +864,46 @@ const CallableSignature* ExpressionFuzzer::chooseRandomConcreteSignature(
return eligible[idx];
}

const CallableSignature* ExpressionFuzzer::findConcreteSignature(
const std::vector<TypePtr>& argTypes,
const TypePtr& returnType,
const std::string& functionName) {
if (expressionToSignature_.find(functionName) ==
expressionToSignature_.end()) {
return nullptr;
}
auto baseType = typeToBaseName(returnType);
auto it = expressionToSignature_[functionName].find(baseType);
if (it == expressionToSignature_[functionName].end()) {
return nullptr;
}

for (auto signature : it->second) {
if (!signature->returnType->equivalent(*returnType)) {
continue;
}

if (signature->args.size() != argTypes.size()) {
continue;
}

bool argTypesMatch = true;
for (auto i = 0; i < argTypes.size(); ++i) {
if (signature->constantArgs[i] ||
!signature->args[i]->equivalent(*argTypes[i])) {
argTypesMatch = false;
break;
}
}

if (argTypesMatch) {
return signature;
}
}

return nullptr;
}

core::TypedExprPtr ExpressionFuzzer::generateExpressionFromConcreteSignatures(
const TypePtr& returnType,
const std::string& functionName) {
Expand Down Expand Up @@ -812,6 +948,37 @@ const SignatureTemplate* ExpressionFuzzer::chooseRandomSignatureTemplate(
return eligible[idx];
}

const SignatureTemplate* ExpressionFuzzer::findSignatureTemplate(
const std::vector<TypePtr>& argTypes,
const TypePtr& returnType,
const std::string& typeName,
const std::string& functionName) {
std::vector<const SignatureTemplate*> eligible;
if (expressionToTemplatedSignature_.find(functionName) ==
expressionToTemplatedSignature_.end()) {
return nullptr;
}
auto it = expressionToTemplatedSignature_[functionName].find(typeName);
if (it == expressionToTemplatedSignature_[functionName].end()) {
return nullptr;
}

for (auto signatureTemplate : it->second) {
// Skip signatures with constant arguments.
if (signatureTemplate->signature->hasConstantArgument()) {
continue;
}

FullSignatureBinder binder{
*signatureTemplate->signature, argTypes, returnType};
if (binder.tryBind()) {
return signatureTemplate;
}
}

return nullptr;
}

core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate(
const TypePtr& returnType,
const std::string& functionName) {
Expand Down
25 changes: 25 additions & 0 deletions velox/expression/tests/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,16 @@ class ExpressionFuzzer {

core::TypedExprPtr generateArg(const TypePtr& arg);

// Given lambda argument type, generate matching LambdaTypedExpr.
//
// The 'arg' specifies inputs types and result type for the lambda. This
// method finds all matching signatures and signature templates, picks one
// randomly and generates LambdaTypedExpr. If no matching signatures or
// signature templates found, this method returns LambdaTypedExpr that
// represents a constant lambda, i.e lambda that returns the same value for
// all input. The constant value is generated using 'generateArgConstant'.
core::TypedExprPtr generateArgFunction(const TypePtr& arg);

std::vector<core::TypedExprPtr> generateArgs(const CallableSignature& input);

std::vector<core::TypedExprPtr> generateArgs(
Expand Down Expand Up @@ -239,6 +249,13 @@ class ExpressionFuzzer {
const TypePtr& returnType,
const std::string& functionName);

/// Returns a signature with matching input types and return type. Returns
/// nullptr if matching signature doesn't exist.
const CallableSignature* findConcreteSignature(
const std::vector<TypePtr>& argTypes,
const TypePtr& returnType,
const std::string& functionName);

/// Generate an expression by randomly selecting a concrete function
/// signature that returns 'returnType' among all signatures that the
/// function named 'functionName' supports.
Expand All @@ -254,6 +271,14 @@ class ExpressionFuzzer {
const std::string& typeName,
const std::string& functionName);

/// Returns a signature template with matching input types and return type.
/// Returns nullptr if matching signature template doesn't exist.
const SignatureTemplate* findSignatureTemplate(
const std::vector<TypePtr>& argTypes,
const TypePtr& returnType,
const std::string& typeName,
const std::string& functionName);

/// Generate an expression by randomly selecting a function signature
/// template that returns 'returnType' among all signature templates that
/// the function named 'functionName' supports.
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/tests/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ int main(int argc, char** argv) {

// TODO: List of the functions that at some point crash or fail and need to
// be fixed before we can enable.
// This list can include a mix of function names and function signatures.
// Use function name to exclude all signatures of a given function from
// testing. Use function signature to exclude only a specific signature.
std::unordered_set<std::string> skipFunctions = {
// Fuzzer and the underlying engine are confused about cardinality(HLL)
// (since HLL is a user defined type), and end up trying to use
Expand All @@ -60,6 +63,8 @@ int main(int argc, char** argv) {
"cardinality",
"element_at",
"width_bucket",
// Fuzzer cannot generate valid 'comparator' lambda.
"array_sort(array(T),constant function(T,T,bigint)) -> array(T)",
};
size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed;
return FuzzerRunner::run(
Expand Down
41 changes: 37 additions & 4 deletions velox/expression/tests/FuzzerRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ class FuzzerRunner {
return nameSet;
}

static std::pair<std::string, std::string> splitSignature(
const std::string& signature) {
const auto parenPos = signature.find("(");

if (parenPos != std::string::npos) {
return {signature.substr(0, parenPos), signature.substr(parenPos)};
}

return {signature, ""};
}

// Parse the comma separated list of function names, and use it to filter the
// input signatures.
static facebook::velox::FunctionSignatureMap filterSignatures(
Expand All @@ -85,10 +96,32 @@ class FuzzerRunner {
return input;
}
facebook::velox::FunctionSignatureMap output(input);
for (auto s : skipFunctions) {
auto str = s;
folly::toLowerAscii(str);
output.erase(str);
for (auto skip : skipFunctions) {
// 'skip' can be function name or signature.
const auto [skipName, skipSignature] = splitSignature(skip);

if (skipSignature.empty()) {
output.erase(skipName);
} else {
auto it = output.find(skipName);
if (it != output.end()) {
// Compiler refuses to reference 'skipSignature' from the lambda as
// is.
const auto& signatureToRemove = skipSignature;

auto removeIt = std::find_if(
it->second.begin(),
it->second.end(),
[&](const auto& signature) {
return signature->toString() == signatureToRemove;
});
VELOX_CHECK(
removeIt != it->second.end(),
"Skip signature not found: {}",
skip);
it->second.erase(removeIt);
}
}
}
return output;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/lib/Re2Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ class LikeGeneric final : public VectorFunction {

auto [it, inserted] = compiledRegularExpressions_.emplace(
key, std::make_unique<RE2>(toStringPiece(regex), opt));
VELOX_CHECK_LE(
VELOX_USER_CHECK_LE(
compiledRegularExpressions_.size(),
kMaxCompiledRegexes,
"Max number of regex reached");
Expand Down
18 changes: 11 additions & 7 deletions velox/functions/lib/tests/Re2FunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1104,20 +1104,24 @@ TEST_F(Re2FunctionsTest, likeRegexLimit) {
}

VELOX_ASSERT_THROW(
evaluate("like(c0 , c1)", makeRowVector({input, pattern})),
evaluate("like(c0, c1)", makeRowVector({input, pattern})),
"Max number of regex reached");

// Make sure try does not suppress that faluire.
VELOX_ASSERT_THROW(
evaluate("try(like(c0 , c1))", makeRowVector({input, pattern})),
"Max number of regex reached");
// First 20 rows should return false, the rest raise and error and become
// null.
result = evaluate("try(like(c0, c1))", makeRowVector({input, pattern}));
auto expected = makeFlatVector<bool>(
26,
[](auto /*row*/) { return false; },
[](auto row) { return row >= 20; });
assertEqualVectors(expected, result);

// All are complex but the same, should pass.
for (int i = 0; i < 26; i++) {
flatPattern->set(i, "b%[0-9]+.*{}.*{}.*[0-9]+");
}
assertEqualVectors(
BaseVector::createConstant(BOOLEAN(), false, 26, pool()), result);
result = evaluate("like(c0, c1)", makeRowVector({input, pattern}));
assertEqualVectors(makeConstant(false, 26), result);
}

TEST_F(Re2FunctionsTest, invalidEscapeChar) {
Expand Down

0 comments on commit 975ca3a

Please sign in to comment.