Skip to content

Commit

Permalink
Add ability to verify expression fuzzer runs on a subset of rows (fac…
Browse files Browse the repository at this point in the history
…ebookincubator#11267)

Summary:
Pull Request resolved: facebookincubator#11267

Currently, the expression fuzzer has a phase where it re-runs rows
that did not throw an error to ensure evaluation is consistent for
them. To achieve this, it currently wraps the inputs with a dictionary
that only points to the subset of those rows. This results a change in
the encoding of inputs which can cause differences in eval paths taken
between phases. To address this and ensure the same paths are taken
for each evaluation phase, this change introduces the ability for the
expression verifier to only verify a subset of the input rows. The
aforementioned fuzzer run phase can only specify the non error rows
and maintain the original input row.

Follow up: After this change, it would be relevant to also store the
input selectivity vector. A subsequent change will be added that would
add this ability and make corresponding changes to the ExpressionRunner

Reviewed By: kagamiori

Differential Revision: D64366745

fbshipit-source-id: c389306ca280506373680d0656909ed94215c731
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed Oct 30, 2024
1 parent 7377bc8 commit 7c93eba
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 50 deletions.
40 changes: 15 additions & 25 deletions velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@ namespace {

using exec::SignatureBinder;

/// Returns row numbers for non-null rows among all children in'data' or null
/// if all rows are null.
BufferPtr extractNonNullIndices(const RowVectorPtr& data) {
/// Returns all non-null rows among all children in 'data'.
SelectivityVector extractNonNullRows(const RowVectorPtr& data) {
DecodedVector decoded;
SelectivityVector nonNullRows(data->size());

Expand All @@ -48,19 +47,8 @@ BufferPtr extractNonNullIndices(const RowVectorPtr& data) {
if (rawNulls) {
nonNullRows.deselectNulls(rawNulls, 0, data->size());
}
if (!nonNullRows.hasSelections()) {
return nullptr;
}
}

BufferPtr indices = allocateIndices(nonNullRows.end(), data->pool());
auto rawIndices = indices->asMutable<vector_size_t>();
vector_size_t cnt = 0;
nonNullRows.applyToSelected(
[&](vector_size_t row) { rawIndices[cnt++] = row; });
VELOX_CHECK_GT(cnt, 0);
indices->setSize(cnt * sizeof(vector_size_t));
return indices;
return nonNullRows;
}

/// Wraps child vectors of the specified 'rowVector' in dictionary using
Expand Down Expand Up @@ -261,6 +249,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
tryResult = verifier_.verify(
tryPlans,
rowVector,
std::nullopt,
resultVector ? BaseVector::copy(*resultVector) : nullptr,
false, // canThrow
columnsToWrapInLazy);
Expand All @@ -271,6 +260,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
*vectorFuzzer_,
plans,
rowVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand All @@ -283,21 +273,18 @@ void ExpressionFuzzerVerifier::retryWithTry(

// Re-evaluate the original expression on rows that didn't produce an
// error (i.e. returned non-NULL results when evaluated with TRY).
BufferPtr noErrorIndices = extractNonNullIndices(tryResult.result);

if (noErrorIndices != nullptr) {
auto noErrorRowVector = wrapChildren(noErrorIndices, rowVector);
SelectivityVector noErrorRows = extractNonNullRows(tryResult.result);

LOG(INFO) << "Retrying original expression on " << noErrorRowVector->size()
if (noErrorRows.hasSelections()) {
LOG(INFO) << "Retrying original expression on " << noErrorRows.end()
<< " rows without errors";

try {
verifier_.verify(
plans,
noErrorRowVector,
resultVector ? BaseVector::copy(*resultVector)
->slice(0, noErrorRowVector->size())
: nullptr,
rowVector,
noErrorRows,
resultVector ? BaseVector::copy(*resultVector) : nullptr,
false, // canThrow
columnsToWrapInLazy);
} catch (const std::exception&) {
Expand All @@ -306,7 +293,8 @@ void ExpressionFuzzerVerifier::retryWithTry(
{&execCtx_, {false, ""}, referenceQueryRunner_},
*vectorFuzzer_,
plans,
noErrorRowVector,
rowVector,
noErrorRows,
columnsToWrapInLazy);
}
throw;
Expand Down Expand Up @@ -379,6 +367,7 @@ void ExpressionFuzzerVerifier::go() {
result = verifier_.verify(
plans,
rowVector,
std::nullopt,
resultVectors ? BaseVector::copy(*resultVectors) : nullptr,
true, // canThrow
columnsToWrapInLazy);
Expand All @@ -389,6 +378,7 @@ void ExpressionFuzzerVerifier::go() {
*vectorFuzzer_,
plans,
rowVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand Down
2 changes: 2 additions & 0 deletions velox/expression/tests/ExpressionRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ void ExpressionRunner::run(
verifier.verify(
typedExprs,
inputVector,
std::nullopt,
std::move(resultVector),
true,
columnsToWrapInLazy);
Expand All @@ -209,6 +210,7 @@ void ExpressionRunner::run(
fuzzer,
typedExprs,
inputVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand Down
75 changes: 59 additions & 16 deletions velox/expression/tests/ExpressionVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,35 @@ bool defaultNullRowsSkipped(const exec::ExprSet& exprSet) {
}
return false;
}

// Returns a RowVector with only the rows specified in 'rows'. Does not maintain
// encodings.
RowVectorPtr reduceToSelectedRows(
const RowVectorPtr& rowVector,
const SelectivityVector& rows) {
if (rows.isAllSelected()) {
return rowVector;
}
BufferPtr indices = allocateIndices(rows.end(), rowVector->pool());
auto rawIndices = indices->asMutable<vector_size_t>();
vector_size_t cnt = 0;
rows.applyToSelected([&](vector_size_t row) { rawIndices[cnt++] = row; });
VELOX_CHECK_GT(cnt, 0);
indices->setSize(cnt * sizeof(vector_size_t));
// Top level row vector is not expected to be encoded, therefore we copy
// instead of wrapping in the indices.
RowVectorPtr reducedVector = std::dynamic_pointer_cast<RowVector>(
BaseVector::create(rowVector->type(), cnt, rowVector->pool()));
SelectivityVector rowsToCopy(cnt);
reducedVector->copy(rowVector.get(), rowsToCopy, rawIndices);
return reducedVector;
}
} // namespace

fuzzer::ResultOrError ExpressionVerifier::verify(
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
VectorPtr&& resultVector,
bool canThrow,
std::vector<int> columnsToWrapInLazy) {
Expand Down Expand Up @@ -132,7 +156,12 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
std::exception_ptr exceptionSimplifiedPtr;

VLOG(1) << "Starting common eval execution.";
SelectivityVector rows{rowVector ? rowVector->size() : 1};
SelectivityVector rows;
if (rowsToVerify.has_value()) {
rows = *rowsToVerify;
} else {
rows = SelectivityVector{rowVector ? rowVector->size() : 1};
}

// Execute with common expression eval path. Some columns of the input row
// vector will be wrapped in lazy as specified in 'columnsToWrapInLazy'.
Expand Down Expand Up @@ -171,7 +200,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
copiedInput,
BaseVector::copy(*inputRowVector),
"Copy of original input",
"Input after common");
"Input after common",
rows);
}
} catch (const VeloxException& e) {
if (e.errorCode() == error_code::kUnsupportedInputUncatchable) {
Expand Down Expand Up @@ -201,9 +231,11 @@ fuzzer::ResultOrError ExpressionVerifier::verify(

if (referenceQueryRunner_ != nullptr) {
VLOG(1) << "Execute with reference DB.";
auto projectionPlan = makeProjectionPlan(rowVector, plans);
auto inputRowVector = rowVector;
inputRowVector = reduceToSelectedRows(rowVector, rows);
auto projectionPlan = makeProjectionPlan(inputRowVector, plans);
auto referenceResultOrError = computeReferenceResults(
projectionPlan, {rowVector}, referenceQueryRunner_.get());
projectionPlan, {inputRowVector}, referenceQueryRunner_.get());

auto referenceEvalResult = referenceResultOrError.first;

Expand Down Expand Up @@ -246,6 +278,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
nullptr,
commonEvalResult[0]->size(),
commonEvalResult);
commonEvalResultRow = reduceToSelectedRows(commonEvalResultRow, rows);
VELOX_CHECK(
exec::test::assertEqualResults(
referenceEvalResult.value(),
Expand Down Expand Up @@ -280,7 +313,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
copy,
BaseVector::copy(*rowVector),
"Copy of original input",
"Input after simplified");
"Input after simplified",
rows);
} catch (const VeloxException& e) {
if (e.errorCode() == error_code::kUnsupportedInputUncatchable) {
unsupportedInputUncatchableError = true;
Expand Down Expand Up @@ -479,12 +513,13 @@ class MinimalSubExpressionFinder {
void findMinimalExpression(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
if (verifyWithResults(plan, rowVector, columnsToWrapInLazy)) {
if (verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy)) {
errorExit("Retry should have failed");
}
bool minimalFound =
findMinimalRecursive(plan, rowVector, columnsToWrapInLazy);
bool minimalFound = findMinimalRecursive(
plan, rowVector, rowsToVerify, columnsToWrapInLazy);
if (minimalFound) {
errorExit("Found minimal failing expression.");
} else {
Expand All @@ -504,13 +539,15 @@ class MinimalSubExpressionFinder {
bool findMinimalRecursive(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
bool anyFailed = false;
for (auto& input : plan->inputs()) {
if (!verifyWithResults(input, rowVector, columnsToWrapInLazy)) {
if (!verifyWithResults(
input, rowVector, rowsToVerify, columnsToWrapInLazy)) {
anyFailed = true;
bool minimalFound =
findMinimalRecursive(input, rowVector, columnsToWrapInLazy);
bool minimalFound = findMinimalRecursive(
input, rowVector, rowsToVerify, columnsToWrapInLazy);
if (minimalFound) {
return true;
}
Expand All @@ -519,10 +556,10 @@ class MinimalSubExpressionFinder {
if (!anyFailed) {
LOG(INFO) << "Failed with all children succeeding: " << plan->toString();
// Re-running the minimum failed. Put breakpoint here to debug.
verifyWithResults(plan, rowVector, columnsToWrapInLazy);
verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy);
if (!columnsToWrapInLazy.empty()) {
LOG(INFO) << "Trying without lazy:";
if (verifyWithResults(plan, rowVector, {})) {
if (verifyWithResults(plan, rowVector, rowsToVerify, {})) {
LOG(INFO) << "Minimal failure succeeded without lazy vectors";
}
}
Expand All @@ -539,14 +576,16 @@ class MinimalSubExpressionFinder {
bool verifyWithResults(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
VectorPtr result;
LOG(INFO) << "Running with empty results vector :" << plan->toString();
bool emptyResult = verifyPlan(plan, rowVector, columnsToWrapInLazy, result);
bool emptyResult =
verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result);
LOG(INFO) << "Running with non empty vector :" << plan->toString();
result = vectorFuzzer_.fuzzFlat(plan->type());
bool filledResult =
verifyPlan(plan, rowVector, columnsToWrapInLazy, result);
verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result);
if (emptyResult != filledResult) {
LOG(ERROR) << fmt::format(
"Different results for empty vs populated ! Empty result = {} filledResult = {}",
Expand All @@ -561,6 +600,7 @@ class MinimalSubExpressionFinder {
bool verifyPlan(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy,
VectorPtr results) {
// Turn off unnecessary logging.
Expand All @@ -571,6 +611,7 @@ class MinimalSubExpressionFinder {
verifier_.verify(
{plan},
rowVector,
rowsToVerify,
results ? BaseVector::copy(*results) : nullptr,
true, // canThrow
columnsToWrapInLazy);
Expand All @@ -591,6 +632,7 @@ void computeMinimumSubExpression(
VectorFuzzer& fuzzer,
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
auto finder = MinimalSubExpressionFinder(std::move(minimalVerifier), fuzzer);
if (plans.size() > 1) {
Expand All @@ -602,7 +644,8 @@ void computeMinimumSubExpression(
for (auto plan : plans) {
LOG(INFO) << "============================================";
LOG(INFO) << "Finding minimal subexpression for plan:" << plan->toString();
finder.findMinimalExpression(plan, rowVector, columnsToWrapInLazy);
finder.findMinimalExpression(
plan, rowVector, rowsToVerify, columnsToWrapInLazy);
LOG(INFO) << "============================================";
}
}
Expand Down
21 changes: 13 additions & 8 deletions velox/expression/tests/ExpressionVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,17 @@ class ExpressionVerifier {
options_(options),
referenceQueryRunner_{referenceQueryRunner} {}

// Executes expressions both using common path (all evaluation
// optimizations) and simplified path. Additionally, a list of column
// indices can be passed via 'columnsToWrapInLazy' which specify the
// columns/children in the input row vector that should be wrapped in a lazy
// layer before running it through the common evaluation path. The list can
// contain negative column indices that represent lazy vectors that should be
// preloaded before being fed to the evaluator. This list is sorted on the
// absolute value of the entries.
// Executes expressions using common path (all evaluation
// optimizations) and compares the result with either the simplified path or a
// reference query runner. An optional selectivity vector 'rowsToVerify' can
// be passed which specifies which rows to evaluate and verify. If its not
// provided (by passing std::nullopt) then all rows will be verified.
// Additionally, a list of column indices can be passed via
// 'columnsToWrapInLazy' which specify the columns/children in the input row
// vector that should be wrapped in a lazy layer before running it through the
// common evaluation path. The list can contain negative column indices that
// represent lazy vectors that should be preloaded before being fed to the
// evaluator. This list is sorted on the absolute value of the entries.
// Returns:
// - result of evaluating the expressions if both paths succeeded and
// returned the exact same vectors.
Expand All @@ -74,6 +77,7 @@ class ExpressionVerifier {
fuzzer::ResultOrError verify(
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
VectorPtr&& resultVector,
bool canThrow,
std::vector<int> columnsToWarpInLazy = {});
Expand Down Expand Up @@ -112,5 +116,6 @@ void computeMinimumSubExpression(
VectorFuzzer& fuzzer,
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy);
} // namespace facebook::velox::test
Loading

0 comments on commit 7c93eba

Please sign in to comment.