Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to verify expression fuzzer runs on a subset of rows #11267

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 15 additions & 25 deletions velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@ namespace {

using exec::SignatureBinder;

/// Returns row numbers for non-null rows among all children in'data' or null
/// if all rows are null.
BufferPtr extractNonNullIndices(const RowVectorPtr& data) {
/// Returns all non-null rows among all children in 'data'.
SelectivityVector extractNonNullRows(const RowVectorPtr& data) {
DecodedVector decoded;
SelectivityVector nonNullRows(data->size());

Expand All @@ -48,19 +47,8 @@ BufferPtr extractNonNullIndices(const RowVectorPtr& data) {
if (rawNulls) {
nonNullRows.deselectNulls(rawNulls, 0, data->size());
}
if (!nonNullRows.hasSelections()) {
return nullptr;
}
}

BufferPtr indices = allocateIndices(nonNullRows.end(), data->pool());
auto rawIndices = indices->asMutable<vector_size_t>();
vector_size_t cnt = 0;
nonNullRows.applyToSelected(
[&](vector_size_t row) { rawIndices[cnt++] = row; });
VELOX_CHECK_GT(cnt, 0);
indices->setSize(cnt * sizeof(vector_size_t));
return indices;
return nonNullRows;
}

/// Wraps child vectors of the specified 'rowVector' in dictionary using
Expand Down Expand Up @@ -261,6 +249,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
tryResult = verifier_.verify(
tryPlans,
rowVector,
std::nullopt,
resultVector ? BaseVector::copy(*resultVector) : nullptr,
false, // canThrow
columnsToWrapInLazy);
Expand All @@ -271,6 +260,7 @@ void ExpressionFuzzerVerifier::retryWithTry(
*vectorFuzzer_,
plans,
rowVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand All @@ -283,21 +273,18 @@ void ExpressionFuzzerVerifier::retryWithTry(

// Re-evaluate the original expression on rows that didn't produce an
// error (i.e. returned non-NULL results when evaluated with TRY).
BufferPtr noErrorIndices = extractNonNullIndices(tryResult.result);

if (noErrorIndices != nullptr) {
auto noErrorRowVector = wrapChildren(noErrorIndices, rowVector);
SelectivityVector noErrorRows = extractNonNullRows(tryResult.result);

LOG(INFO) << "Retrying original expression on " << noErrorRowVector->size()
if (noErrorRows.hasSelections()) {
LOG(INFO) << "Retrying original expression on " << noErrorRows.end()
<< " rows without errors";

try {
verifier_.verify(
plans,
noErrorRowVector,
resultVector ? BaseVector::copy(*resultVector)
->slice(0, noErrorRowVector->size())
: nullptr,
rowVector,
noErrorRows,
resultVector ? BaseVector::copy(*resultVector) : nullptr,
false, // canThrow
columnsToWrapInLazy);
} catch (const std::exception&) {
Expand All @@ -306,7 +293,8 @@ void ExpressionFuzzerVerifier::retryWithTry(
{&execCtx_, {false, ""}, referenceQueryRunner_},
*vectorFuzzer_,
plans,
noErrorRowVector,
rowVector,
noErrorRows,
columnsToWrapInLazy);
}
throw;
Expand Down Expand Up @@ -379,6 +367,7 @@ void ExpressionFuzzerVerifier::go() {
result = verifier_.verify(
plans,
rowVector,
std::nullopt,
resultVectors ? BaseVector::copy(*resultVectors) : nullptr,
true, // canThrow
columnsToWrapInLazy);
Expand All @@ -389,6 +378,7 @@ void ExpressionFuzzerVerifier::go() {
*vectorFuzzer_,
plans,
rowVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand Down
2 changes: 2 additions & 0 deletions velox/expression/tests/ExpressionRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ void ExpressionRunner::run(
verifier.verify(
typedExprs,
inputVector,
std::nullopt,
std::move(resultVector),
true,
columnsToWrapInLazy);
Expand All @@ -209,6 +210,7 @@ void ExpressionRunner::run(
fuzzer,
typedExprs,
inputVector,
std::nullopt,
columnsToWrapInLazy);
}
throw;
Expand Down
75 changes: 59 additions & 16 deletions velox/expression/tests/ExpressionVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,35 @@ bool defaultNullRowsSkipped(const exec::ExprSet& exprSet) {
}
return false;
}

// Returns a RowVector with only the rows specified in 'rows'. Does not maintain
// encodings.
RowVectorPtr reduceToSelectedRows(
const RowVectorPtr& rowVector,
const SelectivityVector& rows) {
if (rows.isAllSelected()) {
return rowVector;
}
BufferPtr indices = allocateIndices(rows.end(), rowVector->pool());
auto rawIndices = indices->asMutable<vector_size_t>();
vector_size_t cnt = 0;
rows.applyToSelected([&](vector_size_t row) { rawIndices[cnt++] = row; });
VELOX_CHECK_GT(cnt, 0);
indices->setSize(cnt * sizeof(vector_size_t));
// Top level row vector is not expected to be encoded, therefore we copy
// instead of wrapping in the indices.
RowVectorPtr reducedVector = std::dynamic_pointer_cast<RowVector>(
BaseVector::create(rowVector->type(), cnt, rowVector->pool()));
SelectivityVector rowsToCopy(cnt);
reducedVector->copy(rowVector.get(), rowsToCopy, rawIndices);
Comment on lines +88 to +91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This piece of code can be simply replaced with BaseVector::wrapInDictionary(nullptr, indices, cnt, rowVector)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, the only reason I did this was to avoid creating a top level RowVector which is dictionary encoded which we do not expect in velox. This basically reduces some steps if we did (wrapInDictionary + flattenVector) instead.

This however will only be used by the presto runner, where it will probably write this to disk first. I have not had a chance to run it with PQR yet, but if that codepath supports an encoded top level row vector then we should be good and I can replace this with your suggestion.

return reducedVector;
}
} // namespace

fuzzer::ResultOrError ExpressionVerifier::verify(
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
VectorPtr&& resultVector,
bool canThrow,
std::vector<int> columnsToWrapInLazy) {
Expand Down Expand Up @@ -132,7 +156,12 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
std::exception_ptr exceptionSimplifiedPtr;

VLOG(1) << "Starting common eval execution.";
SelectivityVector rows{rowVector ? rowVector->size() : 1};
SelectivityVector rows;
if (rowsToVerify.has_value()) {
rows = *rowsToVerify;
} else {
rows = SelectivityVector{rowVector ? rowVector->size() : 1};
}

// Execute with common expression eval path. Some columns of the input row
// vector will be wrapped in lazy as specified in 'columnsToWrapInLazy'.
Expand Down Expand Up @@ -171,7 +200,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
copiedInput,
BaseVector::copy(*inputRowVector),
"Copy of original input",
"Input after common");
"Input after common",
rows);
}
} catch (const VeloxException& e) {
if (e.errorCode() == error_code::kUnsupportedInputUncatchable) {
Expand Down Expand Up @@ -201,9 +231,11 @@ fuzzer::ResultOrError ExpressionVerifier::verify(

if (referenceQueryRunner_ != nullptr) {
VLOG(1) << "Execute with reference DB.";
auto projectionPlan = makeProjectionPlan(rowVector, plans);
auto inputRowVector = rowVector;
inputRowVector = reduceToSelectedRows(rowVector, rows);
auto projectionPlan = makeProjectionPlan(inputRowVector, plans);
auto referenceResultOrError = computeReferenceResults(
projectionPlan, {rowVector}, referenceQueryRunner_.get());
projectionPlan, {inputRowVector}, referenceQueryRunner_.get());

auto referenceEvalResult = referenceResultOrError.first;

Expand Down Expand Up @@ -246,6 +278,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
nullptr,
commonEvalResult[0]->size(),
commonEvalResult);
commonEvalResultRow = reduceToSelectedRows(commonEvalResultRow, rows);
VELOX_CHECK(
exec::test::assertEqualResults(
referenceEvalResult.value(),
Expand Down Expand Up @@ -280,7 +313,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify(
copy,
BaseVector::copy(*rowVector),
"Copy of original input",
"Input after simplified");
"Input after simplified",
rows);
} catch (const VeloxException& e) {
if (e.errorCode() == error_code::kUnsupportedInputUncatchable) {
unsupportedInputUncatchableError = true;
Expand Down Expand Up @@ -479,12 +513,13 @@ class MinimalSubExpressionFinder {
void findMinimalExpression(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
if (verifyWithResults(plan, rowVector, columnsToWrapInLazy)) {
if (verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy)) {
errorExit("Retry should have failed");
}
bool minimalFound =
findMinimalRecursive(plan, rowVector, columnsToWrapInLazy);
bool minimalFound = findMinimalRecursive(
plan, rowVector, rowsToVerify, columnsToWrapInLazy);
if (minimalFound) {
errorExit("Found minimal failing expression.");
} else {
Expand All @@ -504,13 +539,15 @@ class MinimalSubExpressionFinder {
bool findMinimalRecursive(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
bool anyFailed = false;
for (auto& input : plan->inputs()) {
if (!verifyWithResults(input, rowVector, columnsToWrapInLazy)) {
if (!verifyWithResults(
input, rowVector, rowsToVerify, columnsToWrapInLazy)) {
anyFailed = true;
bool minimalFound =
findMinimalRecursive(input, rowVector, columnsToWrapInLazy);
bool minimalFound = findMinimalRecursive(
input, rowVector, rowsToVerify, columnsToWrapInLazy);
if (minimalFound) {
return true;
}
Expand All @@ -519,10 +556,10 @@ class MinimalSubExpressionFinder {
if (!anyFailed) {
LOG(INFO) << "Failed with all children succeeding: " << plan->toString();
// Re-running the minimum failed. Put breakpoint here to debug.
verifyWithResults(plan, rowVector, columnsToWrapInLazy);
verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy);
if (!columnsToWrapInLazy.empty()) {
LOG(INFO) << "Trying without lazy:";
if (verifyWithResults(plan, rowVector, {})) {
if (verifyWithResults(plan, rowVector, rowsToVerify, {})) {
LOG(INFO) << "Minimal failure succeeded without lazy vectors";
}
}
Expand All @@ -539,14 +576,16 @@ class MinimalSubExpressionFinder {
bool verifyWithResults(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
VectorPtr result;
LOG(INFO) << "Running with empty results vector :" << plan->toString();
bool emptyResult = verifyPlan(plan, rowVector, columnsToWrapInLazy, result);
bool emptyResult =
verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result);
LOG(INFO) << "Running with non empty vector :" << plan->toString();
result = vectorFuzzer_.fuzzFlat(plan->type());
bool filledResult =
verifyPlan(plan, rowVector, columnsToWrapInLazy, result);
verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result);
if (emptyResult != filledResult) {
LOG(ERROR) << fmt::format(
"Different results for empty vs populated ! Empty result = {} filledResult = {}",
Expand All @@ -561,6 +600,7 @@ class MinimalSubExpressionFinder {
bool verifyPlan(
core::TypedExprPtr plan,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy,
VectorPtr results) {
// Turn off unnecessary logging.
Expand All @@ -571,6 +611,7 @@ class MinimalSubExpressionFinder {
verifier_.verify(
{plan},
rowVector,
rowsToVerify,
results ? BaseVector::copy(*results) : nullptr,
true, // canThrow
columnsToWrapInLazy);
Expand All @@ -591,6 +632,7 @@ void computeMinimumSubExpression(
VectorFuzzer& fuzzer,
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy) {
auto finder = MinimalSubExpressionFinder(std::move(minimalVerifier), fuzzer);
if (plans.size() > 1) {
Expand All @@ -602,7 +644,8 @@ void computeMinimumSubExpression(
for (auto plan : plans) {
LOG(INFO) << "============================================";
LOG(INFO) << "Finding minimal subexpression for plan:" << plan->toString();
finder.findMinimalExpression(plan, rowVector, columnsToWrapInLazy);
finder.findMinimalExpression(
plan, rowVector, rowsToVerify, columnsToWrapInLazy);
LOG(INFO) << "============================================";
}
}
Expand Down
21 changes: 13 additions & 8 deletions velox/expression/tests/ExpressionVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,17 @@ class ExpressionVerifier {
options_(options),
referenceQueryRunner_{referenceQueryRunner} {}

// Executes expressions both using common path (all evaluation
// optimizations) and simplified path. Additionally, a list of column
// indices can be passed via 'columnsToWrapInLazy' which specify the
// columns/children in the input row vector that should be wrapped in a lazy
// layer before running it through the common evaluation path. The list can
// contain negative column indices that represent lazy vectors that should be
// preloaded before being fed to the evaluator. This list is sorted on the
// absolute value of the entries.
// Executes expressions using common path (all evaluation
// optimizations) and compares the result with either the simplified path or a
// reference query runner. An optional selectivity vector 'rowsToVerify' can
// be passed which specifies which rows to evaluate and verify. If its not
// provided (by passing std::nullopt) then all rows will be verified.
// Additionally, a list of column indices can be passed via
// 'columnsToWrapInLazy' which specify the columns/children in the input row
// vector that should be wrapped in a lazy layer before running it through the
// common evaluation path. The list can contain negative column indices that
// represent lazy vectors that should be preloaded before being fed to the
// evaluator. This list is sorted on the absolute value of the entries.
// Returns:
// - result of evaluating the expressions if both paths succeeded and
// returned the exact same vectors.
Expand All @@ -74,6 +77,7 @@ class ExpressionVerifier {
fuzzer::ResultOrError verify(
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
VectorPtr&& resultVector,
bool canThrow,
std::vector<int> columnsToWarpInLazy = {});
Expand Down Expand Up @@ -112,5 +116,6 @@ void computeMinimumSubExpression(
VectorFuzzer& fuzzer,
const std::vector<core::TypedExprPtr>& plans,
const RowVectorPtr& rowVector,
const std::optional<SelectivityVector>& rowsToVerify,
const std::vector<int>& columnsToWrapInLazy);
} // namespace facebook::velox::test
Loading
Loading