diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index cad98cb0ee0e4..f37133ca6eda8 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -36,9 +36,8 @@ namespace { using exec::SignatureBinder; -/// Returns row numbers for non-null rows among all children in'data' or null -/// if all rows are null. -BufferPtr extractNonNullIndices(const RowVectorPtr& data) { +/// Returns all non-null rows among all children in 'data'. +SelectivityVector extractNonNullRows(const RowVectorPtr& data) { DecodedVector decoded; SelectivityVector nonNullRows(data->size()); @@ -48,19 +47,8 @@ BufferPtr extractNonNullIndices(const RowVectorPtr& data) { if (rawNulls) { nonNullRows.deselectNulls(rawNulls, 0, data->size()); } - if (!nonNullRows.hasSelections()) { - return nullptr; - } } - - BufferPtr indices = allocateIndices(nonNullRows.end(), data->pool()); - auto rawIndices = indices->asMutable(); - vector_size_t cnt = 0; - nonNullRows.applyToSelected( - [&](vector_size_t row) { rawIndices[cnt++] = row; }); - VELOX_CHECK_GT(cnt, 0); - indices->setSize(cnt * sizeof(vector_size_t)); - return indices; + return nonNullRows; } /// Wraps child vectors of the specified 'rowVector' in dictionary using @@ -261,6 +249,7 @@ void ExpressionFuzzerVerifier::retryWithTry( tryResult = verifier_.verify( tryPlans, rowVector, + std::nullopt, resultVector ? BaseVector::copy(*resultVector) : nullptr, false, // canThrow columnsToWrapInLazy); @@ -271,6 +260,7 @@ void ExpressionFuzzerVerifier::retryWithTry( *vectorFuzzer_, plans, rowVector, + std::nullopt, columnsToWrapInLazy); } throw; @@ -283,21 +273,18 @@ void ExpressionFuzzerVerifier::retryWithTry( // Re-evaluate the original expression on rows that didn't produce an // error (i.e. returned non-NULL results when evaluated with TRY). - BufferPtr noErrorIndices = extractNonNullIndices(tryResult.result); - - if (noErrorIndices != nullptr) { - auto noErrorRowVector = wrapChildren(noErrorIndices, rowVector); + SelectivityVector noErrorRows = extractNonNullRows(tryResult.result); - LOG(INFO) << "Retrying original expression on " << noErrorRowVector->size() + if (noErrorRows.hasSelections()) { + LOG(INFO) << "Retrying original expression on " << noErrorRows.end() << " rows without errors"; try { verifier_.verify( plans, - noErrorRowVector, - resultVector ? BaseVector::copy(*resultVector) - ->slice(0, noErrorRowVector->size()) - : nullptr, + rowVector, + noErrorRows, + resultVector ? BaseVector::copy(*resultVector) : nullptr, false, // canThrow columnsToWrapInLazy); } catch (const std::exception&) { @@ -306,7 +293,8 @@ void ExpressionFuzzerVerifier::retryWithTry( {&execCtx_, {false, ""}, referenceQueryRunner_}, *vectorFuzzer_, plans, - noErrorRowVector, + rowVector, + noErrorRows, columnsToWrapInLazy); } throw; @@ -379,6 +367,7 @@ void ExpressionFuzzerVerifier::go() { result = verifier_.verify( plans, rowVector, + std::nullopt, resultVectors ? BaseVector::copy(*resultVectors) : nullptr, true, // canThrow columnsToWrapInLazy); @@ -389,6 +378,7 @@ void ExpressionFuzzerVerifier::go() { *vectorFuzzer_, plans, rowVector, + std::nullopt, columnsToWrapInLazy); } throw; diff --git a/velox/expression/tests/ExpressionRunner.cpp b/velox/expression/tests/ExpressionRunner.cpp index d406de11f9ea7..85356e7cd1d61 100644 --- a/velox/expression/tests/ExpressionRunner.cpp +++ b/velox/expression/tests/ExpressionRunner.cpp @@ -197,6 +197,7 @@ void ExpressionRunner::run( verifier.verify( typedExprs, inputVector, + std::nullopt, std::move(resultVector), true, columnsToWrapInLazy); @@ -209,6 +210,7 @@ void ExpressionRunner::run( fuzzer, typedExprs, inputVector, + std::nullopt, columnsToWrapInLazy); } throw; diff --git a/velox/expression/tests/ExpressionVerifier.cpp b/velox/expression/tests/ExpressionVerifier.cpp index 02991c1c8114d..e6a99f0017f43 100644 --- a/velox/expression/tests/ExpressionVerifier.cpp +++ b/velox/expression/tests/ExpressionVerifier.cpp @@ -68,11 +68,33 @@ bool defaultNullRowsSkipped(const exec::ExprSet& exprSet) { } return false; } + +// Returns a RowVector with only the rows specified in 'rows'. Does not maintain +// encodings. +RowVectorPtr reduceToSelectedRows( + const RowVectorPtr& rowVector, + const SelectivityVector& rows) { + if (rows.isAllSelected()) { + return rowVector; + } + BufferPtr indices = allocateIndices(rows.end(), rowVector->pool()); + auto rawIndices = indices->asMutable(); + vector_size_t cnt = 0; + rows.applyToSelected([&](vector_size_t row) { rawIndices[cnt++] = row; }); + VELOX_CHECK_GT(cnt, 0); + indices->setSize(cnt * sizeof(vector_size_t)); + RowVectorPtr reducedVector = std::dynamic_pointer_cast( + BaseVector::create(rowVector->type(), cnt, rowVector->pool())); + SelectivityVector rowsToCopy(cnt); + reducedVector->copy(rowVector.get(), rowsToCopy, rawIndices); + return reducedVector; +} } // namespace fuzzer::ResultOrError ExpressionVerifier::verify( const std::vector& plans, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, VectorPtr&& resultVector, bool canThrow, std::vector columnsToWrapInLazy) { @@ -132,7 +154,12 @@ fuzzer::ResultOrError ExpressionVerifier::verify( std::exception_ptr exceptionSimplifiedPtr; VLOG(1) << "Starting common eval execution."; - SelectivityVector rows{rowVector ? rowVector->size() : 1}; + SelectivityVector rows; + if (rowsToVerify.has_value()) { + rows = *rowsToVerify; + } else { + rows = SelectivityVector{rowVector ? rowVector->size() : 1}; + } // Execute with common expression eval path. Some columns of the input row // vector will be wrapped in lazy as specified in 'columnsToWrapInLazy'. @@ -171,7 +198,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify( copiedInput, BaseVector::copy(*inputRowVector), "Copy of original input", - "Input after common"); + "Input after common", + rows); } } catch (const VeloxException& e) { if (e.errorCode() == error_code::kUnsupportedInputUncatchable) { @@ -201,9 +229,11 @@ fuzzer::ResultOrError ExpressionVerifier::verify( if (referenceQueryRunner_ != nullptr) { VLOG(1) << "Execute with reference DB."; - auto projectionPlan = makeProjectionPlan(rowVector, plans); + auto inputRowVector = rowVector; + inputRowVector = reduceToSelectedRows(rowVector, rows); + auto projectionPlan = makeProjectionPlan(inputRowVector, plans); auto referenceResultOrError = computeReferenceResults( - projectionPlan, {rowVector}, referenceQueryRunner_.get()); + projectionPlan, {inputRowVector}, referenceQueryRunner_.get()); auto referenceEvalResult = referenceResultOrError.first; @@ -246,6 +276,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify( nullptr, commonEvalResult[0]->size(), commonEvalResult); + commonEvalResultRow = reduceToSelectedRows(commonEvalResultRow, rows); VELOX_CHECK( exec::test::assertEqualResults( referenceEvalResult.value(), @@ -280,7 +311,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify( copy, BaseVector::copy(*rowVector), "Copy of original input", - "Input after simplified"); + "Input after simplified", + rows); } catch (const VeloxException& e) { if (e.errorCode() == error_code::kUnsupportedInputUncatchable) { unsupportedInputUncatchableError = true; @@ -479,12 +511,13 @@ class MinimalSubExpressionFinder { void findMinimalExpression( core::TypedExprPtr plan, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy) { - if (verifyWithResults(plan, rowVector, columnsToWrapInLazy)) { + if (verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy)) { errorExit("Retry should have failed"); } - bool minimalFound = - findMinimalRecursive(plan, rowVector, columnsToWrapInLazy); + bool minimalFound = findMinimalRecursive( + plan, rowVector, rowsToVerify, columnsToWrapInLazy); if (minimalFound) { errorExit("Found minimal failing expression."); } else { @@ -504,13 +537,15 @@ class MinimalSubExpressionFinder { bool findMinimalRecursive( core::TypedExprPtr plan, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy) { bool anyFailed = false; for (auto& input : plan->inputs()) { - if (!verifyWithResults(input, rowVector, columnsToWrapInLazy)) { + if (!verifyWithResults( + input, rowVector, rowsToVerify, columnsToWrapInLazy)) { anyFailed = true; - bool minimalFound = - findMinimalRecursive(input, rowVector, columnsToWrapInLazy); + bool minimalFound = findMinimalRecursive( + input, rowVector, rowsToVerify, columnsToWrapInLazy); if (minimalFound) { return true; } @@ -519,10 +554,10 @@ class MinimalSubExpressionFinder { if (!anyFailed) { LOG(INFO) << "Failed with all children succeeding: " << plan->toString(); // Re-running the minimum failed. Put breakpoint here to debug. - verifyWithResults(plan, rowVector, columnsToWrapInLazy); + verifyWithResults(plan, rowVector, rowsToVerify, columnsToWrapInLazy); if (!columnsToWrapInLazy.empty()) { LOG(INFO) << "Trying without lazy:"; - if (verifyWithResults(plan, rowVector, {})) { + if (verifyWithResults(plan, rowVector, rowsToVerify, {})) { LOG(INFO) << "Minimal failure succeeded without lazy vectors"; } } @@ -539,14 +574,16 @@ class MinimalSubExpressionFinder { bool verifyWithResults( core::TypedExprPtr plan, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy) { VectorPtr result; LOG(INFO) << "Running with empty results vector :" << plan->toString(); - bool emptyResult = verifyPlan(plan, rowVector, columnsToWrapInLazy, result); + bool emptyResult = + verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result); LOG(INFO) << "Running with non empty vector :" << plan->toString(); result = vectorFuzzer_.fuzzFlat(plan->type()); bool filledResult = - verifyPlan(plan, rowVector, columnsToWrapInLazy, result); + verifyPlan(plan, rowVector, rowsToVerify, columnsToWrapInLazy, result); if (emptyResult != filledResult) { LOG(ERROR) << fmt::format( "Different results for empty vs populated ! Empty result = {} filledResult = {}", @@ -561,6 +598,7 @@ class MinimalSubExpressionFinder { bool verifyPlan( core::TypedExprPtr plan, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy, VectorPtr results) { // Turn off unnecessary logging. @@ -571,6 +609,7 @@ class MinimalSubExpressionFinder { verifier_.verify( {plan}, rowVector, + rowsToVerify, results ? BaseVector::copy(*results) : nullptr, true, // canThrow columnsToWrapInLazy); @@ -591,6 +630,7 @@ void computeMinimumSubExpression( VectorFuzzer& fuzzer, const std::vector& plans, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy) { auto finder = MinimalSubExpressionFinder(std::move(minimalVerifier), fuzzer); if (plans.size() > 1) { @@ -602,7 +642,8 @@ void computeMinimumSubExpression( for (auto plan : plans) { LOG(INFO) << "============================================"; LOG(INFO) << "Finding minimal subexpression for plan:" << plan->toString(); - finder.findMinimalExpression(plan, rowVector, columnsToWrapInLazy); + finder.findMinimalExpression( + plan, rowVector, rowsToVerify, columnsToWrapInLazy); LOG(INFO) << "============================================"; } } diff --git a/velox/expression/tests/ExpressionVerifier.h b/velox/expression/tests/ExpressionVerifier.h index 5b21826d29605..4aaa2f4baa290 100644 --- a/velox/expression/tests/ExpressionVerifier.h +++ b/velox/expression/tests/ExpressionVerifier.h @@ -57,14 +57,17 @@ class ExpressionVerifier { options_(options), referenceQueryRunner_{referenceQueryRunner} {} - // Executes expressions both using common path (all evaluation - // optimizations) and simplified path. Additionally, a list of column - // indices can be passed via 'columnsToWrapInLazy' which specify the - // columns/children in the input row vector that should be wrapped in a lazy - // layer before running it through the common evaluation path. The list can - // contain negative column indices that represent lazy vectors that should be - // preloaded before being fed to the evaluator. This list is sorted on the - // absolute value of the entries. + // Executes expressions using common path (all evaluation + // optimizations) and compares the result with either the simplified path or a + // reference query runner. An optional selectivity vector 'rowsToVerify' can + // be passed which specifies which rows to evaluate and verify. If its not + // provided (by passing std::nullopt) then all rows will be verified. + // Additionally, a list of column indices can be passed via + // 'columnsToWrapInLazy' which specify the columns/children in the input row + // vector that should be wrapped in a lazy layer before running it through the + // common evaluation path. The list can contain negative column indices that + // represent lazy vectors that should be preloaded before being fed to the + // evaluator. This list is sorted on the absolute value of the entries. // Returns: // - result of evaluating the expressions if both paths succeeded and // returned the exact same vectors. @@ -74,6 +77,7 @@ class ExpressionVerifier { fuzzer::ResultOrError verify( const std::vector& plans, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, VectorPtr&& resultVector, bool canThrow, std::vector columnsToWarpInLazy = {}); @@ -112,5 +116,6 @@ void computeMinimumSubExpression( VectorFuzzer& fuzzer, const std::vector& plans, const RowVectorPtr& rowVector, + const std::optional& rowsToVerify, const std::vector& columnsToWrapInLazy); } // namespace facebook::velox::test diff --git a/velox/expression/tests/ExpressionVerifierUnitTest.cpp b/velox/expression/tests/ExpressionVerifierUnitTest.cpp index 4857ecfa8dd2d..b5f3914360964 100644 --- a/velox/expression/tests/ExpressionVerifierUnitTest.cpp +++ b/velox/expression/tests/ExpressionVerifierUnitTest.cpp @@ -102,7 +102,8 @@ TEST_F(ExpressionVerifierUnitTest, persistReproInfo) { auto plan = parseExpression("always_throws(c0)", asRowType(data->type())); removeDirecrtoryIfExist(localFs, reproPath); - VELOX_ASSERT_THROW(verifier.verify({plan}, data, nullptr, false), ""); + VELOX_ASSERT_THROW( + verifier.verify({plan}, data, std::nullopt, nullptr, false), ""); EXPECT_TRUE(localFs->exists(reproPath)); EXPECT_FALSE(localFs->list(reproPath).empty()); removeDirecrtoryIfExist(localFs, reproPath);