Skip to content

Commit

Permalink
fix(fuzzer): Reduce invalid or unsupported test cases in window fuzzer (
Browse files Browse the repository at this point in the history
#11902)

Summary:
WindowFuzzer currently generates many invalid or unsupported test cases. When a test case is invalid, the execution throws in Velox and is not verified against the reference DB. Test cases unsupported by the ReferenceQueryRunner are also not verified against the reference DB. As a result, the test coverage becomes limited.

This PR fixes WindowFuzzer to reduce the percentage of invalid and unsupported test cases. Specifically, this PR includes the following fixes and adjustments:
1. When generating partition-by and order-by keys, only use scalar types supported by the ReferenceQueryRunner.
2. Update the type of the row_number column to be INTEGER to match the type of the offset columns of K-Rows frames.
3. Avoid generating NULLs in the offset columns of K-Rows frames.
4. Fail the fuzzer test if fewer than 50% of iterations are verified, either against the reference DB or through custom verifiers.


Differential Revision: D67360981

Pulled By: kagamiori
  • Loading branch information
kagamiori authored and facebook-github-bot committed Dec 17, 2024
1 parent 9b64b94 commit b4c6ed4
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 16 deletions.
21 changes: 13 additions & 8 deletions velox/exec/fuzzer/AggregationFuzzerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,11 @@ std::vector<std::string> AggregationFuzzerBase::generateSortingKeys(
std::vector<std::string>& names,
std::vector<TypePtr>& types,
bool rangeFrame,
const std::vector<TypePtr>& scalarTypes,
std::optional<uint32_t> numKeys) {
std::vector<std::string> keys;
vector_size_t maxDepth;
std::vector<TypePtr> sortingKeyTypes = defaultScalarTypes();
std::vector<TypePtr> sortingKeyTypes = scalarTypes;

// If frame has k-RANGE bound, only one sorting key should be present, and it
// should be a scalar type which supports '+', '-' arithmetic operations.
Expand Down Expand Up @@ -328,9 +329,10 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
std::vector<std::string> names,
std::vector<TypePtr> types,
const std::vector<std::string>& partitionKeys,
const std::vector<std::string>& windowFrameBounds,
const CallableSignature& signature) {
names.push_back("row_number");
types.push_back(BIGINT());
types.push_back(INTEGER());

auto generator = findInputGenerator(signature);

Expand All @@ -339,11 +341,10 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
velox::test::VectorMaker vectorMaker{pool_.get()};
int64_t rowNumber = 0;

std::unordered_set<std::string> partitionKeySet;
partitionKeySet.reserve(partitionKeys.size());
for (auto partitionKey : partitionKeys) {
partitionKeySet.insert(partitionKey);
}
std::unordered_set<std::string> partitionKeySet{
partitionKeys.begin(), partitionKeys.end()};
std::unordered_set<std::string> windowFrameBoundsSet{
windowFrameBounds.begin(), windowFrameBounds.end()};

for (auto j = 0; j < FLAGS_num_batches; ++j) {
std::vector<VectorPtr> children;
Expand All @@ -365,11 +366,15 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
auto baseVector = vectorFuzzer_.fuzz(types[i], numPartitions);
children.push_back(
BaseVector::wrapInDictionary(nulls, indices, size, baseVector));
} else if (
windowFrameBoundsSet.find(names[i]) != windowFrameBoundsSet.end()) {
// Frame bound columns cannot have NULLs.
children.push_back(vectorFuzzer_.fuzzNotNull(types[i], size));
} else {
children.push_back(vectorFuzzer_.fuzz(types[i], size));
}
}
children.push_back(vectorMaker.flatVector<int64_t>(
children.push_back(vectorMaker.flatVector<int32_t>(
size, [&](auto /*row*/) { return rowNumber++; }));
input.push_back(vectorMaker.rowVector(names, children));
}
Expand Down
12 changes: 8 additions & 4 deletions velox/exec/fuzzer/AggregationFuzzerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ class AggregationFuzzerBase {
std::vector<std::string>& names,
std::vector<TypePtr>& types,
bool rangeFrame = false,
const std::vector<TypePtr>& scalarTypes = defaultScalarTypes(),
std::optional<uint32_t> numKeys = std::nullopt);

std::pair<CallableSignature, SignatureStats&> pickSignature();
Expand All @@ -197,14 +198,17 @@ class AggregationFuzzerBase {
std::vector<TypePtr> types,
const std::optional<CallableSignature>& signature);

// Generate a RowVector of the given types of children with an additional
// child named "row_number" of BIGINT row numbers that differentiates every
// row. Row numbers start from 0. This additional input vector is needed for
// result verification of window aggregations.
/// Generate a RowVector of the given types of children with an additional
/// child named "row_number" of INTEGER row numbers that differentiates every
/// row. Row numbers start from 0. This additional input vector is needed for
/// result verification of window aggregations.
/// @param windowFrameBounds Names of frame bound columns of a window
/// operation. These columns are fuzzed without NULLs.
std::vector<RowVectorPtr> generateInputDataWithRowNumber(
std::vector<std::string> names,
std::vector<TypePtr> types,
const std::vector<std::string>& partitionKeys,
const std::vector<std::string>& windowFrameBounds,
const CallableSignature& signature);

velox::fuzzer::ResultOrError execute(
Expand Down
28 changes: 24 additions & 4 deletions velox/exec/fuzzer/WindowFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,14 @@ std::vector<SortingKeyAndOrder> WindowFuzzer::generateSortingKeysAndOrders(
std::vector<TypePtr>& types,
bool isKRangeFrame,
std::optional<uint32_t> numKeys) {
auto keys = generateSortingKeys(prefix, names, types, isKRangeFrame, numKeys);
VELOX_CHECK_NOT_NULL(referenceQueryRunner_);
auto keys = generateSortingKeys(
prefix,
names,
types,
isKRangeFrame,
referenceQueryRunner_->supportedScalarTypes(),
numKeys);
std::vector<SortingKeyAndOrder> results;
for (auto i = 0; i < keys.size(); ++i) {
auto asc = vectorFuzzer_.coinToss(0.5);
Expand Down Expand Up @@ -455,8 +462,14 @@ void WindowFuzzer::go() {

const uint32_t numKeys =
boost::random::uniform_int_distribution<uint32_t>(1, 15)(rng_);
const auto partitionKeys =
generateSortingKeys("p", argNames, argTypes, false, numKeys);
VELOX_CHECK_NOT_NULL(referenceQueryRunner_);
const auto partitionKeys = generateSortingKeys(
"p",
argNames,
argTypes,
false,
referenceQueryRunner_->supportedScalarTypes(),
numKeys);

std::vector<SortingKeyAndOrder> sortingKeysAndOrders;
TypeKind orderByTypeKind;
Expand All @@ -479,7 +492,7 @@ void WindowFuzzer::go() {
}

auto input = generateInputDataWithRowNumber(
argNames, argTypes, partitionKeys, signature);
argNames, argTypes, partitionKeys, kBoundColumns, signature);
// Offset column names used for k-RANGE frame bounds have fixed names: off0
// and off1, representing the precomputed offset columns used as frame start
// and frame end bound respectively.
Expand Down Expand Up @@ -547,6 +560,13 @@ void WindowFuzzer::go() {
}

stats_.print(iteration);
// Check that at least half of the iterations were verified, either against
// the reference DB or through custom result verifiers.
// stats_.numVerificationSkipped tracks the number of iterations verified
// through custom result verifiers.
VELOX_CHECK_GE(
(stats_.numVerified + stats_.numVerificationSkipped) / (double)iteration,
0.5);
printSignatureStats();
}

Expand Down

0 comments on commit b4c6ed4

Please sign in to comment.