Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(fuzzer): Reduce invalid or unsupported test cases in window fuzzer #11902

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion velox/exec/fuzzer/AggregationFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ void AggregationFuzzer::go() {
auto partitionKeys = generateKeys("p", argNames, argTypes);
auto sortingKeys = generateSortingKeys("s", argNames, argTypes);
auto input = generateInputDataWithRowNumber(
argNames, argTypes, partitionKeys, signature);
argNames, argTypes, partitionKeys, {}, signature);

logVectors(input);

Expand Down
21 changes: 13 additions & 8 deletions velox/exec/fuzzer/AggregationFuzzerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,11 @@ std::vector<std::string> AggregationFuzzerBase::generateSortingKeys(
std::vector<std::string>& names,
std::vector<TypePtr>& types,
bool rangeFrame,
const std::vector<TypePtr>& scalarTypes,
std::optional<uint32_t> numKeys) {
std::vector<std::string> keys;
vector_size_t maxDepth;
std::vector<TypePtr> sortingKeyTypes = defaultScalarTypes();
std::vector<TypePtr> sortingKeyTypes = scalarTypes;

// If the frame has a k-RANGE bound, only one sorting key should be present, and
// it should be a scalar type that supports '+' and '-' arithmetic operations.
Expand Down Expand Up @@ -328,9 +329,10 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
std::vector<std::string> names,
std::vector<TypePtr> types,
const std::vector<std::string>& partitionKeys,
const std::vector<std::string>& windowFrameBounds,
const CallableSignature& signature) {
names.push_back("row_number");
types.push_back(BIGINT());
types.push_back(INTEGER());

auto generator = findInputGenerator(signature);

Expand All @@ -339,11 +341,10 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
velox::test::VectorMaker vectorMaker{pool_.get()};
int64_t rowNumber = 0;

std::unordered_set<std::string> partitionKeySet;
partitionKeySet.reserve(partitionKeys.size());
for (auto partitionKey : partitionKeys) {
partitionKeySet.insert(partitionKey);
}
std::unordered_set<std::string> partitionKeySet{
partitionKeys.begin(), partitionKeys.end()};
std::unordered_set<std::string> windowFrameBoundsSet{
windowFrameBounds.begin(), windowFrameBounds.end()};

for (auto j = 0; j < FLAGS_num_batches; ++j) {
std::vector<VectorPtr> children;
Expand All @@ -365,11 +366,15 @@ std::vector<RowVectorPtr> AggregationFuzzerBase::generateInputDataWithRowNumber(
auto baseVector = vectorFuzzer_.fuzz(types[i], numPartitions);
children.push_back(
BaseVector::wrapInDictionary(nulls, indices, size, baseVector));
} else if (
windowFrameBoundsSet.find(names[i]) != windowFrameBoundsSet.end()) {
// Frame bound columns cannot have NULLs.
children.push_back(vectorFuzzer_.fuzzNotNull(types[i], size));
} else {
children.push_back(vectorFuzzer_.fuzz(types[i], size));
}
}
children.push_back(vectorMaker.flatVector<int64_t>(
children.push_back(vectorMaker.flatVector<int32_t>(
size, [&](auto /*row*/) { return rowNumber++; }));
input.push_back(vectorMaker.rowVector(names, children));
}
Expand Down
12 changes: 8 additions & 4 deletions velox/exec/fuzzer/AggregationFuzzerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ class AggregationFuzzerBase {
std::vector<std::string>& names,
std::vector<TypePtr>& types,
bool rangeFrame = false,
const std::vector<TypePtr>& scalarTypes = defaultScalarTypes(),
std::optional<uint32_t> numKeys = std::nullopt);

std::pair<CallableSignature, SignatureStats&> pickSignature();
Expand All @@ -197,14 +198,17 @@ class AggregationFuzzerBase {
std::vector<TypePtr> types,
const std::optional<CallableSignature>& signature);

// Generate a RowVector of the given types of children with an additional
// child named "row_number" of BIGINT row numbers that differentiates every
// row. Row numbers start from 0. This additional input vector is needed for
// result verification of window aggregations.
/// Generate a RowVector of the given types of children with an additional
/// child named "row_number" of INTEGER row numbers that differentiates every
/// row. Row numbers start from 0. This additional input vector is needed for
/// result verification of window aggregations.
/// @param windowFrameBounds Names of frame bound columns of a window
/// operation. These columns are fuzzed without NULLs.
std::vector<RowVectorPtr> generateInputDataWithRowNumber(
std::vector<std::string> names,
std::vector<TypePtr> types,
const std::vector<std::string>& partitionKeys,
const std::vector<std::string>& windowFrameBounds,
const CallableSignature& signature);

velox::fuzzer::ResultOrError execute(
Expand Down
28 changes: 24 additions & 4 deletions velox/exec/fuzzer/WindowFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,14 @@ std::vector<SortingKeyAndOrder> WindowFuzzer::generateSortingKeysAndOrders(
std::vector<TypePtr>& types,
bool isKRangeFrame,
std::optional<uint32_t> numKeys) {
auto keys = generateSortingKeys(prefix, names, types, isKRangeFrame, numKeys);
VELOX_CHECK_NOT_NULL(referenceQueryRunner_);
auto keys = generateSortingKeys(
prefix,
names,
types,
isKRangeFrame,
referenceQueryRunner_->supportedScalarTypes(),
numKeys);
std::vector<SortingKeyAndOrder> results;
for (auto i = 0; i < keys.size(); ++i) {
auto asc = vectorFuzzer_.coinToss(0.5);
Expand Down Expand Up @@ -455,8 +462,14 @@ void WindowFuzzer::go() {

const uint32_t numKeys =
boost::random::uniform_int_distribution<uint32_t>(1, 15)(rng_);
const auto partitionKeys =
generateSortingKeys("p", argNames, argTypes, false, numKeys);
VELOX_CHECK_NOT_NULL(referenceQueryRunner_);
const auto partitionKeys = generateSortingKeys(
"p",
argNames,
argTypes,
false,
referenceQueryRunner_->supportedScalarTypes(),
numKeys);

std::vector<SortingKeyAndOrder> sortingKeysAndOrders;
TypeKind orderByTypeKind;
Expand All @@ -479,7 +492,7 @@ void WindowFuzzer::go() {
}

auto input = generateInputDataWithRowNumber(
argNames, argTypes, partitionKeys, signature);
argNames, argTypes, partitionKeys, kBoundColumns, signature);
// Offset column names used for k-RANGE frame bounds have fixed names: off0
// and off1, representing the precomputed offset columns used as frame start
// and frame end bound respectively.
Expand Down Expand Up @@ -547,6 +560,13 @@ void WindowFuzzer::go() {
}

stats_.print(iteration);
// Check that at least half of the iterations were verified, either against
// the reference DB or through custom result verifiers.
// stats_.numVerificationSkipped tracks the number of iterations verified
// through custom result verifiers.
VELOX_CHECK_GE(
(stats_.numVerified + stats_.numVerificationSkipped) / (double)iteration,
0.5);
printSignatureStats();
}

Expand Down
Loading