diff --git a/velox/exec/fuzzer/AggregationFuzzer.cpp b/velox/exec/fuzzer/AggregationFuzzer.cpp index d778f39df475..b21c8a7be1e9 100644 --- a/velox/exec/fuzzer/AggregationFuzzer.cpp +++ b/velox/exec/fuzzer/AggregationFuzzer.cpp @@ -385,7 +385,7 @@ void AggregationFuzzer::go() { auto partitionKeys = generateKeys("p", argNames, argTypes); auto sortingKeys = generateSortingKeys("s", argNames, argTypes); auto input = generateInputDataWithRowNumber( - argNames, argTypes, partitionKeys, signature); + argNames, argTypes, partitionKeys, {}, signature); logVectors(input); diff --git a/velox/exec/fuzzer/AggregationFuzzerBase.cpp b/velox/exec/fuzzer/AggregationFuzzerBase.cpp index b6fe99a14491..ef8a61f4a65a 100644 --- a/velox/exec/fuzzer/AggregationFuzzerBase.cpp +++ b/velox/exec/fuzzer/AggregationFuzzerBase.cpp @@ -243,10 +243,11 @@ std::vector AggregationFuzzerBase::generateSortingKeys( std::vector& names, std::vector& types, bool rangeFrame, + const std::vector& scalarTypes, std::optional numKeys) { std::vector keys; vector_size_t maxDepth; - std::vector sortingKeyTypes = defaultScalarTypes(); + std::vector sortingKeyTypes = scalarTypes; // If frame has k-RANGE bound, only one sorting key should be present, and it // should be a scalar type which supports '+', '-' arithmetic operations. @@ -328,9 +329,10 @@ std::vector AggregationFuzzerBase::generateInputDataWithRowNumber( std::vector names, std::vector types, const std::vector& partitionKeys, + const std::vector& windowFrameBounds, const CallableSignature& signature) { names.push_back("row_number"); - types.push_back(BIGINT()); + types.push_back(INTEGER()); auto generator = findInputGenerator(signature); @@ -339,11 +341,10 @@ std::vector AggregationFuzzerBase::generateInputDataWithRowNumber( velox::test::VectorMaker vectorMaker{pool_.get()}; int64_t rowNumber = 0; - std::unordered_set partitionKeySet; - partitionKeySet.reserve(partitionKeys.size()); - for (auto partitionKey : partitionKeys) { - partitionKeySet.insert(partitionKey); - } + std::unordered_set partitionKeySet{ + partitionKeys.begin(), partitionKeys.end()}; + std::unordered_set windowFrameBoundsSet{ + windowFrameBounds.begin(), windowFrameBounds.end()}; for (auto j = 0; j < FLAGS_num_batches; ++j) { std::vector children; @@ -365,11 +366,15 @@ std::vector AggregationFuzzerBase::generateInputDataWithRowNumber( auto baseVector = vectorFuzzer_.fuzz(types[i], numPartitions); children.push_back( BaseVector::wrapInDictionary(nulls, indices, size, baseVector)); + } else if ( + windowFrameBoundsSet.find(names[i]) != windowFrameBoundsSet.end()) { + // Frame bound columns cannot have NULLs. + children.push_back(vectorFuzzer_.fuzzNotNull(types[i], size)); } else { children.push_back(vectorFuzzer_.fuzz(types[i], size)); } } - children.push_back(vectorMaker.flatVector( + children.push_back(vectorMaker.flatVector( size, [&](auto /*row*/) { return rowNumber++; })); input.push_back(vectorMaker.rowVector(names, children)); } diff --git a/velox/exec/fuzzer/AggregationFuzzerBase.h b/velox/exec/fuzzer/AggregationFuzzerBase.h index a2237f8a9d74..8e2b252fa650 100644 --- a/velox/exec/fuzzer/AggregationFuzzerBase.h +++ b/velox/exec/fuzzer/AggregationFuzzerBase.h @@ -188,6 +188,7 @@ class AggregationFuzzerBase { std::vector& names, std::vector& types, bool rangeFrame = false, + const std::vector& scalarTypes = defaultScalarTypes(), std::optional numKeys = std::nullopt); std::pair pickSignature(); @@ -197,14 +198,17 @@ class AggregationFuzzerBase { std::vector types, const std::optional& signature); - // Generate a RowVector of the given types of children with an additional - // child named "row_number" of BIGINT row numbers that differentiates every - // row. Row numbers start from 0. This additional input vector is needed for - // result verification of window aggregations. + /// Generate a RowVector of the given types of children with an additional + /// child named "row_number" of INTEGER row numbers that differentiates every + /// row. Row numbers start from 0. This additional input vector is needed for + /// result verification of window aggregations. + /// @param windowFrameBounds Names of frame bound columns of a window + /// operation. These columns are fuzzed without NULLs. std::vector generateInputDataWithRowNumber( std::vector names, std::vector types, const std::vector& partitionKeys, + const std::vector& windowFrameBounds, const CallableSignature& signature); velox::fuzzer::ResultOrError execute( diff --git a/velox/exec/fuzzer/WindowFuzzer.cpp b/velox/exec/fuzzer/WindowFuzzer.cpp index b4d8a578f3a8..eaaffea3df67 100644 --- a/velox/exec/fuzzer/WindowFuzzer.cpp +++ b/velox/exec/fuzzer/WindowFuzzer.cpp @@ -256,7 +256,14 @@ std::vector WindowFuzzer::generateSortingKeysAndOrders( std::vector& types, bool isKRangeFrame, std::optional numKeys) { - auto keys = generateSortingKeys(prefix, names, types, isKRangeFrame, numKeys); + VELOX_CHECK_NOT_NULL(referenceQueryRunner_); + auto keys = generateSortingKeys( + prefix, + names, + types, + isKRangeFrame, + referenceQueryRunner_->supportedScalarTypes(), + numKeys); std::vector results; for (auto i = 0; i < keys.size(); ++i) { auto asc = vectorFuzzer_.coinToss(0.5); @@ -455,8 +462,14 @@ void WindowFuzzer::go() { const uint32_t numKeys = boost::random::uniform_int_distribution(1, 15)(rng_); - const auto partitionKeys = - generateSortingKeys("p", argNames, argTypes, false, numKeys); + VELOX_CHECK_NOT_NULL(referenceQueryRunner_); + const auto partitionKeys = generateSortingKeys( + "p", + argNames, + argTypes, + false, + referenceQueryRunner_->supportedScalarTypes(), + numKeys); std::vector sortingKeysAndOrders; TypeKind orderByTypeKind; @@ -479,7 +492,7 @@ void WindowFuzzer::go() { } auto input = generateInputDataWithRowNumber( - argNames, argTypes, partitionKeys, signature); + argNames, argTypes, partitionKeys, kBoundColumns, signature); // Offset column names used for k-RANGE frame bounds have fixed names: off0 // and off1, representing the precomputed offset columns used as frame start // and frame end bound respectively. @@ -547,6 +560,13 @@ void WindowFuzzer::go() { } stats_.print(iteration); + // Check that at least half of the iterations were verified, either against + // the reference DB or through custom result verifiers. + // stats_.numVerificationSkipped tracks the number of iterations verified + // through custom result verifiers. + VELOX_CHECK_GE( + (stats_.numVerified + stats_.numVerificationSkipped) / (double)iteration, + 0.5); printSignatureStats(); }