From e9e323f8b128929b7ee0c894beab8c59dee8a016 Mon Sep 17 00:00:00 2001 From: duanmeng Date: Mon, 2 Oct 2023 10:56:38 -0700 Subject: [PATCH] Add VectorFuzzer::fuzzInputFlatRow API (#6849) Summary: It is good practice to use the fuzzer to generate row vectors as test data when writing unit tests (e.g. fuzzer.fuzzRow(rowType_)). But in some scenarios we need to control the encoding of the generated vectors; for example, the Parquet writer uses the Arrow Parquet writer, which does not support constant- or dictionary-encoded vectors. See discussion here https://github.com/facebookincubator/velox/pull/6608#discussion_r1337578838. With this new API, the fuzzer can be used as follows: ```C++ VectorFuzzer fuzzer({.vectorSize = vectorSize}, leafPool_.get()); fuzzer.fuzzInputFlatRow(rowType); ``` Pull Request resolved: https://github.com/facebookincubator/velox/pull/6849 Reviewed By: kgpai Differential Revision: D49830128 Pulled By: mbasmanova fbshipit-source-id: 17f5f80c9c07577fd2cef32f9e4733155f1a7113 --- velox/vector/fuzzer/VectorFuzzer.cpp | 12 +++++++++ velox/vector/fuzzer/VectorFuzzer.h | 4 +++ .../vector/fuzzer/tests/VectorFuzzerTest.cpp | 25 +++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 1c20fa266da0..62f02d00ac7c 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -683,6 +683,18 @@ RowVectorPtr VectorFuzzer::fuzzInputRow(const RowTypePtr& rowType) { return fuzzRow(rowType, opts_.vectorSize, false); } +RowVectorPtr VectorFuzzer::fuzzInputFlatRow(const RowTypePtr& rowType) { + std::vector<VectorPtr> children; + auto size = static_cast<vector_size_t>(opts_.vectorSize); + children.reserve(rowType->size()); + for (auto i = 0; i < rowType->size(); ++i) { + children.emplace_back(fuzzFlat(rowType->childAt(i), size)); + } + + return std::make_shared<RowVector>( + pool_, rowType, nullptr, size, std::move(children)); +} + RowVectorPtr VectorFuzzer::fuzzRow( std::vector<VectorPtr>&& children,
std::vector<std::string> childrenNames, diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 8d368c846f30..9aaea1137c33 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -238,6 +238,10 @@ class VectorFuzzer { // elements. RowVectorPtr fuzzInputRow(const RowTypePtr& rowType); + /// Same as the function above, but all generated vectors are flat, i.e. no + /// constant or dictionary-encoded vectors at any level. + RowVectorPtr fuzzInputFlatRow(const RowTypePtr& rowType); + // Generates a random type, including maps, vectors, and arrays. maxDepth // limits the maximum level of nesting for complex types. maxDepth <= 1 means // no complex types are allowed. diff --git a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp index 3b9cca5b145e..8631c6d39ea7 100644 --- a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp +++ b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp @@ -688,6 +688,31 @@ TEST_F(VectorFuzzerTest, fuzzRowChildrenToLazy) { ASSERT_TRUE(wrappedRow->childAt(1)->as<LazyVector>()->isLoaded()); } +TEST_F(VectorFuzzerTest, flatInputRow) { + VectorFuzzer fuzzer({.vectorSize = 10}, pool()); + auto vector = fuzzer.fuzzInputFlatRow( + ROW({DOUBLE(), ARRAY(BIGINT()), MAP(BIGINT(), VARCHAR())})); + ASSERT_TRUE(vector->type()->kindEquals( + ROW({DOUBLE(), ARRAY(BIGINT()), MAP(BIGINT(), VARCHAR())}))); + ASSERT_EQ(VectorEncoding::Simple::FLAT, vector->childAt(0)->encoding()); + ASSERT_EQ(VectorEncoding::Simple::ARRAY, vector->childAt(1)->encoding()); + ASSERT_EQ(VectorEncoding::Simple::MAP, vector->childAt(2)->encoding()); + + // Arrays. + auto elements = vector->childAt(1)->as<ArrayVector>()->elements(); + ASSERT_TRUE(elements->type()->kindEquals(BIGINT())); + ASSERT_EQ(VectorEncoding::Simple::FLAT, elements->encoding()); + + // Maps.
+ auto mapKeys = vector->childAt(2)->as<MapVector>()->mapKeys(); + ASSERT_TRUE(mapKeys->type()->kindEquals(BIGINT())); + ASSERT_EQ(VectorEncoding::Simple::FLAT, mapKeys->encoding()); + + auto mapValues = vector->childAt(2)->as<MapVector>()->mapValues(); + ASSERT_TRUE(mapValues->type()->kindEquals(VARCHAR())); + ASSERT_EQ(VectorEncoding::Simple::FLAT, mapValues->encoding()); +} + void VectorFuzzerTest::validateMaxSizes(VectorPtr vector, size_t maxSize) { if (vector->typeKind() == TypeKind::ARRAY) { validateMaxSizes(vector->template as<ArrayVector>()->elements(), maxSize);