diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp
index a56785c227a0..49750eab0d6b 100644
--- a/velox/vector/fuzzer/VectorFuzzer.cpp
+++ b/velox/vector/fuzzer/VectorFuzzer.cpp
@@ -932,6 +932,43 @@ RowTypePtr VectorFuzzer::randRowType(
   return velox::randRowType(rng_, scalarTypes, maxDepth);
 }
 
+// Member overload: draws from this fuzzer's generator (rng_) and the
+// caller-supplied scalar types.
+TypePtr VectorFuzzer::randRowTypeByWidth(
+    const std::vector<TypePtr>& scalarTypes,
+    int minWidth) {
+  return velox::randRowTypeByWidth(rng_, scalarTypes, minWidth);
+}
+
+// Member overload: same as above, using defaultScalarTypes().
+TypePtr VectorFuzzer::randRowTypeByWidth(int minWidth) {
+  return velox::randRowTypeByWidth(rng_, defaultScalarTypes(), minWidth);
+}
+
+// Width is 1 for a primitive type; a nested type counts 1 for itself plus the
+// widths of all of its children.
+size_t VectorFuzzer::typeWidth(const TypePtr& type) const {
+  if (type->isPrimitiveType()) {
+    return 1;
+  }
+  switch (type->kind()) {
+    case TypeKind::ARRAY:
+      return 1 + typeWidth(type->asArray().elementType());
+    case TypeKind::MAP:
+      return 1 + typeWidth(type->asMap().keyType()) +
+          typeWidth(type->asMap().valueType());
+    case TypeKind::ROW: {
+      size_t fieldWidth = 0;
+      for (const auto& child : type->asRow().children()) {
+        fieldWidth += typeWidth(child);
+      }
+      return 1 + fieldWidth;
+    }
+    default:
+      VELOX_UNREACHABLE("Unsupported type: {}", type->toString());
+  }
+}
+
 size_t VectorFuzzer::randInRange(size_t min, size_t max) {
   return rand(rng_, min, max);
 }
@@ -1169,6 +1206,60 @@ TypePtr randMapType(
       randType(rng, scalarTypes, 0), randType(rng, scalarTypes, maxDepth - 1));
 }
 
+// Generates a random type whose width is at least max(minWidth, 1): a scalar
+// when minWidth <= 1, otherwise an ARRAY, MAP or ROW picked at random whose
+// children split the remaining width budget.
+TypePtr randTypeByWidth(
+    FuzzerGenerator& rng,
+    const std::vector<TypePtr>& scalarTypes,
+    int minWidth) {
+  // Every generated type bottoms out in a scalar drawn from scalarTypes.
+  VELOX_CHECK(!scalarTypes.empty(), "scalarTypes must not be empty");
+  if (minWidth <= 1) {
+    return scalarTypes[rand(rng) % scalarTypes.size()];
+  }
+
+  switch (rand(rng) % 3) {
+    case 0:
+      // The array itself accounts for one unit of width.
+      return ARRAY(randTypeByWidth(rng, scalarTypes, minWidth - 1));
+    case 1: {
+      // The map accounts for one unit; the rest is split between key and
+      // value. minWidth == 2 is special-cased to avoid a modulo by zero.
+      const int keyWidth =
+          minWidth == 2 ? 1 : static_cast<int>(rand(rng) % (minWidth - 2));
+      return MAP(
+          randTypeByWidth(rng, scalarTypes, keyWidth),
+          randTypeByWidth(rng, scalarTypes, minWidth - keyWidth - 1));
+    }
+    // case 2:
+    default:
+      return randRowTypeByWidth(rng, scalarTypes, minWidth);
+  }
+}
+
+// Generates a ROW type with 1 to 10 fields whose width is at least
+// max(minWidth, 2). One unit of width is attributed to the row itself; the
+// rest of the budget is distributed randomly among the fields.
+TypePtr randRowTypeByWidth(
+    FuzzerGenerator& rng,
+    const std::vector<TypePtr>& scalarTypes,
+    int minWidth) {
+  const int numFields = 1 + static_cast<int>(rand(rng) % 10);
+  std::vector<TypePtr> fields;
+  int remainingWidth = minWidth - 1;
+  for (int i = 0; i < numFields - 1; ++i) {
+    const int fieldWidth = remainingWidth > 0
+        ? static_cast<int>(rand(rng) % remainingWidth)
+        : 0;
+    fields.push_back(randTypeByWidth(rng, scalarTypes, fieldWidth));
+    remainingWidth -= fieldWidth;
+  }
+  // The last field absorbs whatever budget is left so the total is met.
+  fields.push_back(randTypeByWidth(rng, scalarTypes, remainingWidth));
+  return ROW(std::move(fields));
+}
+
 TypePtr randOrderableType(FuzzerGenerator& rng, int maxDepth) {
   return randOrderableType(rng, defaultScalarTypes(), maxDepth);
 }
diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h
index 8a3fae2c614d..df46045741dc 100644
--- a/velox/vector/fuzzer/VectorFuzzer.h
+++ b/velox/vector/fuzzer/VectorFuzzer.h
@@ -276,6 +276,24 @@ class VectorFuzzer {
 
   TypePtr randType(const std::vector<TypePtr>& scalarTypes, int maxDepth = 5);
 
+  /// Generates a random ROW type whose fields may be scalars, arrays, maps or
+  /// nested rows, drawing scalars from defaultScalarTypes(). minWidth is a
+  /// lower bound on the width of the result (see typeWidth()); because the
+  /// result always is a row with at least one field, its width is at least 2
+  /// even when minWidth is smaller.
+  TypePtr randRowTypeByWidth(int minWidth);
+
+  /// Same as randRowTypeByWidth(int), but scalar fields are drawn from
+  /// 'scalarTypes', which must not be empty.
+  TypePtr randRowTypeByWidth(
+      const std::vector<TypePtr>& scalarTypes,
+      int minWidth);
+
+  /// Returns the width of 'type', i.e. the number of streams involved when
+  /// reading or writing data of this type: 1 for a primitive type, and 1 plus
+  /// the widths of all children for ARRAY, MAP and ROW.
+  size_t typeWidth(const TypePtr& type) const;
+
-  /// Same as the function above, but only generate orderable types.
+  /// Same as randType() above, but only generate orderable types.
   /// MAP types are not generated as they are not orderable.
   TypePtr randOrderableType(int maxDepth = 5);
@@ -441,6 +459,15 @@ RowTypePtr randRowType(
     const std::vector<TypePtr>& scalarTypes,
     int maxDepth = 5);
 
+/// Generates a random ROW type whose width (1 for each primitive type, plus 1
+/// for each ARRAY, MAP and ROW, counted recursively) is at least
+/// max(minWidth, 2). Scalar fields are drawn from 'scalarTypes', which must
+/// not be empty.
+TypePtr randRowTypeByWidth(
+    FuzzerGenerator& rng,
+    const std::vector<TypePtr>& scalarTypes,
+    int minWidth);
+
 /// Default set of scalar types to be chosen from when generating random types.
 const std::vector<TypePtr>& defaultScalarTypes();
 
diff --git a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
index 4e2595cc9bd1..cf5c8548a4cc 100644
--- a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
+++ b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
@@ -950,4 +950,37 @@ TEST_F(VectorFuzzerTest, randMapType) {
     ASSERT_TRUE(fuzzer.randMapType()->isMap());
   }
 }
+
+TEST_F(VectorFuzzerTest, randTypeByWidth) {
+  VectorFuzzer::Options opts;
+  VectorFuzzer fuzzer(opts, pool());
+
+  // Test typeWidth.
+  TypePtr type = BIGINT();
+  EXPECT_EQ(fuzzer.typeWidth(type), 1);
+  type = ARRAY(BIGINT());
+  EXPECT_EQ(fuzzer.typeWidth(type), 2);
+  type = MAP(BIGINT(), ARRAY(VARCHAR()));
+  EXPECT_EQ(fuzzer.typeWidth(type), 4);
+  type = ROW(
+      {INTEGER(), ARRAY(BIGINT()), MAP(VARCHAR(), DOUBLE()), ROW({TINYINT()})});
+  EXPECT_EQ(fuzzer.typeWidth(type), 9);
+
+  // Test randRowTypeByWidth. The result is always a RowType with at least one
+  // field, so the minimal type width is 2 even for non-positive requests.
+  for (const int minWidth : {-1, 0, 1}) {
+    type = fuzzer.randRowTypeByWidth(minWidth);
+    EXPECT_TRUE(type->kind() == TypeKind::ROW);
+    EXPECT_GE(fuzzer.typeWidth(type), 2);
+  }
+
+  folly::Random::DefaultGenerator rng;
+  rng.seed(0);
+  for (auto i = 0; i < 1000; ++i) {
+    const auto width = folly::Random::rand32(rng) % 128;
+    type = fuzzer.randRowTypeByWidth(width);
+    EXPECT_TRUE(type->kind() == TypeKind::ROW);
+    EXPECT_GE(fuzzer.typeWidth(type), width > 2 ? width : 2);
+  }
+}
 } // namespace