Skip to content

Commit

Permalink
feat(fuzzer): Add VectorFuzzer::randTypeByWidth() (#11800)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11800

Add an API that generates a random type by type width, i.e., the number of
streams involved when reading or writing data of this type.

Reviewed By: xiaoxmeng

Differential Revision: D66968752

fbshipit-source-id: 0cddfdf6ec350c6f3c201bb993991cd4fec8f89e
  • Loading branch information
kagamiori authored and facebook-github-bot committed Dec 20, 2024
1 parent b4a7479 commit 3810d26
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
74 changes: 74 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,38 @@ RowTypePtr VectorFuzzer::randRowType(
return velox::randRowType(rng_, scalarTypes, maxDepth);
}

// Member overload that generates a random row type of width at least
// 'minWidth'. Forwards to velox::randRowTypeByWidth() using this fuzzer's
// generator (rng_) and the caller-supplied candidate scalar types.
TypePtr VectorFuzzer::randRowTypeByWidth(
const std::vector<TypePtr>& scalarTypes,
int minWidth) {
return velox::randRowTypeByWidth(rng_, scalarTypes, minWidth);
}

// Member overload that generates a random row type of width at least
// 'minWidth', drawing scalar types from defaultScalarTypes(). Forwards to
// velox::randRowTypeByWidth() using this fuzzer's generator (rng_).
TypePtr VectorFuzzer::randRowTypeByWidth(int minWidth) {
return velox::randRowTypeByWidth(rng_, defaultScalarTypes(), minWidth);
}

// Returns the width of 'type': 1 for a primitive type, and for ARRAY, MAP and
// ROW, 1 for the type itself plus the widths of all of its children, computed
// recursively. Any other non-primitive kind is rejected as unsupported.
size_t VectorFuzzer::typeWidth(const TypePtr& type) const {
  if (type->isPrimitiveType()) {
    return 1;
  }

  // Start at 1 for the nested type itself, then add every child's width.
  size_t width = 1;
  switch (type->kind()) {
    case TypeKind::ARRAY:
      width += typeWidth(type->asArray().elementType());
      break;
    case TypeKind::MAP: {
      const auto& mapType = type->asMap();
      width += typeWidth(mapType.keyType());
      width += typeWidth(mapType.valueType());
      break;
    }
    case TypeKind::ROW:
      for (const auto& field : type->asRow().children()) {
        width += typeWidth(field);
      }
      break;
    default:
      VELOX_UNREACHABLE("Unsupported type: {}", type->toString());
  }
  return width;
}

// Returns a random value produced by rand(rng_, min, max), i.e., drawn from
// this fuzzer's generator.
// NOTE(review): whether 'max' is inclusive is decided by rand(); confirm
// against its definition before relying on either bound.
size_t VectorFuzzer::randInRange(size_t min, size_t max) {
return rand(rng_, min, max);
}
Expand Down Expand Up @@ -1169,6 +1201,48 @@ TypePtr randMapType(
randType(rng, scalarTypes, 0), randType(rng, scalarTypes, maxDepth - 1));
}

// Generates a random type whose width is at least 'minWidth', where width is
// counted the way VectorFuzzer::typeWidth() counts it: 1 for a scalar, and 1
// plus the widths of all children for ARRAY, MAP and ROW.
//
// For minWidth <= 1 a scalar is picked at random from 'scalarTypes'.
// Otherwise one of ARRAY, MAP or ROW is picked at random and the remaining
// width is handed down to its children.
//
// 'scalarTypes' must not be empty: the scalar pick indexes it modulo its
// size.
TypePtr randTypeByWidth(
    FuzzerGenerator& rng,
    const std::vector<TypePtr>& scalarTypes,
    int minWidth) {
  if (minWidth <= 1) {
    return scalarTypes[rand<uint32_t>(rng) % scalarTypes.size()];
  }

  switch (rand<uint32_t>(rng) % 3) {
    case 0:
      // The array itself contributes 1; the element covers the rest.
      return ARRAY(randTypeByWidth(rng, scalarTypes, minWidth - 1));
    case 1: {
      // The map itself contributes 1; the rest is split between key and
      // value. With minWidth == 2 both end up scalar (requested widths 1 and
      // 0). Otherwise the key is asked for [0, minWidth - 3] and the value
      // for whatever is left, which is at least 2.
      const int keyWidth = minWidth == 2
          ? 1
          : static_cast<int>(
                rand<uint32_t>(rng) % static_cast<uint32_t>(minWidth - 2));
      // Generate the key before the value in separate statements. Both calls
      // consume 'rng', and the evaluation order of function arguments is
      // unspecified, so nesting them inside MAP(...) would make the type
      // produced for a given seed depend on the compiler.
      auto keyType = randTypeByWidth(rng, scalarTypes, keyWidth);
      auto valueType =
          randTypeByWidth(rng, scalarTypes, minWidth - keyWidth - 1);
      return MAP(std::move(keyType), std::move(valueType));
    }
    // case 2:
    default:
      return randRowTypeByWidth(rng, scalarTypes, minWidth);
  }
}

// Generates a random row type with 1 to 10 fields whose width is at least
// 'minWidth', where width is counted the way VectorFuzzer::typeWidth() counts
// it. The row itself contributes 1; the remaining 'minWidth - 1' is split
// randomly across the fields, and the last field takes whatever is left. A
// field asked for a width <= 1 becomes a scalar picked from 'scalarTypes'.
// Because there is always at least one field, the result has width >= 2 even
// for minWidth <= 1.
TypePtr randRowTypeByWidth(
    FuzzerGenerator& rng,
    const std::vector<TypePtr>& scalarTypes,
    int minWidth) {
  const uint32_t numFields = 1 + rand<uint32_t>(rng) % 10;
  std::vector<TypePtr> fields;
  fields.reserve(numFields);

  int remainingWidth = minWidth - 1;
  // All fields except the last get a random share in [0, remainingWidth - 1],
  // so the last field is always left with a positive share whenever
  // remainingWidth started out positive. No random number is drawn for the
  // share once nothing is left to distribute.
  for (uint32_t i = 0; i + 1 < numFields; ++i) {
    const int fieldWidth = remainingWidth > 0
        ? static_cast<int>(
              rand<uint32_t>(rng) % static_cast<uint32_t>(remainingWidth))
        : 0;
    fields.push_back(randTypeByWidth(rng, scalarTypes, fieldWidth));
    remainingWidth -= fieldWidth;
  }
  fields.push_back(randTypeByWidth(rng, scalarTypes, remainingWidth));
  return ROW(std::move(fields));
}

// Convenience overload: forwards to randOrderableType(rng, scalarTypes,
// maxDepth) with defaultScalarTypes() as the candidate scalar types.
TypePtr randOrderableType(FuzzerGenerator& rng, int maxDepth) {
return randOrderableType(rng, defaultScalarTypes(), maxDepth);
}
Expand Down
19 changes: 19 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,19 @@ class VectorFuzzer {

/// Overload of randType() that takes the candidate scalar types from the
/// caller. NOTE(review): 'maxDepth' presumably bounds the nesting depth of the
/// generated type; confirm against the definition.
TypePtr randType(const std::vector<TypePtr>& scalarTypes, int maxDepth = 5);

/// Generates a random RowType whose fields may be scalars, arrays, maps, or
/// nested rows. 'minWidth' is a lower bound on the width of the result, i.e.,
/// the number of streams involved when reading or writing data of this type
/// (see typeWidth()). The result always has at least one field, so its width
/// is never less than 2, even for minWidth <= 1. Scalar types are drawn from
/// defaultScalarTypes().
TypePtr randRowTypeByWidth(int minWidth);

/// Same as randRowTypeByWidth(int minWidth), but draws scalar types from
/// 'scalarTypes' instead of defaultScalarTypes(). 'scalarTypes' must not be
/// empty.
TypePtr randRowTypeByWidth(
const std::vector<TypePtr>& scalarTypes,
int minWidth);

/// Returns the width of the given type. The width of a type is the number of
/// streams involved when reading or writing data of this type: 1 for a
/// primitive type, and for ARRAY, MAP and ROW, 1 plus the sum of the widths of
/// all children. Other non-primitive kinds are not supported.
size_t typeWidth(const TypePtr& type) const;

/// Same as randType(), but only generates orderable types.
/// MAP types are not generated as they are not orderable.
TypePtr randOrderableType(int maxDepth = 5);
Expand Down Expand Up @@ -441,6 +454,12 @@ RowTypePtr randRowType(
const std::vector<TypePtr>& scalarTypes,
int maxDepth = 5);

/// Generates a random RowType with 1 to 10 fields whose width is at least
/// 'minWidth'. The width of a type is 1 for a scalar and, for ARRAY, MAP and
/// ROW, 1 plus the widths of all children. Fields may be scalars picked from
/// 'scalarTypes' (which must not be empty), arrays, maps, or nested rows.
/// Random choices are drawn from 'rng'.
TypePtr randRowTypeByWidth(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int minWidth);

/// Returns a reference to the default set of scalar types to choose from when
/// generating random types.
const std::vector<TypePtr>& defaultScalarTypes();

Expand Down
33 changes: 33 additions & 0 deletions velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -950,4 +950,37 @@ TEST_F(VectorFuzzerTest, randMapType) {
ASSERT_TRUE(fuzzer.randMapType()->isMap());
}
}

TEST_F(VectorFuzzerTest, randTypeByWidth) {
  VectorFuzzer::Options options;
  VectorFuzzer fuzzer(options, pool());

  // typeWidth(): a scalar counts as 1; a nested type counts as 1 plus the
  // widths of all of its children.
  const TypePtr scalarType = BIGINT();
  EXPECT_EQ(fuzzer.typeWidth(scalarType), 1);

  const TypePtr arrayType = ARRAY(BIGINT());
  EXPECT_EQ(fuzzer.typeWidth(arrayType), 2);

  const TypePtr mapType = MAP(BIGINT(), ARRAY(VARCHAR()));
  EXPECT_EQ(fuzzer.typeWidth(mapType), 4);

  const TypePtr rowType = ROW(
      {INTEGER(), ARRAY(BIGINT()), MAP(VARCHAR(), DOUBLE()), ROW({TINYINT()})});
  EXPECT_EQ(fuzzer.typeWidth(rowType), 9);

  // randRowTypeByWidth(): the result is always a row with at least one field,
  // so even degenerate requests yield a width of at least 2.
  for (const int smallWidth : {-1, 0, 1}) {
    const auto generated = fuzzer.randRowTypeByWidth(smallWidth);
    EXPECT_GE(fuzzer.typeWidth(generated), 2);
  }

  // Randomly chosen requested widths in [0, 127]: the generated type must be
  // at least as wide as requested, and never narrower than 2.
  folly::Random::DefaultGenerator widthRng;
  widthRng.seed(0);
  for (auto iteration = 0; iteration < 1000; ++iteration) {
    const auto requested = folly::Random::rand32(widthRng) % 128;
    const auto generated = fuzzer.randRowTypeByWidth(requested);
    EXPECT_GE(fuzzer.typeWidth(generated), requested > 2 ? requested : 2);
  }
}
} // namespace

0 comments on commit 3810d26

Please sign in to comment.