Skip to content

Commit

Permalink
feat(fuzzer): Add VectorFuzzer::randTypeByWidth() (#11800)
Browse files Browse the repository at this point in the history
Summary:

Add an API to generate a random type by type width, i.e., the number of
streams involved when reading or writing data of that type.

Differential Revision: D66968752
  • Loading branch information
kagamiori authored and facebook-github-bot committed Dec 16, 2024
1 parent 4c6ab14 commit 2e5eae3
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
75 changes: 75 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,38 @@ RowTypePtr VectorFuzzer::randRowType(
return velox::randRowType(rng_, scalarTypes, maxDepth);
}

// Builds a random row type of at least 'minWidth' width whose scalar leaves are
// drawn from 'scalarTypes'. Delegates to the free function
// velox::randRowTypeByWidth(), using this fuzzer's random generator.
TypePtr VectorFuzzer::randRowTypeByWidth(
    const std::vector<TypePtr>& scalarTypes,
    int minWidth) {
  auto rowType = velox::randRowTypeByWidth(rng_, scalarTypes, minWidth);
  return rowType;
}

// Convenience overload: same as the free function
// velox::randRowTypeByWidth(), with scalar leaves drawn from
// defaultScalarTypes() and randomness from this fuzzer's generator.
TypePtr VectorFuzzer::randRowTypeByWidth(int minWidth) {
  const auto& scalarTypes = defaultScalarTypes();
  auto rowType = velox::randRowTypeByWidth(rng_, scalarTypes, minWidth);
  return rowType;
}

// Computes the width of 'type': 1 for a primitive type; for an array, map or
// row, 1 for the container itself plus the widths of all of its children,
// computed recursively. Any other non-primitive kind is rejected via
// VELOX_UNREACHABLE.
size_t VectorFuzzer::typeWidth(const TypePtr& type) const {
  // Primitive types are leaves.
  if (type->isPrimitiveType()) {
    return 1;
  }

  const auto kind = type->kind();

  if (kind == TypeKind::ARRAY) {
    return typeWidth(type->asArray().elementType()) + 1;
  }

  if (kind == TypeKind::MAP) {
    const auto& mapType = type->asMap();
    return typeWidth(mapType.keyType()) + typeWidth(mapType.valueType()) + 1;
  }

  if (kind == TypeKind::ROW) {
    // Start at 1 to account for the row itself.
    size_t width = 1;
    for (const auto& field : type->asRow().children()) {
      width += typeWidth(field);
    }
    return width;
  }

  VELOX_UNREACHABLE("Unsupported type: {}", type->toString());
}

// Draws a random value for the given 'min' and 'max' bounds from this fuzzer's
// generator by forwarding to rand(rng_, min, max).
size_t VectorFuzzer::randInRange(size_t min, size_t max) {
  const size_t picked = rand(rng_, min, max);
  return picked;
}
Expand Down Expand Up @@ -1168,6 +1200,49 @@ TypePtr randMapType(
randType(rng, scalarTypes, 0), randType(rng, scalarTypes, maxDepth - 1));
}

/// Generates a random type whose width is at least 'minWidth'. The width of a
/// type is 1 for a scalar and, for an array, map or row, 1 plus the widths of
/// its children.
///
/// @param rng Random generator all choices are drawn from.
/// @param scalarTypes Candidate types for scalar leaves. Must not be empty.
/// @param minWidth Lower bound on the width of the result. Values <= 1 yield a
/// scalar type picked from 'scalarTypes'.
/// @return For minWidth <= 1 a scalar; otherwise an array, map or row chosen
/// with equal probability.
TypePtr randTypeByWidth(
    FuzzerGenerator& rng,
    const std::vector<TypePtr>& scalarTypes,
    int minWidth) {
  // Every path below ends by picking a scalar leaf, so an empty candidate list
  // would otherwise end in a modulo-by-zero.
  VELOX_CHECK(
      !scalarTypes.empty(),
      "At least one scalar type is required to generate a random type");

  if (minWidth <= 1) {
    return scalarTypes[rand<uint32_t>(rng) % scalarTypes.size()];
  }

  switch (rand<uint32_t>(rng) % 3) {
    case 0:
      // The array itself accounts for one unit of width.
      return ARRAY(randTypeByWidth(rng, scalarTypes, minWidth - 1));
    case 1: {
      // The map itself accounts for one unit of width; the remaining
      // 'minWidth - 1' is split between key and value. A key width of 0 still
      // produces a scalar key, so the total can only exceed 'minWidth'.
      int keyWidth = 1;
      if (minWidth > 2) {
        keyWidth = static_cast<int>(
            rand<uint32_t>(rng) % static_cast<uint32_t>(minWidth - 2));
      }
      const int valueWidth = minWidth - keyWidth - 1;
      return MAP(
          randTypeByWidth(rng, scalarTypes, keyWidth),
          randTypeByWidth(rng, scalarTypes, valueWidth));
    }
    default:
      // Remaining case (2): a row type.
      return randRowTypeByWidth(rng, scalarTypes, minWidth);
  }
}

/// Generates a random row type whose width is at least 'minWidth'. The row has
/// between 1 and 10 fields; the row itself accounts for one unit of width and
/// the remaining 'minWidth - 1' is split randomly among the fields. Because
/// every field has a width of at least 1, the result always has a width of at
/// least 2, even for 'minWidth' <= 1.
///
/// @param rng Random generator all choices are drawn from.
/// @param scalarTypes Candidate types for scalar leaves.
/// @param minWidth Lower bound on the width of the generated row type.
TypePtr randRowTypeByWidth(
    FuzzerGenerator& rng,
    const std::vector<TypePtr>& scalarTypes,
    int minWidth) {
  const uint32_t numFields = 1 + rand<uint32_t>(rng) % 10;

  std::vector<TypePtr> fields;
  fields.reserve(numFields);

  // Width left to distribute after accounting for the row itself. Clamped at
  // zero: non-positive budgets all behave the same (scalar fields), and the
  // clamp avoids signed overflow for very small 'minWidth'.
  int remainingWidth = minWidth > 0 ? minWidth - 1 : 0;

  // All fields but the last take a random share that leaves at least 1 for the
  // fields that follow.
  for (uint32_t i = 0; i + 1 < numFields; ++i) {
    int fieldWidth = 0;
    if (remainingWidth > 0) {
      fieldWidth = static_cast<int>(
          rand<uint32_t>(rng) % static_cast<uint32_t>(remainingWidth));
    }
    fields.push_back(randTypeByWidth(rng, scalarTypes, fieldWidth));
    remainingWidth -= fieldWidth;
  }

  // The last field absorbs whatever is left so the total meets 'minWidth'.
  fields.push_back(randTypeByWidth(rng, scalarTypes, remainingWidth));
  return ROW(std::move(fields));
}

// Overload that uses defaultScalarTypes() as the pool of scalar types and
// forwards to the randOrderableType() overload taking an explicit list.
TypePtr randOrderableType(FuzzerGenerator& rng, int maxDepth) {
  const auto& scalarTypes = defaultScalarTypes();
  return randOrderableType(rng, scalarTypes, maxDepth);
}
Expand Down
19 changes: 19 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,19 @@ class VectorFuzzer {

TypePtr randType(const std::vector<TypePtr>& scalarTypes, int maxDepth = 5);

/// Generates a random RowType whose width is at least 'minWidth'. The width of
/// a type is the number of streams involved when reading or writing data of
/// that type (see typeWidth()). Fields may be scalars, arrays, maps or nested
/// rows. This overload picks scalar types from defaultScalarTypes().
TypePtr randRowTypeByWidth(int minWidth);

/// Same as above, but scalar types are picked from 'scalarTypes'.
TypePtr randRowTypeByWidth(
const std::vector<TypePtr>& scalarTypes,
int minWidth);

/// Returns the width of the given type. The width of a type is the number of
/// streams involved when reading or writing data of this type: 1 for a
/// primitive type; for an array, map or row, 1 plus the widths of all of its
/// children. Other non-primitive types are not supported.
size_t typeWidth(const TypePtr& type) const;

/// Same as randType(), but only generates orderable types.
/// MAP types are not generated as they are not orderable.
TypePtr randOrderableType(int maxDepth = 5);
Expand Down Expand Up @@ -437,6 +450,12 @@ RowTypePtr randRowType(
const std::vector<TypePtr>& scalarTypes,
int maxDepth = 5);

/// Generates a random RowType whose width is at least 'minWidth', using 'rng'
/// for all random choices and 'scalarTypes' as the pool of scalar types. The
/// width of a type is 1 for a primitive type and, for an array, map or row, 1
/// plus the widths of its children.
TypePtr randRowTypeByWidth(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int minWidth);

/// Default set of scalar types to be chosen from when generating random types.
const std::vector<TypePtr>& defaultScalarTypes();

Expand Down
33 changes: 33 additions & 0 deletions velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,4 +948,37 @@ TEST_F(VectorFuzzerTest, randMapType) {
ASSERT_TRUE(fuzzer.randMapType()->isMap());
}
}

// Verifies typeWidth() on hand-built types and checks that
// randRowTypeByWidth() never returns a type narrower than requested.
TEST_F(VectorFuzzerTest, randTypeByWidth) {
  VectorFuzzer::Options options;
  VectorFuzzer fuzzer(options, pool());

  // typeWidth(): each scalar counts as 1, and every enclosing array, map or
  // row adds 1 on top of its children.
  EXPECT_EQ(fuzzer.typeWidth(BIGINT()), 1);
  EXPECT_EQ(fuzzer.typeWidth(ARRAY(BIGINT())), 2);
  EXPECT_EQ(fuzzer.typeWidth(MAP(BIGINT(), ARRAY(VARCHAR()))), 4);
  const TypePtr nestedRow = ROW(
      {INTEGER(), ARRAY(BIGINT()), MAP(VARCHAR(), DOUBLE()), ROW({TINYINT()})});
  EXPECT_EQ(fuzzer.typeWidth(nestedRow), 9);

  // The generated type is always a row with at least one field, so even for
  // tiny requested widths the result has a width of at least 2.
  for (const int tinyWidth : {-1, 0, 1}) {
    const auto generated = fuzzer.randRowTypeByWidth(tinyWidth);
    EXPECT_GE(fuzzer.typeWidth(generated), 2);
  }

  // Random requested widths in [0, 128), drawn from a fixed-seed generator.
  folly::Random::DefaultGenerator widthRng;
  widthRng.seed(0);
  for (auto iteration = 0; iteration < 1000; ++iteration) {
    const auto requested = folly::Random::rand32(widthRng) % 128;
    const auto expectedFloor = requested > 2 ? requested : 2;
    const auto generated = fuzzer.randRowTypeByWidth(requested);
    EXPECT_GE(fuzzer.typeWidth(generated), expectedFloor);
  }
}
} // namespace

0 comments on commit 2e5eae3

Please sign in to comment.