Skip to content

Commit

Permalink
feat(fuzzer): Support custom input generator in VectorFuzzer (#11466)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11466

Custom types, such as JSON, often require custom logic to generate valid values. To support such
custom data generation in the expression fuzzer, this diff makes two changes:

1. Require a custom type to provide a custom input generator that is automatically used when
VectorFuzzer generates vectors of this type. The custom type may provide a nullptr instead, in which
case VectorFuzzer falls back to generating random data as it did before.

2. Allow users of VectorFuzzer to provide a custom input generator to the API calls. (This will be
needed for custom input generation for non-custom types in the expression fuzzer, such as CDF
functions that require some arguments to be positive numbers.)

Differential Revision: D65576377
  • Loading branch information
kagamiori authored and facebook-github-bot committed Jan 15, 2025
1 parent bfe371c commit 37b0528
Show file tree
Hide file tree
Showing 20 changed files with 364 additions and 102 deletions.
42 changes: 24 additions & 18 deletions velox/common/fuzzer/ConstrainedGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,6 @@

namespace facebook::velox::fuzzer {

// AbstractInputGenerator
AbstractInputGenerator::AbstractInputGenerator(
    size_t seed,
    const TypePtr& type,
    std::unique_ptr<AbstractInputGenerator>&& next,
    double nullRatio)
    : type_(type), next_(std::move(next)), nullRatio_(nullRatio) {
  // rng_ is default-constructed; apply the caller-supplied seed to it here.
  rng_.seed(seed);
}

// NotEqualConstrainedGenerator
variant NotEqualConstrainedGenerator::generate() {
variant value;
Expand All @@ -49,21 +39,37 @@ variant SetConstrainedGenerator::generate() {
}

// JsonInputGenerator
folly::json::serialization_opts JsonInputGenerator::getSerializationOptions() {
JsonInputGenerator::~JsonInputGenerator() = default;

folly::json::serialization_opts getSerializationOptions(
FuzzerGenerator& rng,
bool makeRandomVariation) {
folly::json::serialization_opts opts;
opts.allow_non_string_keys = true;
opts.allow_nan_inf = true;
if (makeRandomVariation_) {
opts.convert_int_keys = rand<bool>(rng_);
opts.pretty_formatting = rand<bool>(rng_);
opts.pretty_formatting_indent_width = rand<uint32_t>(rng_, 0, 4);
opts.encode_non_ascii = rand<bool>(rng_);
opts.sort_keys = rand<bool>(rng_);
opts.skip_invalid_utf8 = rand<bool>(rng_);
if (makeRandomVariation) {
opts.convert_int_keys = rand<bool>(rng);
opts.pretty_formatting = rand<bool>(rng);
opts.pretty_formatting_indent_width = rand<uint32_t>(rng, 0, 4);
opts.encode_non_ascii = rand<bool>(rng);
opts.sort_keys = rand<bool>(rng);
opts.skip_invalid_utf8 = rand<bool>(rng);
}
return opts;
}

// Constructs a JSON input generator. The base class is given `seed`, `type`,
// `nullRatio` and no chained generator (nullptr). Ownership of
// `objectGenerator` is taken. `makeRandomVariation` is stored and also
// controls whether the serialization options are randomized.
JsonInputGenerator::JsonInputGenerator(
    size_t seed,
    const TypePtr& type,
    double nullRatio,
    std::unique_ptr<AbstractInputGenerator>&& objectGenerator,
    bool makeRandomVariation)
    : AbstractInputGenerator(seed, type, nullptr, nullRatio),
      objectGenerator_(std::move(objectGenerator)),
      makeRandomVariation_(makeRandomVariation) {
  // Computed in the body, after the base class has seeded rng_, because the
  // options may be drawn from that random engine.
  opts_ = getSerializationOptions(rng_, makeRandomVariation_);
}

variant JsonInputGenerator::generate() {
if (coinToss(rng_, nullRatio_)) {
return variant::null(type_->kind());
Expand Down
36 changes: 2 additions & 34 deletions velox/common/fuzzer/ConstrainedGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,6 @@ namespace facebook::velox::fuzzer {

using facebook::velox::variant;

/// Abstract base class for input generators. Each call to generate() yields
/// one value as a variant. Holds the state shared by concrete generators.
class AbstractInputGenerator {
public:
/// Seeds the random engine with 'seed', records 'type' and 'nullRatio', and
/// takes ownership of 'next' (which may be nullptr).
AbstractInputGenerator(
size_t seed,
const TypePtr& type,
std::unique_ptr<AbstractInputGenerator>&& next,
double nullRatio);

virtual ~AbstractInputGenerator() = default;

/// Produces a value. Implemented by concrete generators.
virtual variant generate() = 0;

/// Returns the type this generator was constructed with.
TypePtr type() const {
return type_;
}

protected:
// Random engine, seeded in the constructor.
FuzzerGenerator rng_;

// Type passed to the constructor and reported by type().
TypePtr type_;

// Optional generator owned by this one; how it is used is left to subclasses.
std::unique_ptr<AbstractInputGenerator> next_;

// Null ratio passed to the constructor; JsonInputGenerator::generate() feeds
// it to coinToss() to decide whether to return a null variant.
double nullRatio_;
};

std::unique_ptr<AbstractInputGenerator>
getRandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio);

Expand Down Expand Up @@ -356,13 +330,9 @@ class JsonInputGenerator : public AbstractInputGenerator {
const TypePtr& type,
double nullRatio,
std::unique_ptr<AbstractInputGenerator>&& objectGenerator,
bool makeRandomVariation = false)
: AbstractInputGenerator(seed, type, nullptr, nullRatio),
objectGenerator_{std::move(objectGenerator)},
makeRandomVariation_{makeRandomVariation},
opts_{getSerializationOptions()} {}
bool makeRandomVariation = false);

~JsonInputGenerator() override = default;
~JsonInputGenerator() override;

variant generate() override;

Expand All @@ -383,8 +353,6 @@ class JsonInputGenerator : public AbstractInputGenerator {

void makeRandomVariation(std::string json);

folly::json::serialization_opts getSerializationOptions();

std::unique_ptr<AbstractInputGenerator> objectGenerator_;

bool makeRandomVariation_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class TypeFactories : public CustomTypeFactories {
return nullptr;
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}

private:
TypePtr type_;
};
Expand Down
10 changes: 10 additions & 0 deletions velox/expression/tests/CustomTypeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class FancyIntTypeFactories : public CustomTypeFactories {
// Casting is not supported for this type, so the call is reported through
// VELOX_UNSUPPORTED() instead of returning an operator.
exec::CastOperatorPtr getCastOperator() const override {
VELOX_UNSUPPORTED();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

class ToFancyIntFunction : public exec::VectorFunction {
Expand Down Expand Up @@ -147,6 +152,11 @@ class AlwaysFailingTypeFactories : public CustomTypeFactories {
// Casting is not supported for this type, so the call is reported through
// VELOX_UNSUPPORTED() instead of returning an operator.
exec::CastOperatorPtr getCastOperator() const override {
VELOX_UNSUPPORTED();
}

// Unlike the other factories shown here, this one does not return nullptr:
// asking for an input generator is reported through VELOX_UNSUPPORTED().
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
VELOX_UNSUPPORTED();
}
};
} // namespace

Expand Down
4 changes: 3 additions & 1 deletion velox/functions/prestosql/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ velox_add_library(

velox_link_libraries(
velox_presto_types
velox_type
velox_memory
velox_expression
velox_functions_util
velox_functions_json
velox_functions_lib_date_time_formatter)
velox_functions_lib_date_time_formatter
velox_constrained_input_generators)

if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/HyperLogLogType.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ class HyperLogLogTypeFactories : public CustomTypeFactories {
// Returns nullptr: this type provides no dedicated cast operator.
exec::CastOperatorPtr getCastOperator() const override {
return nullptr;
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

void registerHyperLogLogType();
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/IPAddressType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ class IPAddressTypeFactories : public CustomTypeFactories {
// Returns a newly allocated IPAddressCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<IPAddressCastOperator>();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/IPPrefixType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ class IPPrefixTypeFactories : public CustomTypeFactories {
// Returns a newly allocated IPPrefixCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<IPPrefixCastOperator>();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

} // namespace
Expand Down
12 changes: 12 additions & 0 deletions velox/functions/prestosql/types/JsonType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "folly/json.h"

#include "velox/common/base/Exceptions.h"
#include "velox/common/fuzzer/ConstrainedGenerators.h"
#include "velox/expression/EvalCtx.h"
#include "velox/expression/PeeledEncoding.h"
#include "velox/expression/StringWriter.h"
Expand Down Expand Up @@ -1288,6 +1289,17 @@ class JsonTypeFactories : public CustomTypeFactories {
// Returns a newly allocated JsonCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<JsonCastOperator>();
}

// Builds the input generator for JSON. An inner generator is obtained from
// fuzzer::getRandomInputGenerator() for config.representedType_ and handed to
// a JsonInputGenerator of type JSON(). Both generators receive the same seed
// and null ratio from `config`; random variation is switched off.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& config) const override {
  auto objectGenerator = fuzzer::getRandomInputGenerator(
      config.seed_, config.representedType_, config.nullRatio_);
  return std::make_shared<fuzzer::JsonInputGenerator>(
      config.seed_,
      JSON(),
      config.nullRatio_,
      std::move(objectGenerator),
      /*makeRandomVariation=*/false);
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/TimestampWithTimeZoneType.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ class TimestampWithTimeZoneTypeFactories : public CustomTypeFactories {
// Returns the operator obtained from TimestampWithTimeZoneCastOperator::get().
exec::CastOperatorPtr getCastOperator() const override {
return TimestampWithTimeZoneCastOperator::get();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

void registerTimestampWithTimeZoneType();
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/UuidType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ class UuidTypeFactories : public CustomTypeFactories {
// Returns a newly allocated UuidCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<UuidCastOperator>();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/type/OpaqueCustomTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ class OpaqueCustomTypeRegister {
// Casting is not supported for this type, so the call is reported through
// VELOX_UNSUPPORTED() instead of returning an operator.
exec::CastOperatorPtr getCastOperator() const override {
VELOX_UNSUPPORTED();
}

// This type supplies no custom input generator. Returning nullptr is expected
// to make VectorFuzzer fall back to its regular random data generation.
AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}
};
};
} // namespace facebook::velox
15 changes: 15 additions & 0 deletions velox/type/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,21 @@ exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name) {
return nullptr;
}

// Out-of-line definition of the virtual destructor that the class declares
// without an inline body; the behavior is the compiler-generated default.
CustomTypeFactories::~CustomTypeFactories() = default;

// Out-of-line definition of the virtual destructor that the class declares
// without an inline body; the behavior is the compiler-generated default.
AbstractInputGenerator::~AbstractInputGenerator() = default;

// Looks up the type factories for `name` via getTypeFactories() and asks them
// for an input generator built from `config`. Returns nullptr when no
// factories are found; the factories themselves may also return nullptr.
AbstractInputGeneratorPtr getCustomTypeInputGenerator(
    const std::string& name,
    const InputGeneratorConfig& config) {
  auto typeFactories = getTypeFactories(name);
  if (!typeFactories) {
    return nullptr;
  }
  return typeFactories->getInputGenerator(config);
}

void toTypeSql(const TypePtr& type, std::ostream& out) {
switch (type->kind()) {
case TypeKind::ARRAY:
Expand Down
56 changes: 55 additions & 1 deletion velox/type/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <folly/CPortability.h>
#include <folly/Random.h>
#include <folly/Range.h>
#include <folly/dynamic.h>

Expand Down Expand Up @@ -2003,11 +2004,27 @@ class CastOperator;
using CastOperatorPtr = std::shared_ptr<const CastOperator>;
} // namespace exec

/// Forward declarations.
class variant;
class AbstractInputGenerator;

using AbstractInputGeneratorPtr = std::shared_ptr<AbstractInputGenerator>;
using FuzzerGenerator = folly::detail::DefaultGenerator;

/// Parameters passed to CustomTypeFactories::getInputGenerator() when a
/// custom type is asked to build its input generator.
struct InputGeneratorConfig {
  /// Seed handed to the generator being created.
  size_t seed_;

  /// Null ratio handed to the generator being created.
  double nullRatio_;

  /// Type of data represented by JSON. This config should be ignored by
  /// non-JSON input generators.
  ///
  /// Held by value (a shared-pointer copy) rather than as a reference member,
  /// so a config that is copied, stored, or built from a temporary TypePtr can
  /// never dangle, and the struct stays assignable. Aggregate initialization
  /// and reads of this member work exactly as before; it may now also be
  /// omitted, in which case it is null.
  TypePtr representedType_;
};

/// Associates custom types with their custom operators to be the payload in
/// the custom type registry.
class CustomTypeFactories {
public:
virtual ~CustomTypeFactories() = default;
virtual ~CustomTypeFactories();

/// Returns a shared pointer to the custom type.
virtual TypePtr getType() const = 0;
Expand All @@ -2017,6 +2034,38 @@ class CustomTypeFactories {
/// return a nullptr. If a custom type does not support castings, throw an
/// exception.
virtual exec::CastOperatorPtr getCastOperator() const = 0;

/// Returns the custom input generator to use when VectorFuzzer generates
/// vectors of this custom type, built from 'config'. An implementation may
/// return a nullptr, in which case VectorFuzzer generates random data without
/// a custom generator.
virtual AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& config) const = 0;
};

/// Abstract base class for input generators. Each call to generate() yields
/// one value as a variant. Holds the state shared by concrete generators: a
/// random engine, the target type, an optional owned generator, and a null
/// ratio.
class AbstractInputGenerator {
 public:
  /// Seeds the random engine with 'seed', records 'type' and 'nullRatio', and
  /// takes ownership of 'next' (which may be nullptr).
  AbstractInputGenerator(
      size_t seed,
      const TypePtr& type,
      std::unique_ptr<AbstractInputGenerator>&& next,
      double nullRatio)
      : type_(type), next_(std::move(next)), nullRatio_(nullRatio) {
    rng_.seed(seed);
  }

  /// Declared here and defined out of line.
  virtual ~AbstractInputGenerator();

  /// Produces a value. Implemented by concrete generators.
  virtual variant generate() = 0;

  /// Returns the type this generator was constructed with.
  TypePtr type() const {
    return type_;
  }

 protected:
  // Random engine, seeded in the constructor.
  FuzzerGenerator rng_;

  // Type passed to the constructor and reported by type().
  TypePtr type_;

  // Optional generator owned by this one; how it is used is left to
  // subclasses.
  std::unique_ptr<AbstractInputGenerator> next_;

  // Null ratio passed to the constructor; JsonInputGenerator::generate() feeds
  // it to coinToss() to decide whether to return a null variant.
  double nullRatio_;
};

/// Adds custom type to the registry if it doesn't exist already. No-op if
Expand Down Expand Up @@ -2083,6 +2132,11 @@ bool unregisterCustomType(const std::string& name);
/// does not have a dedicated custom cast operator.
exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name);

/// Returns the input generator for the custom type with the specified name,
/// built from 'config'. Returns nullptr if no type factories are found for
/// 'name', or if the type's factories return nullptr.
AbstractInputGeneratorPtr getCustomTypeInputGenerator(
const std::string& name,
const InputGeneratorConfig& config);

// Allows us to transparently use folly::toAppend(), folly::join(), etc.
template <class TString>
void toAppend(
Expand Down
5 changes: 5 additions & 0 deletions velox/type/parser/tests/TypeParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ class TypeFactories : public CustomTypeFactories {
return nullptr;
}

AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& /*config*/) const override {
return nullptr;
}

private:
TypePtr type_;
};
Expand Down
Loading

0 comments on commit 37b0528

Please sign in to comment.