Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinwilfong authored and facebook-github-bot committed Dec 17, 2024
1 parent a3673e4 commit ebcb021
Show file tree
Hide file tree
Showing 13 changed files with 228 additions and 4 deletions.
1 change: 1 addition & 0 deletions velox/exec/fuzzer/AggregationFuzzerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class AggregationFuzzerBase {
registerHiveConnector(hiveConfigs);
dwrf::registerDwrfReaderFactory();
dwrf::registerDwrfWriterFactory();
referenceQueryRunner_->registerCustomVectorFuzzers(vectorFuzzer_);
seed(initialSeed);
}

Expand Down
1 change: 1 addition & 0 deletions velox/exec/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ target_link_libraries(
velox_exec_test_lib
velox_expression_functions
velox_presto_types
velox_presto_types_fuzzer
cpr::cpr
Boost::regex
velox_type_parser
Expand Down
9 changes: 9 additions & 0 deletions velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/functions/prestosql/types/fuzzer/TimestampWithTimeZoneFuzzer.h"
#include "velox/serializers/PrestoSerializer.h"
#include "velox/type/parser/TypeParser.h"

Expand Down Expand Up @@ -253,6 +255,13 @@ const std::vector<TypePtr>& PrestoQueryRunner::supportedScalarTypes() const {
return kScalarTypes;
}

// Installs the Presto-specific custom fuzzers into 'vectorFuzzer'. Currently
// this registers a TimestampWithTimeZoneVectorFuzzer for the
// TIMESTAMP_WITH_TIME_ZONE() type.
void PrestoQueryRunner::registerCustomVectorFuzzers(
    VectorFuzzer& vectorFuzzer) const {
  std::unique_ptr<CustomVectorFuzzer> timestampWithTimeZoneFuzzer =
      std::make_unique<TimestampWithTimeZoneVectorFuzzer>();

  vectorFuzzer.registerCustomVectorFuzzer(
      TIMESTAMP_WITH_TIME_ZONE(), std::move(timestampWithTimeZoneFuzzer));
}

const std::unordered_map<std::string, DataSpec>&
PrestoQueryRunner::aggregationFunctionDataSpecs() const {
// For some functions, velox supports NaN, Infinity better than presto query
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/fuzzer/PrestoQueryRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {

const std::vector<TypePtr>& supportedScalarTypes() const override;

void registerCustomVectorFuzzers(VectorFuzzer& vectorFuzzer) const override;

const std::unordered_map<std::string, DataSpec>&
aggregationFunctionDataSpecs() const override;

Expand Down
6 changes: 6 additions & 0 deletions velox/exec/fuzzer/ReferenceQueryRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class ReferenceQueryRunner {
return defaultScalarTypes();
}

/// Hook that lets a reference query engine install its own
/// CustomVectorFuzzers into 'vectorFuzzer', e.g. for custom types that are
/// only supported by this engine. The default implementation registers
/// nothing; engines with custom types override it.
virtual void registerCustomVectorFuzzers(VectorFuzzer& vectorFuzzer) const {}

virtual const std::unordered_map<std::string, DataSpec>&
aggregationFunctionDataSpecs() const = 0;

Expand Down
2 changes: 2 additions & 0 deletions velox/functions/prestosql/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ velox_link_libraries(
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()

add_subdirectory(fuzzer)
18 changes: 18 additions & 0 deletions velox/functions/prestosql/types/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Library holding the custom vector fuzzers for Presto types (currently only
# the TimestampWithTimeZone fuzzer).
# NOTE(review): the only source listed is a header. Confirm that
# velox_add_library accepts a target without compiled sources; otherwise this
# may need to be declared as an INTERFACE library.
velox_add_library(velox_presto_types_fuzzer TimestampWithTimeZoneFuzzer.h)

# The fuzzer header uses the CustomVectorFuzzer interface, the
# TimestampWithTimeZone type and the time zone ID list, hence these links.
velox_link_libraries(velox_presto_types_fuzzer velox_vector_fuzzer
velox_presto_types velox_type_tz)
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <boost/random/uniform_int_distribution.hpp>

#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/type/tz/TimeZoneMap.h"
#include "velox/vector/fuzzer/CustomVectorFuzzer.h"

namespace facebook::velox {
/// A CustomVectorFuzzer for TimestampWithTimeZoneType. The millisUtc is random,
/// and the time zone is selected randomly from the list of known time zones.
class TimestampWithTimeZoneVectorFuzzer : public CustomVectorFuzzer {
public:
TimestampWithTimeZoneVectorFuzzer()
: CustomVectorFuzzer(), timeZoneIds_(tz::getTimeZoneIDs()) {}

const VectorPtr fuzzFlat(
memory::MemoryPool* pool,
const TypePtr& type,
vector_size_t size,
FuzzerGenerator& rng) override {
VELOX_CHECK(isTimestampWithTimeZoneType(type));

auto result = BaseVector::create(type, size, pool);
auto flatResult = result->asFlatVector<int64_t>();
for (auto i = 0; i < size; ++i) {
int16_t timeZoneId = timeZoneIds_
[boost::random::uniform_int_distribution<size_t>()(rng) %
timeZoneIds_.size()];
flatResult->set(
i,
pack(
boost::random::uniform_int_distribution<int64_t>()(rng),
timeZoneId));
}
return result;
}

const VectorPtr fuzzConstant(
memory::MemoryPool* pool,
const TypePtr& type,
vector_size_t size,
FuzzerGenerator& rng) override {
VELOX_CHECK(isTimestampWithTimeZoneType(type));

int16_t timeZoneId = timeZoneIds_
[boost::random::uniform_int_distribution<size_t>()(rng) %
timeZoneIds_.size()];

return std::make_shared<ConstantVector<int64_t>>(
pool,
size,
false,
type,
pack(
boost::random::uniform_int_distribution<int64_t>()(rng),
timeZoneId));
}

private:
const std::vector<int16_t> timeZoneIds_;
};
} // namespace facebook::velox
15 changes: 15 additions & 0 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,21 @@ int16_t getTimeZoneID(int32_t offsetMinutes) {
}
}

// Returns the IDs of all valid time zones, in ascending order. An ID is the
// index of a non-null entry in the time zone database.
std::vector<int16_t> getTimeZoneIDs() {
  const auto& timeZoneDatabase = getTimeZoneDatabase();
  const size_t numEntries = timeZoneDatabase.size();

  std::vector<int16_t> ids;
  // Upper bound only: null (unused) slots are skipped below.
  ids.reserve(numEntries);

  // Index with size_t to avoid comparing a signed 16-bit counter against the
  // unsigned container size; narrow explicitly when recording the ID.
  // NOTE(review): assumes the database has at most 32768 entries so every
  // index fits in int16_t - confirm against getTimeZoneDatabase().
  for (size_t i = 0; i < numEntries; ++i) {
    if (timeZoneDatabase[i] != nullptr) {
      ids.push_back(static_cast<int16_t>(i));
    }
  }

  return ids;
}

TimeZone::seconds TimeZone::to_sys(
TimeZone::seconds timestamp,
TimeZone::TChoose choose) const {
Expand Down
4 changes: 4 additions & 0 deletions velox/type/tz/TimeZoneMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <chrono>
#include <string>
#include <vector>

namespace facebook::velox::date {
class time_zone;
Expand Down Expand Up @@ -63,6 +64,9 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError = true);
/// [-14:00, +14:00] range.
int16_t getTimeZoneID(int32_t offsetMinutes);

/// Returns all valid time zone IDs.
std::vector<int16_t> getTimeZoneIDs();

// Validates that the time point can be safely used by the external date
// library.
template <typename T>
Expand Down
49 changes: 49 additions & 0 deletions velox/vector/fuzzer/CustomVectorFuzzer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "velox/vector/BaseVector.h"
#include "velox/vector/fuzzer/Utils.h"

namespace facebook::velox {
/// An interface for fuzzing Vectors of a custom type. This is intended for use
/// when a custom type does not support the full range of values of the backing
/// physical type.
///
/// Implementations of this interface need to be registered in an instance of
/// VectorFuzzer via registerCustomVectorFuzzer in order for them to be used in
/// general purpose fuzzing.
class CustomVectorFuzzer {
public:
// Virtual so implementations can be destroyed through a pointer to this
// interface.
virtual ~CustomVectorFuzzer() = default;

/// Should return a flat Vector of the given size without nulls.
///
/// @param pool Memory pool to create the result vector in.
/// @param type Type of the vector to generate.
/// @param size Number of rows the returned vector must have.
/// @param rng Random generator to draw the values from.
virtual const VectorPtr fuzzFlat(
memory::MemoryPool* pool,
const TypePtr& type,
vector_size_t size,
FuzzerGenerator& rng) = 0;

/// Should return a ConstantVector of the given size backed by a single
/// non-null scalar value (complex types do not need to implement this).
///
/// @param pool Memory pool to create the result vector in.
/// @param type Type of the vector to generate.
/// @param size Number of rows the returned vector must have.
/// @param rng Random generator to draw the value from.
virtual const VectorPtr fuzzConstant(
memory::MemoryPool* pool,
const TypePtr& type,
vector_size_t size,
FuzzerGenerator& rng) = 0;
};
} // namespace facebook::velox
20 changes: 16 additions & 4 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,14 @@ VectorPtr VectorFuzzer::fuzz(const TypePtr& type, vector_size_t size) {
// 20% chance of adding a constant vector.
if (coinToss(0.2)) {
vector = fuzzConstant(type, vectorSize);
} else if (const auto it = customVectorFuzzers_.find(type);
it != customVectorFuzzers_.end()) {
vector = it->second->fuzzFlat(pool_, type, vectorSize, rng_);
for (size_t i = 0; i < vector->size(); ++i) {
if (coinToss(opts_.nullRatio)) {
vector->setNull(i, true);
}
}
} else if (type->isPrimitiveType()) {
vector = fuzzFlatPrimitive(type, vectorSize);
} else if (type->isOpaque()) {
Expand Down Expand Up @@ -467,6 +475,10 @@ VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type, vector_size_t size) {
}
if (type->isUnKnown()) {
return BaseVector::createNullConstant(type, size, pool_);
}
if (const auto it = customVectorFuzzers_.find(type);
it != customVectorFuzzers_.end()) {
return it->second->fuzzConstant(pool_, type, size, rng_);
} else {
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH_ALL(
fuzzConstantPrimitiveImpl,
Expand Down Expand Up @@ -898,11 +910,11 @@ std::pair<int8_t, int8_t> VectorFuzzer::randPrecisionScale(
}

TypePtr VectorFuzzer::randType(int maxDepth) {
return velox::randType(rng_, maxDepth);
return velox::randType(rng_, scalarTypes_, maxDepth);
}

TypePtr VectorFuzzer::randOrderableType(int maxDepth) {
return velox::randOrderableType(rng_, maxDepth);
return velox::randOrderableType(rng_, scalarTypes_, maxDepth);
}

TypePtr VectorFuzzer::randOrderableType(
Expand All @@ -918,11 +930,11 @@ TypePtr VectorFuzzer::randType(
}

TypePtr VectorFuzzer::randMapType(int maxDepth) {
return velox::randMapType(rng_, defaultScalarTypes(), maxDepth);
return velox::randMapType(rng_, scalarTypes_, maxDepth);
}

RowTypePtr VectorFuzzer::randRowType(int maxDepth) {
return velox::randRowType(rng_, maxDepth);
return velox::randRowType(rng_, scalarTypes_, maxDepth);
}

RowTypePtr VectorFuzzer::randRowType(
Expand Down
27 changes: 27 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "velox/type/Type.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/fuzzer/CustomVectorFuzzer.h"
#include "velox/vector/fuzzer/GeneratorSpec.h"
#include "velox/vector/fuzzer/Utils.h"

Expand Down Expand Up @@ -348,6 +349,27 @@ class VectorFuzzer {
opaqueTypeGenerators_[std::type_index(typeid(Class))] = generator;
}

/// Register a CustomVectorFuzzer for a given type. This adds the type to the
/// list of known scalar types and causes the VectorFuzzer to use the
/// CustomVectorFuzzer when producing flat/constant Vectors of that type.
///
/// Currently complex types are not supported.
void registerCustomVectorFuzzer(
const TypePtr& type,
std::unique_ptr<CustomVectorFuzzer>&& customVectorFuzzer) {
customVectorFuzzers_[type] = std::move(customVectorFuzzer);

VELOX_CHECK(
type->isPrimitiveType(),
"Currently only primitive types are supported.");

if (type->isPrimitiveType() &&
std::find(scalarTypes_.begin(), scalarTypes_.end(), type) ==
scalarTypes_.end()) {
scalarTypes_.push_back(type);
}
}

private:
// Generates a flat vector for primitive types.
VectorPtr fuzzFlatPrimitive(const TypePtr& type, vector_size_t size);
Expand Down Expand Up @@ -393,6 +415,9 @@ class VectorFuzzer {
// evaluated, which can lead to inconsistent results across platforms.
FuzzerGenerator rng_;

std::unordered_map<TypePtr, std::unique_ptr<CustomVectorFuzzer>>
customVectorFuzzers_;

// Since the underlying type of opaque types are transparent to Velox, we
// require callers to register a generator for each underlying type, so we're
// able to generate random data for opaque types.
Expand All @@ -401,6 +426,8 @@ class VectorFuzzer {
std::type_index,
std::function<std::shared_ptr<void>(FuzzerGenerator& rng)>>
opaqueTypeGenerators_;

std::vector<TypePtr> scalarTypes_ = defaultScalarTypes();
};

/// Generates a random type, including maps, structs, and arrays. maxDepth
Expand Down

0 comments on commit ebcb021

Please sign in to comment.