diff --git a/velox/exec/fuzzer/AggregationFuzzerBase.h b/velox/exec/fuzzer/AggregationFuzzerBase.h
index a2237f8a9d74..74e5c63b5d72 100644
--- a/velox/exec/fuzzer/AggregationFuzzerBase.h
+++ b/velox/exec/fuzzer/AggregationFuzzerBase.h
@@ -82,6 +82,8 @@ class AggregationFuzzerBase {
     registerHiveConnector(hiveConfigs);
     dwrf::registerDwrfReaderFactory();
     dwrf::registerDwrfWriterFactory();
+    // Register fuzzers for custom types specific to the reference engine.
+    referenceQueryRunner_->registerCustomVectorFuzzers(vectorFuzzer_);
 
     seed(initialSeed);
   }
diff --git a/velox/exec/fuzzer/CMakeLists.txt b/velox/exec/fuzzer/CMakeLists.txt
index 856373b54fb4..986c52e58e68 100644
--- a/velox/exec/fuzzer/CMakeLists.txt
+++ b/velox/exec/fuzzer/CMakeLists.txt
@@ -22,6 +22,7 @@ target_link_libraries(
   velox_exec_test_lib
   velox_expression_functions
   velox_presto_types
+  velox_presto_types_fuzzer
   cpr::cpr
   Boost::regex
   velox_type_parser
diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp
index c8bba9cdb64d..a1265195d381 100644
--- a/velox/exec/fuzzer/PrestoQueryRunner.cpp
+++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp
@@ -33,6 +33,8 @@
 #include "velox/functions/prestosql/types/IPAddressType.h"
 #include "velox/functions/prestosql/types/IPPrefixType.h"
 #include "velox/functions/prestosql/types/JsonType.h"
+#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
+#include "velox/functions/prestosql/types/fuzzer/TimestampWithTimeZoneFuzzer.h"
 #include "velox/serializers/PrestoSerializer.h"
 #include "velox/type/parser/TypeParser.h"
 
@@ -253,6 +255,15 @@ const std::vector<TypePtr>& PrestoQueryRunner::supportedScalarTypes() const {
   return kScalarTypes;
 }
 
+void PrestoQueryRunner::registerCustomVectorFuzzers(
+    VectorFuzzer& vectorFuzzer) const {
+  // TIMESTAMP WITH TIME ZONE values pack a time zone ID next to the millis,
+  // so use the dedicated fuzzer, which only picks known time zone IDs.
+  vectorFuzzer.registerCustomVectorFuzzer(
+      TIMESTAMP_WITH_TIME_ZONE(),
+      std::make_unique<TimestampWithTimeZoneVectorFuzzer>());
+}
+
 const std::unordered_map<std::string, DataSpec>&
 PrestoQueryRunner::aggregationFunctionDataSpecs() const {
   // For some functions, velox supports NaN, Infinity better than presto query
diff --git a/velox/exec/fuzzer/PrestoQueryRunner.h b/velox/exec/fuzzer/PrestoQueryRunner.h
index a72cae913e10..43326cc7239e 100644
--- a/velox/exec/fuzzer/PrestoQueryRunner.h
+++ b/velox/exec/fuzzer/PrestoQueryRunner.h
@@ -53,6 +53,8 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {
 
   const std::vector<TypePtr>& supportedScalarTypes() const override;
 
+  void registerCustomVectorFuzzers(VectorFuzzer& vectorFuzzer) const override;
+
   const std::unordered_map<std::string, DataSpec>&
   aggregationFunctionDataSpecs() const override;
 
diff --git a/velox/exec/fuzzer/ReferenceQueryRunner.h b/velox/exec/fuzzer/ReferenceQueryRunner.h
index 5d0c24afdc24..d71b7f84fe0c 100644
--- a/velox/exec/fuzzer/ReferenceQueryRunner.h
+++ b/velox/exec/fuzzer/ReferenceQueryRunner.h
@@ -46,6 +46,11 @@ class ReferenceQueryRunner {
     return defaultScalarTypes();
   }
 
+  /// Registers CustomVectorFuzzers specific to the reference query engine,
+  /// e.g. for custom types only supported by this engine. The default
+  /// implementation registers nothing.
+  virtual void registerCustomVectorFuzzers(VectorFuzzer& vectorFuzzer) const {}
+
   virtual const std::unordered_map<std::string, DataSpec>&
   aggregationFunctionDataSpecs() const = 0;
 
diff --git a/velox/functions/prestosql/types/CMakeLists.txt b/velox/functions/prestosql/types/CMakeLists.txt
index 14c407ba89ce..d2571005f6ec 100644
--- a/velox/functions/prestosql/types/CMakeLists.txt
+++ b/velox/functions/prestosql/types/CMakeLists.txt
@@ -31,3 +31,5 @@ velox_link_libraries(
 if(${VELOX_BUILD_TESTING})
   add_subdirectory(tests)
 endif()
+
+add_subdirectory(fuzzer)
diff --git a/velox/functions/prestosql/types/fuzzer/CMakeLists.txt b/velox/functions/prestosql/types/fuzzer/CMakeLists.txt
new file mode 100644
index 000000000000..d2741bad9779
--- /dev/null
+++ b/velox/functions/prestosql/types/fuzzer/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+velox_add_library(velox_presto_types_fuzzer TimestampWithTimeZoneFuzzer.h)
+
+velox_link_libraries(velox_presto_types_fuzzer velox_vector_fuzzer
+                     velox_presto_types velox_type_tz)
diff --git a/velox/functions/prestosql/types/fuzzer/TimestampWithTimeZoneFuzzer.h b/velox/functions/prestosql/types/fuzzer/TimestampWithTimeZoneFuzzer.h
new file mode 100644
index 000000000000..9dc328127acd
--- /dev/null
+++ b/velox/functions/prestosql/types/fuzzer/TimestampWithTimeZoneFuzzer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <boost/random/uniform_int_distribution.hpp>
+
+#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
+#include "velox/type/tz/TimeZoneMap.h"
+#include "velox/vector/fuzzer/CustomVectorFuzzer.h"
+
+namespace facebook::velox {
+/// A CustomVectorFuzzer for TimestampWithTimeZoneType. The millisUtc is random,
+/// and the time zone is selected randomly from the list of known time zones.
+class TimestampWithTimeZoneVectorFuzzer : public CustomVectorFuzzer {
+ public:
+  TimestampWithTimeZoneVectorFuzzer()
+      : CustomVectorFuzzer(), timeZoneIds_(tz::getTimeZoneIDs()) {
+    // randomValue() indexes modulo the list size, so it must not be empty.
+    VELOX_CHECK(!timeZoneIds_.empty(), "No known time zone IDs.");
+  }
+
+  /// Returns a flat vector of 'size' non-null values of 'type', which must be
+  /// TIMESTAMP WITH TIME ZONE.
+  const VectorPtr fuzzFlat(
+      memory::MemoryPool* pool,
+      const TypePtr& type,
+      vector_size_t size,
+      FuzzerGenerator& rng) override {
+    VELOX_CHECK(isTimestampWithTimeZoneType(type));
+
+    auto result = BaseVector::create(type, size, pool);
+    auto* flatResult = result->asFlatVector<int64_t>();
+    for (vector_size_t i = 0; i < size; ++i) {
+      flatResult->set(i, randomValue(rng));
+    }
+    return result;
+  }
+
+  /// Returns a constant vector of 'size' rows holding one non-null value of
+  /// 'type', which must be TIMESTAMP WITH TIME ZONE.
+  const VectorPtr fuzzConstant(
+      memory::MemoryPool* pool,
+      const TypePtr& type,
+      vector_size_t size,
+      FuzzerGenerator& rng) override {
+    VELOX_CHECK(isTimestampWithTimeZoneType(type));
+
+    return std::make_shared<ConstantVector<int64_t>>(
+        pool, size, false, type, randomValue(rng));
+  }
+
+ private:
+  // Packs random millis with a time zone ID picked from 'timeZoneIds_'. The
+  // time zone ID is drawn from 'rng' first, then the millis.
+  int64_t randomValue(FuzzerGenerator& rng) const {
+    const int16_t timeZoneId = timeZoneIds_
+        [boost::random::uniform_int_distribution<size_t>()(rng) %
+         timeZoneIds_.size()];
+    return pack(
+        boost::random::uniform_int_distribution<int64_t>()(rng), timeZoneId);
+  }
+
+  const std::vector<int16_t> timeZoneIds_;
+};
+} // namespace facebook::velox
diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp
index e7935a308310..b1221561475d 100644
--- a/velox/type/tz/TimeZoneMap.cpp
+++ b/velox/type/tz/TimeZoneMap.cpp
@@ -424,6 +424,24 @@ int16_t getTimeZoneID(int32_t offsetMinutes) {
   }
 }
 
+std::vector<int16_t> getTimeZoneIDs() {
+  const auto& timeZoneDatabase = getTimeZoneDatabase();
+
+  std::vector<int16_t> ids;
+  ids.reserve(timeZoneDatabase.size());
+
+  // Index with size_t so the loop counter matches size()'s type; a 16-bit
+  // counter would wrap if the database ever outgrew the int16_t range.
+  for (size_t i = 0; i < timeZoneDatabase.size(); ++i) {
+    // Skip empty (null) slots; only populated entries are reported.
+    if (timeZoneDatabase[i] != nullptr) {
+      ids.push_back(static_cast<int16_t>(i));
+    }
+  }
+
+  return ids;
+}
+
 TimeZone::seconds TimeZone::to_sys(
     TimeZone::seconds timestamp,
     TimeZone::TChoose choose) const {
diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h
index 627fdc133355..12e4c05ef460 100644
--- a/velox/type/tz/TimeZoneMap.h
+++ b/velox/type/tz/TimeZoneMap.h
@@ -18,6 +18,7 @@
 
 #include <chrono>
 #include <string>
+#include <vector>
 
 namespace facebook::velox::date {
 class time_zone;
@@ -63,6 +64,9 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError = true);
 /// [-14:00, +14:00] range.
 int16_t getTimeZoneID(int32_t offsetMinutes);
 
+/// Returns all valid time zone IDs.
+std::vector<int16_t> getTimeZoneIDs();
+
 // Validates that the time point can be safely used by the external date
 // library.
 template <typename T>
diff --git a/velox/vector/fuzzer/CustomVectorFuzzer.h b/velox/vector/fuzzer/CustomVectorFuzzer.h
new file mode 100644
index 000000000000..5af2b9502a93
--- /dev/null
+++ b/velox/vector/fuzzer/CustomVectorFuzzer.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "velox/vector/BaseVector.h"
+#include "velox/vector/fuzzer/Utils.h"
+
+namespace facebook::velox {
+/// An interface for fuzzing Vectors of a custom type. This is intended for use
+/// when a custom type does not support the full range of values of the backing
+/// physical type.
+///
+/// Implementations of this interface need to be registered in an instance of
+/// VectorFuzzer via registerCustomVectorFuzzer in order for them to be used in
+/// general purpose fuzzing.
+class CustomVectorFuzzer {
+ public:
+  virtual ~CustomVectorFuzzer() = default;
+
+  /// Should return a flat Vector of the given size without nulls.
+  virtual const VectorPtr fuzzFlat(
+      memory::MemoryPool* pool,
+      const TypePtr& type,
+      vector_size_t size,
+      FuzzerGenerator& rng) = 0;
+
+  /// Should return a ConstantVector of the given size backed by a single
+  /// non-null scalar value (complex types do not need to implement this).
+  virtual const VectorPtr fuzzConstant(
+      memory::MemoryPool* pool,
+      const TypePtr& type,
+      vector_size_t size,
+      FuzzerGenerator& rng) = 0;
+};
+} // namespace facebook::velox
diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp
index c7fd600bf015..0b8361546a90 100644
--- a/velox/vector/fuzzer/VectorFuzzer.cpp
+++ b/velox/vector/fuzzer/VectorFuzzer.cpp
@@ -415,6 +415,16 @@ VectorPtr VectorFuzzer::fuzz(const TypePtr& type, vector_size_t size) {
   // 20% chance of adding a constant vector.
   if (coinToss(0.2)) {
     vector = fuzzConstant(type, vectorSize);
+  } else if (const auto it = customVectorFuzzers_.find(type);
+             it != customVectorFuzzers_.end()) {
+    // Custom fuzzers produce vectors without nulls; sprinkle nulls in here so
+    // the configured null ratio still applies to custom types.
+    vector = it->second->fuzzFlat(pool_, type, vectorSize, rng_);
+    for (vector_size_t i = 0; i < vector->size(); ++i) {
+      if (coinToss(opts_.nullRatio)) {
+        vector->setNull(i, true);
+      }
+    }
   } else if (type->isPrimitiveType()) {
     vector = fuzzFlatPrimitive(type, vectorSize);
   } else if (type->isOpaque()) {
@@ -467,6 +477,11 @@ VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type, vector_size_t size) {
   }
   if (type->isUnKnown()) {
     return BaseVector::createNullConstant(type, size, pool_);
+  }
+  // Types with a registered custom fuzzer bypass the generic scalar dispatch.
+  if (const auto it = customVectorFuzzers_.find(type);
+      it != customVectorFuzzers_.end()) {
+    return it->second->fuzzConstant(pool_, type, size, rng_);
   } else {
     return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH_ALL(
         fuzzConstantPrimitiveImpl,
@@ -898,11 +913,11 @@ std::pair<int8_t, int8_t> VectorFuzzer::randPrecisionScale(
 }
 
 TypePtr VectorFuzzer::randType(int maxDepth) {
-  return velox::randType(rng_, maxDepth);
+  return velox::randType(rng_, scalarTypes_, maxDepth);
 }
 
 TypePtr VectorFuzzer::randOrderableType(int maxDepth) {
-  return velox::randOrderableType(rng_, maxDepth);
+  return velox::randOrderableType(rng_, scalarTypes_, maxDepth);
 }
 
 TypePtr VectorFuzzer::randOrderableType(
@@ -918,11 +933,11 @@ TypePtr VectorFuzzer::randType(
 }
 
 TypePtr VectorFuzzer::randMapType(int maxDepth) {
-  return velox::randMapType(rng_, defaultScalarTypes(), maxDepth);
+  return velox::randMapType(rng_, scalarTypes_, maxDepth);
 }
 
 RowTypePtr VectorFuzzer::randRowType(int maxDepth) {
-  return velox::randRowType(rng_, maxDepth);
+  return velox::randRowType(rng_, scalarTypes_, maxDepth);
 }
 
 RowTypePtr VectorFuzzer::randRowType(
diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h
index 00a01527e234..3b146b4f9880 100644
--- a/velox/vector/fuzzer/VectorFuzzer.h
+++ b/velox/vector/fuzzer/VectorFuzzer.h
@@ -21,6 +21,7 @@
 #include "velox/type/Type.h"
 #include "velox/vector/BaseVector.h"
 #include "velox/vector/ComplexVector.h"
+#include "velox/vector/fuzzer/CustomVectorFuzzer.h"
 #include "velox/vector/fuzzer/GeneratorSpec.h"
 #include "velox/vector/fuzzer/Utils.h"
 
@@ -348,6 +349,27 @@ class VectorFuzzer {
     opaqueTypeGenerators_[std::type_index(typeid(Class))] = generator;
   }
 
+  /// Register a CustomVectorFuzzer for a given type. This adds the type to the
+  /// list of known scalar types and causes the VectorFuzzer to use the
+  /// CustomVectorFuzzer when producing flat/constant Vectors of that type.
+  ///
+  /// Currently complex types are not supported.
+  void registerCustomVectorFuzzer(
+      const TypePtr& type,
+      std::unique_ptr<CustomVectorFuzzer>&& customVectorFuzzer) {
+    // Validate before mutating so a rejected call leaves no partial state.
+    VELOX_CHECK(
+        type->isPrimitiveType(),
+        "Currently only primitive types are supported.");
+
+    customVectorFuzzers_[type] = std::move(customVectorFuzzer);
+    // Make the type eligible for random type generation, avoiding duplicates.
+    if (std::find(scalarTypes_.begin(), scalarTypes_.end(), type) ==
+        scalarTypes_.end()) {
+      scalarTypes_.push_back(type);
+    }
+  }
+
  private:
   // Generates a flat vector for primitive types.
   VectorPtr fuzzFlatPrimitive(const TypePtr& type, vector_size_t size);
@@ -393,6 +415,11 @@ class VectorFuzzer {
   // evaluated, which can lead to inconsistent results across platforms.
   FuzzerGenerator rng_;
 
+  // Custom fuzzers keyed by the type they generate. NOTE(review): keys are
+  // TypePtr, so lookups presumably match by pointer identity; confirm.
+  std::unordered_map<TypePtr, std::unique_ptr<CustomVectorFuzzer>>
+      customVectorFuzzers_;
+
   // Since the underlying type of opaque types are transparent to Velox, we
   // require callers to register a generator for each underlying type, so we're
   // able to generate random data for opaque types.
@@ -401,6 +428,10 @@ class VectorFuzzer {
       std::type_index,
       std::function<std::shared_ptr<void>(FuzzerGenerator& rng)>>
       opaqueTypeGenerators_;
+
+  // Scalar types used by the rand*Type() methods; types registered with a
+  // custom fuzzer are appended to the defaults.
+  std::vector<TypePtr> scalarTypes_ = defaultScalarTypes();
 };
 
 /// Generates a random type, including maps, structs, and arrays. maxDepth