From 1d8aafdd56a0d0576864201743736aa8652bda51 Mon Sep 17 00:00:00 2001 From: Raymond Wu Date: Sun, 24 Nov 2024 09:58:25 -0800 Subject: [PATCH] test(fuzzer): add supporting map functions (#11536) Summary: This diff adds some supporting functions for my diff for new benchmark tests for map_concat. Reviewed By: darrenfu Differential Revision: D65755011 --- velox/vector/fuzzer/VectorFuzzer.cpp | 65 +++++++++++++++++++++------- velox/vector/fuzzer/VectorFuzzer.h | 22 ++++++++++ 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 56cd03a27c895..d82af1af91015 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -537,16 +537,7 @@ VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) { // Do not initialize keys and values inline in the fuzzMap call as C++ does // not specify the order they'll be called in, leading to inconsistent // results across platforms. - const auto& keyType = type->asMap().keyType(); - const auto& valueType = type->asMap().valueType(); - auto length = getElementsVectorLength(opts_, size); - - auto keys = opts_.normalizeMapKeys || !opts_.containerHasNulls - ? fuzzFlatNotNull(keyType, length) - : fuzzFlat(keyType, length); - auto values = opts_.containerHasNulls ? fuzzFlat(valueType, length) - : fuzzFlatNotNull(valueType, length); - return fuzzMap(keys, values, size); + return fuzzMap(type->asMap().keyType(), type->asMap().valueType(), size); } // Rows. else if (type->isRow()) { @@ -568,6 +559,28 @@ VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) { } } +VectorPtr VectorFuzzer::fuzzKeys(const TypePtr& keyType) { + return fuzzKeys(keyType, opts_.vectorSize); +} + +VectorPtr VectorFuzzer::fuzzKeys(const TypePtr& keyType, vector_size_t size) { + auto length = getElementsVectorLength(opts_, size); + return opts_.normalizeMapKeys || !opts_.containerHasNulls + ? 
fuzzFlatNotNull(keyType, length) + : fuzzFlat(keyType, length); +} + +VectorPtr VectorFuzzer::fuzzMap( + const TypePtr& keyType, + const TypePtr& valueType, + vector_size_t size) { + auto keys = fuzzKeys(keyType, size); + auto length = getElementsVectorLength(opts_, size); + auto values = opts_.containerHasNulls ? fuzzFlat(valueType, length) + : fuzzFlatNotNull(valueType, length); + return fuzzMap(keys, values, size); +} + VectorPtr VectorFuzzer::fuzzFlatPrimitive( const TypePtr& type, vector_size_t size) { @@ -912,6 +925,10 @@ TypePtr VectorFuzzer::randType( return velox::randType(rng_, scalarTypes, maxDepth); } +TypePtr VectorFuzzer::randMapType(int maxDepth) { + return velox::randMapType(rng_, defaultScalarTypes(), maxDepth); +} + RowTypePtr VectorFuzzer::randRowType(int maxDepth) { return velox::randRowType(rng_, maxDepth); } @@ -922,6 +939,10 @@ RowTypePtr VectorFuzzer::randRowType( return velox::randRowType(rng_, scalarTypes, maxDepth); } +size_t VectorFuzzer::randInRange(size_t min, size_t max) { + return rand(rng_, min, max); +} + VectorPtr VectorFuzzer::wrapInLazyVector(VectorPtr baseVector) { if (hasNestedDictionaryLayers(baseVector)) { auto indices = baseVector->wrapInfo(); @@ -1139,9 +1160,7 @@ TypePtr randType( } switch (rand(rng) % 3) { case 0: - return MAP( - randType(rng, scalarTypes, 0), - randType(rng, scalarTypes, maxDepth - 1)); + return randMapType(rng, scalarTypes, maxDepth); case 1: return ARRAY(randType(rng, scalarTypes, maxDepth - 1)); default: @@ -1149,6 +1168,14 @@ TypePtr randType( } } +TypePtr randMapType( + FuzzerGenerator& rng, + const std::vector& scalarTypes, + int maxDepth) { + return MAP( + randType(rng, scalarTypes, 0), randType(rng, scalarTypes, maxDepth - 1)); +} + TypePtr randOrderableType(FuzzerGenerator& rng, int maxDepth) { return randOrderableType(rng, defaultScalarTypes(), maxDepth); } @@ -1185,10 +1212,18 @@ RowTypePtr randRowType( FuzzerGenerator& rng, const std::vector& scalarTypes, int maxDepth) { - int numFields = 1 
+ rand(rng) % 7; + size_t numFields = 1 + rand(rng) % 7; + return randRowType(rng, scalarTypes, numFields, maxDepth); +} + +RowTypePtr randRowType( + FuzzerGenerator& rng, + const std::vector& scalarTypes, + size_t numFields, + int maxDepth) { std::vector names; std::vector fields; - for (int i = 0; i < numFields; ++i) { + for (size_t i = 0; i < numFields; ++i) { names.push_back(fmt::format("f{}", i)); fields.push_back(randType(rng, scalarTypes, maxDepth)); } diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 6ebbf03dc0a65..b76d537e7bb1a 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -202,6 +202,13 @@ class VectorFuzzer { VectorPtr fuzzFlatNotNull(const TypePtr& type); VectorPtr fuzzFlatNotNull(const TypePtr& type, vector_size_t size); + /// Returns a map vector with randomized values and nulls. Returns a vector + /// containing `opts_.vectorSize` or `size` elements. + VectorPtr + fuzzMap(const TypePtr& keyType, const TypePtr& valueType, vector_size_t size); + VectorPtr fuzzKeys(const TypePtr& keyType); + VectorPtr fuzzKeys(const TypePtr& keyType, vector_size_t size); + /// Returns a random constant vector (which could be a null constant). Returns /// a vector with size set to `opts_.vectorSize` or 'size'. VectorPtr fuzzConstant(const TypePtr& type); @@ -285,6 +292,10 @@ class VectorFuzzer { const std::vector& scalarTypes, int maxDepth = 5); + size_t randInRange(size_t min, size_t max); + + TypePtr randMapType(int maxDepth = 5); + /// Generates short decimal TypePtr with random precision and scale. inline TypePtr randShortDecimalType() { auto [precision, scale] = @@ -406,6 +417,11 @@ TypePtr randType( const std::vector& scalarTypes, int maxDepth = 5); +TypePtr randMapType( + FuzzerGenerator& rng, + const std::vector& scalarTypes, + int maxDepth = 5); + /// Same as the function above, but only generate orderable types. /// MAP types are not generated as they are not orderable. 
TypePtr randOrderableType(FuzzerGenerator& rng, int maxDepth = 5); @@ -423,6 +439,12 @@ RowTypePtr randRowType( const std::vector& scalarTypes, int maxDepth = 5); +RowTypePtr randRowType( + FuzzerGenerator& rng, + const std::vector& scalarTypes, + size_t numFields, + int maxDepth = 5); + /// Default set of scalar types to be chosen from when generating random types. const std::vector& defaultScalarTypes();