From 8a5e49af6c987961429068191b98e650dee98c69 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Wed, 27 Sep 2023 09:22:55 -0700 Subject: [PATCH] Fix ContainerRowSerde::compare for arrays and maps (#6767) Summary: ContainerRowSerde::compare crashes or produced incorrect results when processing arrays or maps with encoded elements, keys or values. ContainerRowSerde::compare used to access base vector without first checking that encodings didn't add nulls. I.e. it used to call v->wrappedVector()->isNullAt(v->wrappedIndex (index)) without first checking that v->isNullAt(index) is false. When v->isNullAt(index) is null v->wrappedIndex(index) may be out of range or incorrect causing either a crash or wrong results. Pull Request resolved: https://github.com/facebookincubator/velox/pull/6767 Reviewed By: laithsakka Differential Revision: D49684737 Pulled By: mbasmanova fbshipit-source-id: 021ec731ee4efe395c30a07bad96f06a77f26ad8 --- velox/exec/ContainerRowSerde.cpp | 8 +-- velox/exec/tests/ContainerRowSerdeTest.cpp | 68 ++++++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/velox/exec/ContainerRowSerde.cpp b/velox/exec/ContainerRowSerde.cpp index 9df1c95d6aec..457c31a2e961 100644 --- a/velox/exec/ContainerRowSerde.cpp +++ b/velox/exec/ContainerRowSerde.cpp @@ -440,9 +440,8 @@ std::optional compareArrays( auto leftNulls = readNulls(left, leftSize); auto wrappedElements = elements.wrappedVector(); for (auto i = 0; i < compareSize; ++i) { - auto elementIndex = elements.wrappedIndex(offset + i); bool leftNull = bits::isBitSet(leftNulls.data(), i); - bool rightNull = wrappedElements->isNullAt(elementIndex); + bool rightNull = elements.isNullAt(offset + i); if (leftNull || rightNull) { auto result = BaseVector::compareNulls(leftNull, rightNull, flags); @@ -452,6 +451,7 @@ std::optional compareArrays( return result; } + auto elementIndex = elements.wrappedIndex(offset + i); auto result = compareSwitch(left, *wrappedElements, elementIndex, flags); if (result.has_value() && result.value() == 0) { continue; @@ -475,9 +475,8 @@ std::optional compareArrayIndices( auto leftNulls = readNulls(left, leftSize); auto wrappedElements = elements.wrappedVector(); for (auto i = 0; i < compareSize; ++i) { - auto elementIndex = elements.wrappedIndex(rightIndices[i]); bool leftNull = bits::isBitSet(leftNulls.data(), i); - bool rightNull = wrappedElements->isNullAt(elementIndex); + bool rightNull = elements.isNullAt(rightIndices[i]); if (leftNull || rightNull) { auto result = BaseVector::compareNulls(leftNull, rightNull, flags); @@ -487,6 +486,7 @@ std::optional compareArrayIndices( return result; } + auto elementIndex = elements.wrappedIndex(rightIndices[i]); auto result = compareSwitch(left, *wrappedElements, elementIndex, flags); if (result.has_value() && result.value() == 0) { continue; diff --git a/velox/exec/tests/ContainerRowSerdeTest.cpp b/velox/exec/tests/ContainerRowSerdeTest.cpp index b2b0009870c6..1899bc5ff44c 100644 --- a/velox/exec/tests/ContainerRowSerdeTest.cpp +++ b/velox/exec/tests/ContainerRowSerdeTest.cpp @@ -16,6 +16,7 @@ #include "velox/exec/ContainerRowSerde.h" #include #include "velox/common/memory/HashStringAllocator.h" +#include "velox/vector/fuzzer/VectorFuzzer.h" #include "velox/vector/tests/utils/VectorTestBase.h" namespace facebook::velox::exec { @@ -104,6 +105,26 @@ class ContainerRowSerdeTest : public testing::Test, } } + void testCompare(const VectorPtr& vector) { + auto positions = serializeWithPositions(vector); + + CompareFlags compareFlags{ + true, // nullsFirst + true, // ascending + true, + CompareFlags::NullHandlingMode::NoStop}; + + DecodedVector decodedVector(*vector); + + for (auto i = 0; i < positions.size(); ++i) { + ByteStream stream; + HashStringAllocator::prepareRead(positions.at(i).header, stream); + ASSERT_EQ( + 0, ContainerRowSerde::compare(stream, decodedVector, i, compareFlags)) + << "at " << i << ": " << vector->toString(i); + } + } + HashStringAllocator allocator_{pool()}; }; @@ -299,5 +320,52 @@ TEST_F(ContainerRowSerdeTest, compareNullsInRowVector) { allocator_.clear(); } +TEST_F(ContainerRowSerdeTest, fuzzCompare) { + VectorFuzzer::Options opts; + opts.vectorSize = 1'000; + opts.nullRatio = 0.5; + opts.dictionaryHasNulls = true; + + VectorFuzzer fuzzer(opts, pool_.get()); + + std::vector offsets(100); + for (auto i = 0; i < offsets.size(); ++i) { + offsets[i] = i * 10; + } + + for (auto i = 0; i < 1'000; ++i) { + auto seed = folly::Random::rand32(); + + LOG(INFO) << i << ": seed: " << seed; + + fuzzer.reSeed(seed); + + { + SCOPED_TRACE(fmt::format("seed: {}, ARRAY", seed)); + auto elements = fuzzer.fuzz(BIGINT()); + auto arrayVector = makeArrayVector(offsets, elements); + testCompare(arrayVector); + } + + { + SCOPED_TRACE(fmt::format("seed: {}, MAP", seed)); + auto keys = fuzzer.fuzz(BIGINT()); + auto values = fuzzer.fuzz(BIGINT()); + auto mapVector = makeMapVector(offsets, keys, values); + testCompare(mapVector); + } + + { + SCOPED_TRACE(fmt::format("seed: {}, ROW", seed)); + std::vector children{ + fuzzer.fuzz(BIGINT()), + fuzzer.fuzz(BIGINT()), + }; + auto rowVector = makeRowVector(children); + testCompare(rowVector); + } + } +} + } // namespace } // namespace facebook::velox::exec