diff --git a/velox/docs/functions/spark/map.rst b/velox/docs/functions/spark/map.rst index aceabe402f05f..243e0f9acc6d9 100644 --- a/velox/docs/functions/spark/map.rst +++ b/velox/docs/functions/spark/map.rst @@ -29,7 +29,7 @@ Map Functions .. spark:function:: map_from_entries(array(struct(K,V))) -> map(K,V) - Converts an array of entries (key value struct types) to a map of values. All elements in keys should not be null. + Returns a map created from the given array of entries. Keys are not allowed to be null or to contain nulls. If null entry exists in the array, return null for this whole array.:: SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'null'))); -- {1 -> 'a', 2 -> 'null'} diff --git a/velox/functions/lib/MapFromEntries.cpp b/velox/functions/lib/MapFromEntries.cpp index e402d079a1008..29e06eab942f3 100644 --- a/velox/functions/lib/MapFromEntries.cpp +++ b/velox/functions/lib/MapFromEntries.cpp @@ -31,7 +31,7 @@ static const char* kIndeterminateKeyErrorMessage = static const char* kErrorMessageEntryNotNull = "map entry cannot be null"; /// @tparam throwForNull If true, will return null if input array is null or has -/// null entry (Spark's behavior), instead of throw execeptions(Presto's +/// null entries (Spark's behavior), instead of throwing exceptions (Presto's /// behavior). template class MapFromEntriesFunction : public exec::VectorFunction { @@ -132,7 +132,7 @@ class MapFromEntriesFunction : public exec::VectorFunction { }); auto resetSize = [&](vector_size_t row) { mutableSizes[row] = 0; }; - auto nulls = allocateNulls(decodedRowVector->size(), context.pool()); + auto nulls = allocateNulls(rows.size(), context.pool()); auto* mutableNulls = nulls->asMutable(); if (decodedRowVector->mayHaveNulls() || keyVector->mayHaveNulls() || @@ -146,7 +146,6 @@ class MapFromEntriesFunction : public exec::VectorFunction { const bool isMapEntryNull = decodedRowVector->isNullAt(offset + i); if (isMapEntryNull) { if constexpr (!throwForNull) { - // Spark: For nulls in the top level row vector, return null. bits::setNull(mutableNulls, row); resetSize(row); break; @@ -229,28 +228,19 @@ class MapFromEntriesFunction : public exec::VectorFunction { // For Presto, need construct map vector based on input nulls for possible // outer expression like try(). For Spark, use the updated nulls. - std::shared_ptr mapVector; if constexpr (throwForNull) { - mapVector = std::make_shared( - context.pool(), - outputType, - inputArray->nulls(), - rows.end(), - inputArray->offsets(), - sizes, - wrappedKeys, - wrappedValues); - } else { - mapVector = std::make_shared( - context.pool(), - outputType, - nulls, - rows.end(), - inputArray->offsets(), - sizes, - wrappedKeys, - wrappedValues); + nulls = inputArray->nulls(); } + auto mapVector = std::make_shared( + context.pool(), + outputType, + nulls, + rows.end(), + inputArray->offsets(), + sizes, + wrappedKeys, + wrappedValues); + checkDuplicateKeys(mapVector, *remianingRows, context); return mapVector; } diff --git a/velox/functions/prestosql/tests/MapFromEntriesTest.cpp b/velox/functions/prestosql/tests/MapFromEntriesTest.cpp index 1a1fdc2d0f362..adf14fd87b58d 100644 --- a/velox/functions/prestosql/tests/MapFromEntriesTest.cpp +++ b/velox/functions/prestosql/tests/MapFromEntriesTest.cpp @@ -17,7 +17,6 @@ #include #include "velox/common/base/tests/GTestUtils.h" #include "velox/functions/lib/CheckDuplicateKeys.h" -// #include "velox/functions/lib/CheckDuplicateKeys.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" #include "velox/vector/tests/TestingDictionaryArrayElementsFunction.h" diff --git a/velox/functions/sparksql/tests/MapFromEntriesTest.cpp b/velox/functions/sparksql/tests/MapFromEntriesTest.cpp index 2702ab0a60ddd..578a1a06bb2b9 100644 --- a/velox/functions/sparksql/tests/MapFromEntriesTest.cpp +++ b/velox/functions/sparksql/tests/MapFromEntriesTest.cpp @@ -32,7 +32,7 @@ std::optional>>> O( class MapFromEntriesTest : public SparkFunctionBaseTest { protected: - /// Create an MAP vector of size 1 using specified 'keys' and 'values' vector. + // Create an MAP vector of size 1 using specified 'keys' and 'values' vector. VectorPtr makeSingleRowMapVector( const VectorPtr& keys, const VectorPtr& values) { @@ -54,33 +54,14 @@ class MapFromEntriesTest : public SparkFunctionBaseTest { void verifyMapFromEntries( const std::vector& input, const VectorPtr& expected, - const std::string& funcArg = "C0") { + const std::string& funcArg = "c0") { const std::string expr = fmt::format("map_from_entries({})", funcArg); auto result = evaluate(expr, makeRowVector(input)); assertEqualVectors(expected, result); } - - // Evaluate an expression only, usually expect error thrown. - void evaluateExpr( - const std::string& expression, - const std::vector& input) { - evaluate(expression, makeRowVector(input)); - } }; } // namespace -TEST_F(MapFromEntriesTest, intKeyAndVarcharValue) { - auto rowType = ROW({INTEGER(), VARCHAR()}); - std::vector>>> - data = { - {{{1, "red"}}, {{2, "blue"}}, {{3, "green"}}}, - }; - auto input = makeArrayOfRowVector(data, rowType); - auto expected = makeMapVector( - {{{1, "red"_sv}, {2, "blue"_sv}, {3, "green"_sv}}}); - verifyMapFromEntries({input}, expected); -} - TEST_F(MapFromEntriesTest, nullMapEntries) { auto rowType = ROW({INTEGER(), INTEGER()}); { @@ -92,7 +73,7 @@ TEST_F(MapFromEntriesTest, nullMapEntries) { auto input = makeArrayOfRowVector(data, rowType); auto expected = makeNullableMapVector({std::nullopt, O({{1, 11}})}); - verifyMapFromEntries({input}, expected, "C0"); + verifyMapFromEntries({input}, expected, "c0"); } { // Create array(row(a,b)) where a, b sizes are 0 because all row(a, b) @@ -109,7 +90,7 @@ TEST_F(MapFromEntriesTest, nullMapEntries) { auto expected = makeNullableMapVector({std::nullopt, std::nullopt}); - verifyMapFromEntries({input}, expected, "C0"); + verifyMapFromEntries({input}, expected, "c0"); } } @@ -120,196 +101,10 @@ TEST_F(MapFromEntriesTest, nullKeys) { {variant::row({1, 11})}}; auto input = makeArrayOfRowVector(rowType, data); VELOX_ASSERT_THROW( - evaluateExpr("map_from_entries(C0)", {input}), "map key cannot be null"); -} - -TEST_F(MapFromEntriesTest, duplicateKeys) { - auto rowType = ROW({INTEGER(), INTEGER()}); - std::vector>>> data = { - {{{1, 10}}, {{1, 11}}}, - {{{2, 22}}}, - }; - auto input = makeArrayOfRowVector(data, rowType); - VELOX_ASSERT_THROW( - evaluateExpr("map_from_entries(C0)", {input}), - "Duplicate map keys (1) are not allowed"); -} - -TEST_F(MapFromEntriesTest, nullValues) { - auto rowType = ROW({INTEGER(), INTEGER()}); - std::vector> data = { - {variant::row({1, variant::null(TypeKind::INTEGER)}), - variant::row({2, 22}), - variant::row({3, 33})}}; - auto input = makeArrayOfRowVector(rowType, data); - auto expected = - makeMapVector({{{1, std::nullopt}, {2, 22}, {3, 33}}}); - verifyMapFromEntries({input}, expected); -} - -TEST_F(MapFromEntriesTest, constant) { - const vector_size_t kConstantSize = 1'000; - auto rowType = ROW({VARCHAR(), INTEGER()}); - std::vector>>> - data = { - {{{"red", 1}}, {{"blue", 2}}, {{"green", 3}}}, - {{{"red shiny car ahead", 4}}, {{"blue clear sky above", 5}}}, - {{{"r", 11}}, {{"g", 22}}, {{"b", 33}}}, - }; - auto input = makeArrayOfRowVector(data, rowType); - - auto evaluateConstant = [&](vector_size_t row, const VectorPtr& vector) { - return evaluate( - "map_from_entries(C0)", - makeRowVector( - {BaseVector::wrapInConstant(kConstantSize, row, vector)})); - }; - - auto result = evaluateConstant(0, input); - auto expected = BaseVector::wrapInConstant( - kConstantSize, - 0, - makeSingleRowMapVector( - makeFlatVector({"red"_sv, "blue"_sv, "green"_sv}), - makeFlatVector({1, 2, 3}))); - assertEqualVectors(expected, result); - - result = evaluateConstant(1, input); - expected = BaseVector::wrapInConstant( - kConstantSize, - 0, - makeSingleRowMapVector( - makeFlatVector( - {"red shiny car ahead"_sv, "blue clear sky above"_sv}), - makeFlatVector({4, 5}))); - assertEqualVectors(expected, result); - - result = evaluateConstant(2, input); - expected = BaseVector::wrapInConstant( - kConstantSize, - 0, - makeSingleRowMapVector( - makeFlatVector({"r"_sv, "g"_sv, "b"_sv}), - makeFlatVector({11, 22, 33}))); - assertEqualVectors(expected, result); -} - -TEST_F(MapFromEntriesTest, dictionaryEncodedElementsInFlat) { - exec::registerVectorFunction( - "testing_dictionary_array_elements", - facebook::velox::test::TestingDictionaryArrayElementsFunction:: - signatures(), - std::make_unique< - facebook::velox::test::TestingDictionaryArrayElementsFunction>()); - - auto rowType = ROW({INTEGER(), VARCHAR()}); - std::vector>>> - data = { - {{{1, "red"}}, {{2, "blue"}}, {{3, "green"}}}, - }; - auto input = makeArrayOfRowVector(data, rowType); - auto expected = makeMapVector( - {{{1, "red"_sv}, {2, "blue"_sv}, {3, "green"_sv}}}); - verifyMapFromEntries( - {input}, expected, "testing_dictionary_array_elements(C0)"); -} - -TEST_F(MapFromEntriesTest, outputSizeIsBoundBySelectedRows) { - // This test makes sure that map_from_entries output vector size is - // `rows.end()` instead of `rows.size()`. - - auto rowType = ROW({INTEGER(), INTEGER()}); - core::QueryConfig config({}); - auto function = - exec::getVectorFunction("map_from_entries", {ARRAY(rowType)}, {}, config); - - std::vector>>> data = { - {{{1, 11}}, {{2, 22}}, {{3, 33}}}, - {{{4, 44}}, {{5, 55}}}, - {{{6, 66}}}, - }; - auto array = makeArrayOfRowVector(data, rowType); - - auto rowVector = makeRowVector({array}); - - // Only the first 2 rows selected. - SelectivityVector rows(2); - // This is larger than input array size but rows beyond the input vector size - // are not selected. - rows.resize(1000, false); - - ASSERT_EQ(rows.size(), 1000); - ASSERT_EQ(rows.end(), 2); - ASSERT_EQ(array->size(), 3); - - auto typedExpr = - makeTypedExpr("map_from_entries(c0)", asRowType(rowVector->type())); - std::vector results(1); - - exec::ExprSet exprSet({typedExpr}, &execCtx_); - exec::EvalCtx evalCtx(&execCtx_, &exprSet, rowVector.get()); - exprSet.eval(rows, evalCtx, results); - - ASSERT_EQ(results[0]->size(), 2); -} - -TEST_F(MapFromEntriesTest, rowsWithNullsNotPassedToCheckDuplicateKey) { - auto innerRowVector = makeRowVector( - {makeNullableFlatVector({std::nullopt, 2, 3, 4}), - makeNullableFlatVector({1, 2, 3, 4})}); - - auto offsets = makeIndices({0, 2}); - auto sizes = makeIndices({2, 2}); - - auto arrayVector = std::make_shared( - pool(), - ARRAY(ROW({INTEGER(), INTEGER()})), - nullptr, - 2, - offsets, - sizes, - innerRowVector); - VELOX_ASSERT_THROW( - evaluate("map_from_entries(C0)", makeRowVector({arrayVector})), + evaluate("map_from_entries(c0)", makeRowVector({input})), "map key cannot be null"); } -TEST_F(MapFromEntriesTest, arrayOfDictionaryRowOfNulls) { - RowVectorPtr rowVector = - makeRowVector({makeFlatVector(0), makeFlatVector(0)}); - rowVector->resize(4); - rowVector->childAt(0)->resize(0); - rowVector->childAt(1)->resize(0); - for (int i = 0; i < rowVector->size(); i++) { - rowVector->setNull(i, true); - } - - EXPECT_EQ(rowVector->childAt(0)->size(), 0); - EXPECT_EQ(rowVector->childAt(1)->size(), 0); - - auto indices = makeIndices({0, 1, 2, 3}); - - auto dictionary = - BaseVector::wrapInDictionary(nullptr, indices, 4, rowVector); - - auto offsets = makeIndices({0, 2}); - auto sizes = makeIndices({2, 2}); - - auto arrayVector = std::make_shared( - pool(), - ARRAY(ROW({INTEGER(), INTEGER()})), - nullptr, - 2, - offsets, - sizes, - dictionary); - VectorPtr result = - evaluate("map_from_entries(c0)", makeRowVector({arrayVector})); - for (int i = 0; i < result->size(); i++) { - EXPECT_TRUE(result->isNullAt(i)); - } -} - TEST_F(MapFromEntriesTest, arrayOfConstantRowOfNulls) { RowVectorPtr rowVector = makeRowVector({makeFlatVector(0), makeFlatVector(0)}); @@ -340,61 +135,6 @@ TEST_F(MapFromEntriesTest, arrayOfConstantRowOfNulls) { } } -TEST_F(MapFromEntriesTest, arrayOfConstantNotNulls) { - RowVectorPtr rowVector = makeRowVector( - {makeFlatVector({1, 2}), makeFlatVector({3, 4})}); - rowVector->resize(1); - rowVector->setNull(0, false); - - VectorPtr rowVectorConstant = BaseVector::wrapInConstant(4, 0, rowVector); - { - auto offsets = makeIndices({0, 2}); - auto sizes = makeIndices({2, 2}); - - auto arrayVector = std::make_shared( - pool(), - ARRAY(ROW({INTEGER(), INTEGER()})), - nullptr, - 2, - offsets, - sizes, - rowVectorConstant); - - VELOX_ASSERT_THROW( - evaluate("map_from_entries(C0)", makeRowVector({arrayVector})), - "Duplicate map keys (1) are not allowed"); - } - - { - auto offsets = makeIndices({0, 1}); - auto sizes = makeIndices({1, 1}); - - auto arrayVector = std::make_shared( - pool(), - ARRAY(ROW({INTEGER(), INTEGER()})), - nullptr, - 2, - offsets, - sizes, - rowVectorConstant); - - // will fail due to duplicate key. - VectorPtr result = - evaluate("map_from_entries(c0)", makeRowVector({arrayVector})); - auto expected = makeMapVector( - {{{1, 2}}, {{1, 2}}, {{1, 2}}, {{1, 2}}}); - } -} - -TEST_F(MapFromEntriesTest, nestedNullInKeys) { - facebook::velox::functions::registerArrayConstructor("array_constructor"); - VELOX_ASSERT_THROW( - evaluate( - "map_from_entries(array_constructor(row_constructor(array_constructor(null), null)))", - makeRowVector({makeFlatVector(1)})), - "map key cannot be indeterminate"); -} - TEST_F(MapFromEntriesTest, unknownInputs) { facebook::velox::functions::registerArrayConstructor("array_constructor"); auto expectedType = MAP(UNKNOWN(), UNKNOWN()); @@ -425,35 +165,4 @@ TEST_F(MapFromEntriesTest, nullRowEntriesWithSmallerChildren) { evaluate("map_from_entries(c0)", makeRowVector({arrayVector})), "map key cannot be null"); } - -TEST_F(MapFromEntriesTest, allTopLevelNullsCornerCase) { - // Test a corner case where the input of mapFromEntries is an array with - // array at row 0 : [(null), (null)] - // array at row 1 : [] - // And the function is evaluated only at row 1. - - auto keys = makeNullableFlatVector({}); - auto values = makeNullableFlatVector({}); - auto rowVector = makeRowVector({keys, values}); - - EXPECT_EQ(rowVector->size(), 0); - rowVector->appendNulls(2); - - EXPECT_EQ(rowVector->size(), 2); - EXPECT_EQ(rowVector->childAt(0)->size(), 0); - EXPECT_EQ(rowVector->childAt(1)->size(), 0); - - // Array at row 0 is [(null), (null)] - // Array at row 1 is [] - auto arrayVector = makeArrayVector({0, 2}, rowVector); - - SelectivityVector rows(2); - rows.setValid(0, false); - - auto result = - evaluate("map_from_entries(c0)", makeRowVector({arrayVector}), rows); - result->validate(); - auto expected = makeMapVectorFromJson({"{}", "{}"}); - assertEqualVectors(expected, result); -} } // namespace facebook::velox::functions::sparksql::test