From c6ad4bce13188d6934608efba82367711ea6171e Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Wed, 13 Dec 2023 17:43:56 -0800 Subject: [PATCH] Fix find_first Presto function on empty arrays and invalid start index (#8030) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/8030 find_first(, 0, ) should throw. Fixes #8020 Reviewed By: xiaoxmeng Differential Revision: D52140846 fbshipit-source-id: 8305ea28e1c874d07daf458c98b9855f87dd42cf --- velox/functions/prestosql/FindFirst.cpp | 33 +++++++++++++++---- .../prestosql/tests/FindFirstTest.cpp | 21 ++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/velox/functions/prestosql/FindFirst.cpp b/velox/functions/prestosql/FindFirst.cpp index c26c5194a440..a4cc009336fb 100644 --- a/velox/functions/prestosql/FindFirst.cpp +++ b/velox/functions/prestosql/FindFirst.cpp @@ -20,6 +20,14 @@ namespace facebook::velox::functions { namespace { +void recordInvalidStartIndex(vector_size_t row, exec::EvalCtx& context) { + try { + VELOX_USER_FAIL("SQL array indices start at 1. Got 0."); + } catch (const VeloxUserError& exception) { + context.setVeloxExceptionError(row, std::current_exception()); + } +} + class FindFirstFunctionBase : public exec::VectorFunction { public: bool isDefaultNullBehavior() const override { @@ -175,11 +183,7 @@ class FindFirstFunctionBase : public exec::VectorFunction { rawAdjustedOffsets[row] = 0; rawAdjustedSizes[row] = 0; - try { - VELOX_USER_FAIL("SQL array indices start at 1. Got 0."); - } catch (const VeloxUserError& exception) { - context.setVeloxExceptionError(row, std::current_exception()); - } + recordInvalidStartIndex(row, context); } else if (start > 0) { rawAdjustedOffsets[row] = offset + (start - 1); rawAdjustedSizes[row] = size - (start - 1); @@ -271,9 +275,27 @@ class FindFirstFunction : public FindFirstFunctionBase { exec::EvalCtx& context, VectorPtr& result) const override { auto flatArray = prepareInputArray(args[0], rows, context); + auto startIndexVector = (args.size() == 3 ? args[1] : nullptr); if (flatArray->elements()->size() == 0) { // All arrays are NULL or empty. + + if (startIndexVector != nullptr) { + // Check start indices for zeros. + + exec::LocalDecodedVector startIndexDecoder( + context, *startIndexVector, rows); + rows.applyToSelected([&](auto row) { + if (flatArray->isNullAt(row) || startIndexDecoder->isNullAt(row)) { + return; + } + const auto start = startIndexDecoder->valueAt(row); + if (start == 0) { + recordInvalidStartIndex(row, context); + } + }); + } + auto localResult = BaseVector::createNullConstant( outputType, rows.end(), context.pool()); context.moveOrCopyResult(localResult, rows, result); @@ -281,7 +303,6 @@ class FindFirstFunction : public FindFirstFunctionBase { } auto* predicateVector = args.back()->asUnchecked(); - auto startIndexVector = (args.size() == 3 ? args[1] : nullptr); // Collect indices of the first matching elements or NULLs if no match or // error. diff --git a/velox/functions/prestosql/tests/FindFirstTest.cpp b/velox/functions/prestosql/tests/FindFirstTest.cpp index 610c76b29406..dfaf4f742811 100644 --- a/velox/functions/prestosql/tests/FindFirstTest.cpp +++ b/velox/functions/prestosql/tests/FindFirstTest.cpp @@ -194,6 +194,27 @@ TEST_F(FindFirstTest, invalidIndex) { auto expected = makeNullableFlatVector( {2, std::nullopt, std::nullopt, std::nullopt}); verify("find_first(c0, c1, x -> (x > 0))", data, expected); + + // All null or empty arrays. + data = makeRowVector({ + makeArrayVectorFromJson({ + "[]", + "null", + "[]", + "null", + }), + makeFlatVector({2, 0, 0, 1}), + }); + + // Index 0 is not valid. Expect an error in the 3rd row. + VELOX_ASSERT_THROW( + evaluate("find_first(c0, c1, x -> (x > 0))", data), + "SQL array indices start at 1. Got 0."); + + // Mark 3rd row null. Expect no error. + data->setNull(2, true); + expected = makeAllNullFlatVector(4); + verify("find_first(c0, c1, x -> (x > 0))", data, expected); } // Verify that null arrays with non-zero offsets/sizes are processed correctly.