Skip to content

Commit

Permalink
Fix NaN handling for array_min and array_max (#9772)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #9772

Ensure that:
- Null is returned consistently for all types
- NaN is treated as greater than infinity

Reviewed By: spershin, kagamiori

Differential Revision: D57237545

fbshipit-source-id: d6fd95c6991fed387b5ddb6dc0a855da3ddff74b
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed May 20, 2024
1 parent 7fd0deb commit c856374
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 100 deletions.
18 changes: 9 additions & 9 deletions velox/docs/functions/presto/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,30 +94,30 @@ Array Functions
.. function:: array_max(array(E)) -> E

Returns the maximum value of input array.
Returns NaN if E is REAL or DOUBLE and array contains a NaN value.
Returns NULL if array doesn't contain a NaN value, but contains a NULL value. ::
NaN is considered to be greater than Infinity.
Returns NULL if array contains a NULL value. ::

SELECT array_max(ARRAY [1, 2, 3]); -- 3
SELECT array_max(ARRAY [-1, -2, -2]); -- -1
SELECT array_max(ARRAY [-1, -2, NULL]); -- NULL
SELECT array_max(ARRAY []); -- NULL
SELECT array_max(ARRAY[NULL, nan()]); -- NaN
SELECT array_max(ARRAY [-1, nan(), NULL]); -- NULL
SELECT array_max(ARRAY[{-1, -2, -3, nan()]); -- NaN
SELECT array_max(ARRAY[-0.0001, NULL, -0.0003, nan()]); -- NaN
SELECT array_max(ARRAY[{infinity(), nan()]); -- NaN

.. function:: array_min(array(E)) -> E

Returns the minimum value of input array.
Returns NaN if E is REAL or DOUBLE and array contains a NaN value.
Returns NULL if array doesn't contain a NaN value, but contains a NULL value. ::
NaN is considered to be greater than Infinity.
Returns NULL if array contains a NULL value. ::

SELECT array_min(ARRAY [1, 2, 3]); -- 1
SELECT array_min(ARRAY [-1, -2, -2]); -- -2
SELECT array_min(ARRAY [-1, -2, NULL]); -- NULL
SELECT array_min(ARRAY []); -- NULL
SELECT array_min(ARRAY[NULL, nan()]); -- NaN
SELECT array_min(ARRAY[{-1, -2, -3, nan()]); -- NaN
SELECT array_min(ARRAY[-0.0001, NULL, -0.0003, nan()]); -- NaN
SELECT array_min(ARRAY [-1, nan(), NULL]); -- NULL
SELECT array_min(ARRAY[{-1, -2, -3, nan()]); -- -1
SELECT array_min(ARRAY[{infinity(), nan()]); -- Infinity

.. function:: array_normalize(array(E), E) -> array(E)

Expand Down
83 changes: 20 additions & 63 deletions velox/functions/prestosql/ArrayFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "velox/functions/Udf.h"
#include "velox/functions/lib/CheckedArithmetic.h"
#include "velox/type/Conversions.h"
#include "velox/type/FloatingPointUtil.h"

namespace facebook::velox::functions {

Expand All @@ -32,13 +33,27 @@ struct ArrayMinMaxFunction {

template <typename T>
void update(T& currentValue, const T& candidateValue) {
if constexpr (isMax) {
if (candidateValue > currentValue) {
currentValue = candidateValue;
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
using facebook::velox::util::floating_point::NaNAwareGreaterThan;
using facebook::velox::util::floating_point::NaNAwareLessThan;
if constexpr (isMax) {
if (NaNAwareGreaterThan<T>{}(candidateValue, currentValue)) {
currentValue = candidateValue;
}
} else {
if (NaNAwareLessThan<T>{}(candidateValue, currentValue)) {
currentValue = candidateValue;
}
}
} else {
if (candidateValue < currentValue) {
currentValue = candidateValue;
if constexpr (isMax) {
if (candidateValue > currentValue) {
currentValue = candidateValue;
}
} else {
if (candidateValue < currentValue) {
currentValue = candidateValue;
}
}
}
}
Expand All @@ -52,71 +67,13 @@ struct ArrayMinMaxFunction {
out.setNoCopy(value);
}

template <typename TReturn, typename TInput>
bool callForFloatOrDouble(TReturn& out, const TInput& array) {
bool hasNull = false;
auto it = array.begin();

// Find the first non-null item (if any)
while (it != array.end()) {
if (it->has_value()) {
break;
}

hasNull = true;
++it;
}

// Return false if end of array is reached without finding a non-null item.
if (it == array.end()) {
return false;
}

// If first non-null item is NAN, return immediately.
auto currentValue = it->value();
if (std::isnan(currentValue)) {
assign(out, currentValue);
return true;
}

++it;
while (it != array.end()) {
if (it->has_value()) {
auto newValue = it->value();
if (std::isnan(newValue)) {
assign(out, newValue);
return true;
}
update(currentValue, newValue);
} else {
hasNull = true;
}
++it;
}

// If we found a null, return false. Note that, if we found
// a NAN, the function will return at earlier stage as soon as
// a NAN is observed.
if (hasNull) {
return false;
}

assign(out, currentValue);
return true;
}

template <typename TReturn, typename TInput>
FOLLY_ALWAYS_INLINE bool call(TReturn& out, const TInput& array) {
// Result is null if array is empty.
if (array.size() == 0) {
return false;
}

if constexpr (
std::is_same_v<TReturn, float> || std::is_same_v<TReturn, double>) {
return callForFloatOrDouble(out, array);
}

if (!array.mayHaveNulls()) {
// Input array does not have nulls.
auto currentValue = *array[0];
Expand Down
53 changes: 35 additions & 18 deletions velox/functions/prestosql/tests/ArrayMaxTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,24 +145,21 @@ TEST_F(ArrayMaxTest, longVarcharNoNulls) {

// Test documented example.
TEST_F(ArrayMaxTest, docs) {
auto input1 = makeNullableArrayVector<int32_t>(
{{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
auto expected1 =
makeNullableFlatVector<int32_t>({3, -1, std::nullopt, std::nullopt});
testArrayMax(input1, expected1);

auto input2 = makeNullableArrayVector<double>(
{{std::nullopt, std::numeric_limits<double>::quiet_NaN()},
{-1, -2, -3, std::numeric_limits<double>::quiet_NaN()},
{-0.0001,
std::nullopt,
-0.0003,
std::numeric_limits<double>::quiet_NaN()}});
auto expected2 = makeNullableFlatVector<double>(
{std::numeric_limits<double>::quiet_NaN(),
std::numeric_limits<double>::quiet_NaN(),
std::numeric_limits<double>::quiet_NaN()});
testArrayMax(input2, expected2);
{
auto input = makeNullableArrayVector<int32_t>(
{{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
auto expected =
makeNullableFlatVector<int32_t>({3, -1, std::nullopt, std::nullopt});
testArrayMax(input, expected);
}
{
static const float kNaN = std::numeric_limits<float>::quiet_NaN();
static const float kInfinity = std::numeric_limits<float>::infinity();
auto input = makeNullableArrayVector<float>(
{{-1, kNaN, std::nullopt}, {-1, -2, -3, kNaN}, {kInfinity, kNaN}});
auto expected = makeNullableFlatVector<float>({std::nullopt, kNaN, kNaN});
testArrayMax(input, expected);
}
}

template <typename Type>
Expand Down Expand Up @@ -293,6 +290,22 @@ class ArrayMaxFloatingPointTest : public FunctionBaseTest {
0.0001});
testArrayMax(input, expected);
}

void testExtremeValues() {
static const T kNaN = std::numeric_limits<T>::quiet_NaN();
static const T kInfinity = std::numeric_limits<T>::infinity();
static const T kNegativeInfinity = -1 * std::numeric_limits<T>::infinity();
auto input = makeNullableArrayVector<T>(
{{-1, std::nullopt, kNaN},
{-1, std::nullopt, 2},
{-1, 0, 2},
{kNegativeInfinity, kNegativeInfinity},
{-1, 2, kInfinity},
{kInfinity, kNaN}});
auto expected = makeNullableFlatVector<T>(
{std::nullopt, std::nullopt, 2, kNegativeInfinity, kInfinity, kNaN});
testArrayMax(input, expected);
}
};

} // namespace
Expand All @@ -318,3 +331,7 @@ TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxNullable) {
TYPED_TEST(ArrayMaxFloatingPointTest, arrayMax) {
this->testNoNulls();
}

TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxExtreme) {
this->testExtremeValues();
}
38 changes: 28 additions & 10 deletions velox/functions/prestosql/tests/ArrayMinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,12 @@ class ArrayMinTest : public FunctionBaseTest {
makeNullableFlatVector<int64_t>({1, -2, std::nullopt, std::nullopt});
testExpr<int64_t>(expected1, "array_min(C0)", {input1});

static const float kNaN = std::numeric_limits<float>::quiet_NaN();
static const float kInfinity = std::numeric_limits<float>::infinity();
auto input2 = makeNullableArrayVector<float>(
{{std::nullopt, std::numeric_limits<double>::quiet_NaN()},
{-1, -2, -3, std::numeric_limits<float>::quiet_NaN()},
{-0.0001,
std::nullopt,
-0.0003,
std::numeric_limits<float>::quiet_NaN()}});
auto expected2 = makeNullableFlatVector<float>(
{std::numeric_limits<float>::quiet_NaN(),
std::numeric_limits<float>::quiet_NaN(),
std::numeric_limits<float>::quiet_NaN()});
{{-1, kNaN, std::nullopt}, {-1, -2, -3, kNaN}, {kInfinity, kNaN}});
auto expected2 =
makeNullableFlatVector<float>({std::nullopt, -3, kInfinity});

testExpr<float>(expected2, "array_min(C0)", {input2});
}
Expand Down Expand Up @@ -157,6 +152,24 @@ class ArrayMinTest : public FunctionBaseTest {
{false, true, false, std::nullopt, false, true});
testExpr<bool>(expected, "array_min(C0)", {arrayVector});
}

template <typename T>
void testFloatingPoint() {
static const T kNaN = std::numeric_limits<T>::quiet_NaN();
static const T kInfinity = std::numeric_limits<T>::infinity();
static const T kNegativeInfinity = -1 * std::numeric_limits<T>::infinity();
auto input = makeNullableArrayVector<T>(
{{-1, std::nullopt, kNaN},
{-1, std::nullopt, 2},
{-1, 0, 2},
{-1, kNegativeInfinity, kNaN},
{kInfinity, kNaN},
{kNaN, kNaN}});
auto expected = makeNullableFlatVector<T>(
{std::nullopt, std::nullopt, -1, kNegativeInfinity, kInfinity, kNaN});

testExpr<T>(expected, "array_min(C0)", {input});
}
};

} // namespace
Expand Down Expand Up @@ -185,6 +198,11 @@ TEST_F(ArrayMinTest, boolArrays) {
testBool();
}

TEST_F(ArrayMinTest, floatArrays) {
testFloatingPoint<float>();
testFloatingPoint<double>();
}

TEST_F(ArrayMinTest, complexTypeElements) {
auto elements = makeRowVector({
makeFlatVector<int32_t>({1, 2, 3, 3, 2, 1}),
Expand Down

0 comments on commit c856374

Please sign in to comment.