From dd7b290750e0b4afa52f0d349c1b544473b38113 Mon Sep 17 00:00:00 2001 From: bikramjeet Date: Tue, 23 Jul 2024 13:11:57 -0700 Subject: [PATCH] Fix range filters pushed down to scan to handle NaNs correctly Summary: The existing range filter implementation consistently returns false when testing NaNs, as operations involving NaN yield false results. However, given that NaN is considered greater than infinity in our context, NaNs should be allowed to pass through the filter if there is no upper bound. For example, in a filter such as 'x >= 2.0', NaNs should be permitted. This update ensures that this behavior is correctly implemented. Differential Revision: D60126489 --- velox/type/Filter.h | 10 +++++++--- velox/type/tests/FilterTest.cpp | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/velox/type/Filter.h b/velox/type/Filter.h index b5e573902444b..c60fb98d9df4e 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -1443,14 +1443,14 @@ class FloatingPointRange final : public AbstractRange { name, (lowerExclusive_ || lowerUnbounded_) ? "(" : "[", lowerUnbounded_ ? "-inf" : std::to_string(lower_), - upperUnbounded_ ? "+inf" : std::to_string(upper_), - (upperExclusive_ || upperUnbounded_) ? ")" : "]", + upperUnbounded_ ? "nan" : std::to_string(upper_), + (upperExclusive_ && !upperUnbounded_) ? ")" : "]", nullAllowed_ ? "with nulls" : "no nulls"); } bool testFloatingPoint(T value) const { if (std::isnan(value)) { - return false; + return upperUnbounded_; } if (!lowerUnbounded_) { if (value < lower_) { @@ -1496,6 +1496,10 @@ class FloatingPointRange final : public AbstractRange { result = values <= allUpper; } } + if (upperUnbounded_) { + auto nanResult = xsimd::isnan(values); + result = xsimd::bitwise_or(nanResult, result); + } return result; } diff --git a/velox/type/tests/FilterTest.cpp b/velox/type/tests/FilterTest.cpp index 0ff60695bfcb4..b0e7ed5f01625 100644 --- a/velox/type/tests/FilterTest.cpp +++ b/velox/type/tests/FilterTest.cpp @@ -644,6 +644,8 @@ TEST(FilterTest, doubleRange) { EXPECT_FALSE(filter->testNull()); EXPECT_FALSE(filter->testDouble(1.2)); EXPECT_FALSE(filter->testDouble(-19.267)); + EXPECT_TRUE(filter->testDouble(std::nanf("nan1"))); + EXPECT_TRUE(filter->testDouble(std::nanf("nan2"))); { double n4[] = {-1e100, std::nan("nan"), 1.3, 1e200}; checkSimd(filter.get(), n4, verify); @@ -705,6 +707,19 @@ TEST(FilterTest, floatRange) { checkSimd(filter.get(), n8, verify); } + filter = greaterThanFloat(1.2); + EXPECT_FALSE(filter->testFloat(1.1f)); + + EXPECT_FALSE(filter->testNull()); + EXPECT_FALSE(filter->testFloat(1.2f)); + EXPECT_TRUE(filter->testFloat(15.632f)); + EXPECT_TRUE(filter->testFloat(std::nanf("nan1"))); + EXPECT_TRUE(filter->testFloat(std::nanf("nan2"))); + { + float n8[] = {1.0, std::nanf("nan"), 1.3, 1e20, -1e20, 0, 1.1, 1.2}; + checkSimd(filter.get(), n8, verify); + } + EXPECT_THROW( betweenFloat(std::nanf("NAN"), std::nanf("NAN")), VeloxRuntimeError) << "able to create a FloatRange with NaN";