From e5caf6243145f385ff1227663314421fe780c1f9 Mon Sep 17 00:00:00 2001
From: Bikramjeet Vig <bikramjeet@meta.com>
Date: Tue, 23 Jul 2024 13:21:48 -0700
Subject: [PATCH] Fix range filters pushed down to scan to handle NaNs
 correctly (#10533)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/10533

The existing range filter implementation consistently returns false
when testing NaNs, as operations involving NaN yield false results.
However, given that NaN is considered greater than infinity in our
context, NaNs should be allowed to pass through the filter if there
is no upper bound. For example, in a filter such as 'x >= 2.0',
NaNs should be permitted. This update ensures that this behavior
is correctly implemented.

Differential Revision: D60126489
---
 velox/type/Filter.h             | 10 +++++++---
 velox/type/tests/FilterTest.cpp | 15 +++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/velox/type/Filter.h b/velox/type/Filter.h
index b5e573902444..c60fb98d9df4 100644
--- a/velox/type/Filter.h
+++ b/velox/type/Filter.h
@@ -1443,14 +1443,14 @@ class FloatingPointRange final : public AbstractRange {
         name,
         (lowerExclusive_ || lowerUnbounded_) ? "(" : "[",
         lowerUnbounded_ ? "-inf" : std::to_string(lower_),
-        upperUnbounded_ ? "+inf" : std::to_string(upper_),
-        (upperExclusive_ || upperUnbounded_) ? ")" : "]",
+        upperUnbounded_ ? "nan" : std::to_string(upper_),
+        (upperExclusive_ && !upperUnbounded_) ? ")" : "]",
         nullAllowed_ ? "with nulls" : "no nulls");
   }
 
   bool testFloatingPoint(T value) const {
     if (std::isnan(value)) {
-      return false;
+      return upperUnbounded_;
     }
     if (!lowerUnbounded_) {
       if (value < lower_) {
@@ -1496,6 +1496,10 @@ class FloatingPointRange final : public AbstractRange {
         result = values <= allUpper;
       }
     }
+    if (upperUnbounded_) {
+      auto nanResult = xsimd::isnan(values);
+      result = xsimd::bitwise_or(nanResult, result);
+    }
     return result;
   }
 
diff --git a/velox/type/tests/FilterTest.cpp b/velox/type/tests/FilterTest.cpp
index 0ff60695bfcb..b0e7ed5f0162 100644
--- a/velox/type/tests/FilterTest.cpp
+++ b/velox/type/tests/FilterTest.cpp
@@ -644,6 +644,8 @@ TEST(FilterTest, doubleRange) {
   EXPECT_FALSE(filter->testNull());
   EXPECT_FALSE(filter->testDouble(1.2));
   EXPECT_FALSE(filter->testDouble(-19.267));
+  EXPECT_TRUE(filter->testDouble(std::nanf("nan1")));
+  EXPECT_TRUE(filter->testDouble(std::nanf("nan2")));
   {
     double n4[] = {-1e100, std::nan("nan"), 1.3, 1e200};
     checkSimd(filter.get(), n4, verify);
@@ -705,6 +707,19 @@ TEST(FilterTest, floatRange) {
     checkSimd(filter.get(), n8, verify);
   }
 
+  filter = greaterThanFloat(1.2);
+  EXPECT_FALSE(filter->testFloat(1.1f));
+
+  EXPECT_FALSE(filter->testNull());
+  EXPECT_FALSE(filter->testFloat(1.2f));
+  EXPECT_TRUE(filter->testFloat(15.632f));
+  EXPECT_TRUE(filter->testFloat(std::nanf("nan1")));
+  EXPECT_TRUE(filter->testFloat(std::nanf("nan2")));
+  {
+    float n8[] = {1.0, std::nanf("nan"), 1.3, 1e20, -1e20, 0, 1.1, 1.2};
+    checkSimd(filter.get(), n8, verify);
+  }
+
   EXPECT_THROW(
       betweenFloat(std::nanf("NAN"), std::nanf("NAN")), VeloxRuntimeError)
       << "able to create a FloatRange with NaN";