Fix range filters pushed down to scan to handle NaNs correctly

Summary: The existing range filter implementation consistently returns false when testing NaNs, as operations involving NaN yield false results. However, given that NaN is considered greater than infinity in our context, NaNs should be allowed to pass through the filter if there is no upper bound. For example, in a filter such as 'x >= 2.0', NaNs should be permitted. This update ensures that this behavior is correctly implemented. Differential Revision: D60126489
bikramSingh91 · Jul 23, 2024 · dd7b290 · dd7b290
1 parent 9f64e42
commit dd7b290
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 3 deletions.
diff --git a/velox/type/Filter.h b/velox/type/Filter.h
@@ -1443,14 +1443,14 @@ class FloatingPointRange final : public AbstractRange {
         name,
         (lowerExclusive_ || lowerUnbounded_) ? "(" : "[",
         lowerUnbounded_ ? "-inf" : std::to_string(lower_),
-        upperUnbounded_ ? "+inf" : std::to_string(upper_),
-        (upperExclusive_ || upperUnbounded_) ? ")" : "]",
+        upperUnbounded_ ? "nan" : std::to_string(upper_),
+        (upperExclusive_ && !upperUnbounded_) ? ")" : "]",
         nullAllowed_ ? "with nulls" : "no nulls");
   }
 
   bool testFloatingPoint(T value) const {
     if (std::isnan(value)) {
-      return false;
+      return upperUnbounded_;
     }
     if (!lowerUnbounded_) {
       if (value < lower_) {
@@ -1496,6 +1496,10 @@ class FloatingPointRange final : public AbstractRange {
         result = values <= allUpper;
       }
     }
+    if (upperUnbounded_) {
+      auto nanResult = xsimd::isnan(values);
+      result = xsimd::bitwise_or(nanResult, result);
+    }
     return result;
   }
 

diff --git a/velox/type/tests/FilterTest.cpp b/velox/type/tests/FilterTest.cpp
@@ -644,6 +644,8 @@ TEST(FilterTest, doubleRange) {
   EXPECT_FALSE(filter->testNull());
   EXPECT_FALSE(filter->testDouble(1.2));
   EXPECT_FALSE(filter->testDouble(-19.267));
+  EXPECT_TRUE(filter->testDouble(std::nanf("nan1")));
+  EXPECT_TRUE(filter->testDouble(std::nanf("nan2")));
   {
     double n4[] = {-1e100, std::nan("nan"), 1.3, 1e200};
     checkSimd(filter.get(), n4, verify);
@@ -705,6 +707,19 @@ TEST(FilterTest, floatRange) {
     checkSimd(filter.get(), n8, verify);
   }
 
+  filter = greaterThanFloat(1.2);
+  EXPECT_FALSE(filter->testFloat(1.1f));
+
+  EXPECT_FALSE(filter->testNull());
+  EXPECT_FALSE(filter->testFloat(1.2f));
+  EXPECT_TRUE(filter->testFloat(15.632f));
+  EXPECT_TRUE(filter->testFloat(std::nanf("nan1")));
+  EXPECT_TRUE(filter->testFloat(std::nanf("nan2")));
+  {
+    float n8[] = {1.0, std::nanf("nan"), 1.3, 1e20, -1e20, 0, 1.1, 1.2};
+    checkSimd(filter.get(), n8, verify);
+  }
+
   EXPECT_THROW(
       betweenFloat(std::nanf("NAN"), std::nanf("NAN")), VeloxRuntimeError)
       << "able to create a FloatRange with NaN";