diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 7a3004db1a38..1322c85e3771 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -91,7 +91,7 @@ std::unique_ptr makeFloatingPointMapKeyFilter( if (filters.size() == 1) { return std::move(filters[0]); } - return std::make_unique(std::move(filters), false, false); + return std::make_unique(std::move(filters), false); } // Recursively add subfields to scan spec. diff --git a/velox/expression/ExprToSubfieldFilter.cpp b/velox/expression/ExprToSubfieldFilter.cpp index 8760dda6e693..f876c5de3fa8 100644 --- a/velox/expression/ExprToSubfieldFilter.cpp +++ b/velox/expression/ExprToSubfieldFilter.cpp @@ -336,8 +336,7 @@ std::unique_ptr makeNotEqualFilter( std::vector> filters; filters.emplace_back(std::move(lessThanFilter)); filters.emplace_back(std::move(greaterThanFilter)); - return std::make_unique( - std::move(filters), false, false); + return std::make_unique(std::move(filters), false); } } diff --git a/velox/expression/ExprToSubfieldFilter.h b/velox/expression/ExprToSubfieldFilter.h index 193e3413ea25..f87d28e2530b 100644 --- a/velox/expression/ExprToSubfieldFilter.h +++ b/velox/expression/ExprToSubfieldFilter.h @@ -320,16 +320,12 @@ inline std::unique_ptr isNotNull() { } template -std::unique_ptr orFilter( - std::unique_ptr a, - std::unique_ptr b, - bool nullAllowed = false, - bool nanAllowed = false) { +std::unique_ptr +orFilter(std::unique_ptr a, std::unique_ptr b, bool nullAllowed = false) { std::vector> filters; filters.emplace_back(std::move(a)); filters.emplace_back(std::move(b)); - return std::make_unique( - std::move(filters), nullAllowed, nanAllowed); + return std::make_unique(std::move(filters), nullAllowed); } inline std::unique_ptr lessThanHugeint( diff --git a/velox/type/Filter.cpp b/velox/type/Filter.cpp index 07d64b3b0763..d0f6932b4aad 100644 --- a/velox/type/Filter.cpp +++ b/velox/type/Filter.cpp @@ -712,7 +712,6 @@ bool NegatedBytesValues::testingEquals(const Filter& other) const { folly::dynamic MultiRange::serialize() const { auto obj = Filter::serializeBase("MultiRange"); - obj["nanAllowed"] = nanAllowed_; folly::dynamic arr = folly::dynamic::array; for (const auto& f : filters_) { arr.push_back(f->serialize()); @@ -723,7 +722,6 @@ folly::dynamic MultiRange::serialize() const { FilterPtr MultiRange::create(const folly::dynamic& obj) { auto nullAllowed = deserializeNullAllowed(obj); - auto nanAllowed = obj["nanAllowed"].asBool(); folly::dynamic arr = obj["filters"]; auto tmpFilters = ISerializable::deserialize>(arr); @@ -733,14 +731,13 @@ FilterPtr MultiRange::create(const folly::dynamic& obj) { filters.emplace_back(f->clone()); } - return std::make_unique( - std::move(filters), nullAllowed, nanAllowed); + return std::make_unique(std::move(filters), nullAllowed); } bool MultiRange::testingEquals(const Filter& other) const { auto otherMultiRange = dynamic_cast(&other); auto res = otherMultiRange != nullptr && Filter::testingBaseEquals(other) && - nanAllowed_ == otherMultiRange->nanAllowed_ && + filters_.size() == otherMultiRange->filters_.size(); if (!res) { @@ -1357,7 +1354,7 @@ std::unique_ptr NegatedBytesRange::toMultiRange() const { if (accepted.size() == 1) { return accepted[0]->clone(nullAllowed_); } - return std::make_unique(std::move(accepted), nullAllowed_, false); + return std::make_unique(std::move(accepted), nullAllowed_); } bool NegatedBytesValues::testBytesRange( @@ -1439,17 +1436,13 @@ std::unique_ptr MultiRange::clone( if (nullAllowed) { return std::make_unique( - std::move(filters), nullAllowed.value(), nanAllowed_); + std::move(filters), nullAllowed.value()); } else { - return std::make_unique( - std::move(filters), nullAllowed_, nanAllowed_); + return std::make_unique(std::move(filters), nullAllowed_); } } bool MultiRange::testDouble(double value) const { - if (std::isnan(value)) { - return nanAllowed_; - } for (const auto& filter : filters_) { if (filter->testDouble(value)) { return true; @@ -1459,9 +1452,6 @@ bool MultiRange::testDouble(double value) const { } bool MultiRange::testFloat(float value) const { - if (std::isnan(value)) { - return nanAllowed_; - } for (const auto& filter : filters_) { if (filter->testFloat(value)) { return true; @@ -1554,7 +1544,6 @@ std::unique_ptr MultiRange::mergeWith(const Filter* other) const { case FilterKind::kBytesRange: case FilterKind::kMultiRange: { bool bothNullAllowed = nullAllowed_ && other->testNull(); - bool bothNanAllowed = nanAllowed_; std::vector otherFilters; if (other->kind() == FilterKind::kMultiRange) { @@ -1562,7 +1551,6 @@ std::unique_ptr MultiRange::mergeWith(const Filter* other) const { for (auto const& filterOther : multiRangeOther->filters()) { otherFilters.emplace_back(filterOther.get()); } - bothNanAllowed = bothNanAllowed && multiRangeOther->nanAllowed(); } else { otherFilters.emplace_back(other); } @@ -1614,8 +1602,7 @@ std::unique_ptr MultiRange::mergeWith(const Filter* other) const { } else if (merged.size() == 1) { return merged.front()->clone(bothNullAllowed); } else { - return std::make_unique( - std::move(merged), bothNullAllowed, bothNanAllowed); + return std::make_unique(std::move(merged), bothNullAllowed); } } default: @@ -2637,8 +2624,7 @@ std::unique_ptr NegatedBytesValues::mergeWith( bytesRangeOther->upperUnbounded(), hiExclusive, false)); - return std::make_unique( - std::move(ranges), bothNullAllowed, false); + return std::make_unique(std::move(ranges), bothNullAllowed); } default: VELOX_UNREACHABLE(); diff --git a/velox/type/Filter.h b/velox/type/Filter.h index c60fb98d9df4..4a84992f71d0 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -2042,16 +2042,14 @@ class MultiRange final : public Filter { /// All entries must support the same data types. /// @param nullAllowed Null values are passing the filter if true. nullAllowed /// flags in the 'ranges' filters are ignored. - /// @param nanAllowed Not-a-Number floating point values are passing the - /// filter if true. Applies to floating point data types only. NaN values are - /// not further tested using contained filters. + /// TODO: remove redundant param `nanAllowed` after presto removes the use of + /// this param. For now, we set a default value to avoid breaking presto. MultiRange( std::vector> filters, bool nullAllowed, - bool nanAllowed) + bool nanAllowed = false) : Filter(true, nullAllowed, FilterKind::kMultiRange), - filters_(std::move(filters)), - nanAllowed_(nanAllowed) {} + filters_(std::move(filters)) {} folly::dynamic serialize() const override; @@ -2083,15 +2081,10 @@ class MultiRange final : public Filter { std::unique_ptr mergeWith(const Filter* other) const override final; - bool nanAllowed() const { - return nanAllowed_; - } - bool testingEquals(const Filter& other) const final; private: const std::vector> filters_; - const bool nanAllowed_; }; // Helper for applying filters to different types diff --git a/velox/type/tests/FilterSerDeTest.cpp b/velox/type/tests/FilterSerDeTest.cpp index 03bd2dd363a0..0b4c52c1a902 100644 --- a/velox/type/tests/FilterSerDeTest.cpp +++ b/velox/type/tests/FilterSerDeTest.cpp @@ -152,7 +152,7 @@ TEST_F(FilterSerDeTest, multiFilter) { filters.emplace_back(std::make_unique( "ABCD", true, true, "FFFF", false, true, false)); - MultiRange multiRange(std::move(filters), true, true); + MultiRange multiRange(std::move(filters), true); testSerde(multiRange); } diff --git a/velox/type/tests/FilterTest.cpp b/velox/type/tests/FilterTest.cpp index b0e7ed5f0162..339b678adba3 100644 --- a/velox/type/tests/FilterTest.cpp +++ b/velox/type/tests/FilterTest.cpp @@ -1050,7 +1050,7 @@ TEST(FilterTest, multiRange) { EXPECT_TRUE(filter->testDouble(1.3)); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testDouble(std::nan("nan"))); + EXPECT_TRUE(filter->testDouble(std::nan("nan"))); EXPECT_FALSE(filter->testDouble(1.2)); filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.2)); @@ -1059,7 +1059,7 @@ TEST(FilterTest, multiRange) { EXPECT_TRUE(filter->testFloat(1.1f)); EXPECT_FALSE(filter->testFloat(1.2f)); EXPECT_TRUE(filter->testFloat(1.3f)); - EXPECT_FALSE(filter->testFloat(std::nanf("nan"))); + EXPECT_TRUE(filter->testFloat(std::nanf("nan"))); // != '' filter = orFilter(lessThan(""), greaterThan("")); @@ -1069,51 +1069,39 @@ TEST(FilterTest, multiRange) { TEST(FilterTest, multiRangeWithNaNs) { // x <> 1.2 with nanAllowed true - auto filter = - orFilter(lessThanFloat(1.2), greaterThanFloat(1.2), false, true); + auto filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.2), false); EXPECT_TRUE(filter->testFloat(std::nanf("nan"))); EXPECT_FALSE(filter->testFloat(1.2f)); EXPECT_TRUE(filter->testFloat(1.1f)); - filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.2), false, true); + filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.2), false); EXPECT_TRUE(filter->testDouble(std::nan("nan"))); EXPECT_FALSE(filter->testDouble(1.2)); EXPECT_TRUE(filter->testDouble(1.1)); // x <> 1.2 with nanAllowed false filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.2)); - EXPECT_FALSE(filter->testFloat(std::nanf("nan"))); + EXPECT_TRUE(filter->testFloat(std::nanf("nan"))); EXPECT_TRUE(filter->testFloat(1.0f)); filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.2)); - EXPECT_FALSE(filter->testDouble(std::nan("nan"))); + EXPECT_TRUE(filter->testDouble(std::nan("nan"))); EXPECT_TRUE(filter->testDouble(1.4)); // x NOT IN (1.2, 1.3) with nanAllowed true - filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.3), false, true); + filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.3), false); EXPECT_TRUE(filter->testFloat(std::nanf("nan"))); EXPECT_FALSE(filter->testFloat(1.2f)); EXPECT_FALSE(filter->testFloat(1.3f)); EXPECT_TRUE(filter->testFloat(1.4f)); EXPECT_TRUE(filter->testFloat(1.1f)); - filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.3), false, true); + filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.3), false); EXPECT_TRUE(filter->testDouble(std::nan("nan"))); EXPECT_FALSE(filter->testDouble(1.2)); EXPECT_FALSE(filter->testDouble(1.3)); EXPECT_TRUE(filter->testDouble(1.4)); EXPECT_TRUE(filter->testDouble(1.1)); - - // x NOT IN (1.2) with nanAllowed false - filter = orFilter(lessThanFloat(1.2), greaterThanFloat(1.2)); - EXPECT_FALSE(filter->testFloat(std::nanf("nan"))); - EXPECT_FALSE(filter->testFloat(1.2f)); - EXPECT_TRUE(filter->testFloat(1.3f)); - - filter = orFilter(lessThanDouble(1.2), greaterThanDouble(1.2)); - EXPECT_FALSE(filter->testDouble(std::nan("nan"))); - EXPECT_FALSE(filter->testDouble(1.2)); - EXPECT_TRUE(filter->testDouble(1.3)); } TEST(FilterTest, createBigintValues) { diff --git a/velox/type/tests/NegatedBytesRangeBenchmark.cpp b/velox/type/tests/NegatedBytesRangeBenchmark.cpp index 3ed0b3cef249..8459e6db74e6 100644 --- a/velox/type/tests/NegatedBytesRangeBenchmark.cpp +++ b/velox/type/tests/NegatedBytesRangeBenchmark.cpp @@ -169,8 +169,8 @@ int32_t main(int32_t argc, char** argv) { "", true, false, lo, false, true, false)); rangeFilters.emplace_back(std::make_unique( hi, false, false, "", true, false, false)); - multiRanges.emplace_back(std::make_unique( - std::move(rangeFilters), false, false)); + multiRanges.emplace_back( + std::make_unique(std::move(rangeFilters), false)); LOG(INFO) << "Generated filter for length " << len << " with percentage " << pct; diff --git a/velox/type/tests/NegatedBytesValuesBenchmark.cpp b/velox/type/tests/NegatedBytesValuesBenchmark.cpp index cac1d449f904..78da7f714dca 100644 --- a/velox/type/tests/NegatedBytesValuesBenchmark.cpp +++ b/velox/type/tests/NegatedBytesValuesBenchmark.cpp @@ -178,7 +178,7 @@ int32_t main(int32_t argc, char* argv[]) { range_filters.emplace_back(std::make_unique( *back, false, true, "", true, true, false)); multi_ranges.emplace_back(std::make_unique( - std::move(range_filters), false, false)); + std::move(range_filters), false)); LOG(INFO) << "Generated filter for length " << len << " with size " << size;