Skip to content

Commit

Permalink
Fix NaN handling in map_subset (#9893)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #9893

Ensure NaNs values of any binary representations are treated as equal and
can be identified as keys in a map.

Summary of changes:
- For primitive type keys: Use a map that employs the correct hashing and
equality operators for floating types.
- For complex type keys: It uses GenericView as a key that uses BaseVector's
hashing and equality operators. Those recursively call SimpleVector's
respective functions that were addressed in #9963

Reviewed By: spershin

Differential Revision: D57681657

fbshipit-source-id: 0ce281f303dd0633e698e4b386627c3831567d92
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed Jun 4, 2024
1 parent cda5d8a commit a10bd49
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 3 deletions.
3 changes: 2 additions & 1 deletion velox/docs/functions/presto/map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ Map Functions

.. function:: map_subset(map(K,V), array(k)) -> map(K,V)

Constructs a map from those entries of ``map`` for which the key is in the array given::
Constructs a map from those entries of ``map`` for which the key is in the array given
For keys containing REAL and DOUBLE, NANs (Not-a-Number) are considered equal. ::

SELECT map_subset(MAP(ARRAY[1,2], ARRAY['a','b']), ARRAY[10]); -- {}
SELECT map_subset(MAP(ARRAY[1,2], ARRAY['a','b']), ARRAY[1]); -- {1->'a'}
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/prestosql/MapSubset.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "velox/expression/ComplexViewTypes.h"
#include "velox/functions/Udf.h"
#include "velox/type/FloatingPointUtil.h"

namespace facebook::velox::functions {

Expand Down Expand Up @@ -84,7 +85,7 @@ struct MapSubsetPrimitiveFunction {
}

bool constantSearchKeys_{false};
folly::F14FastSet<arg_type<Key>> searchKeys_;
util::floating_point::HashSetNaNAware<arg_type<Key>> searchKeys_;
};

/// Fast path for constant string keys: map_subset(m, array['a', 'b', 'c']).
Expand Down
67 changes: 66 additions & 1 deletion velox/functions/prestosql/tests/MapSubsetTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,67 @@ using namespace facebook::velox::test;
namespace facebook::velox::functions {
namespace {

class MapSubsetTest : public test::FunctionBaseTest {};
class MapSubsetTest : public test::FunctionBaseTest {
public:
template <typename T>
void testFloatNaNs() {
static const auto kNaN = std::numeric_limits<T>::quiet_NaN();
static const auto kSNaN = std::numeric_limits<T>::signaling_NaN();

// Case 1: Non-constant search keys.
auto data = makeRowVector(
{makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 3:null, 4:40, 5:50, 6:60}",
"{NaN:20}",
}),
makeArrayVector<T>({{1, kNaN, 5}, {kSNaN, 3}})});

auto expected = makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 5:50}",
"{NaN:20}",
});
auto result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);

// Case 2: Constant search keys.
data = makeRowVector(
{makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 3:null, 4:40, 5:50, 6:60}",
"{NaN:20}",
}),
BaseVector::wrapInConstant(2, 0, makeArrayVector<T>({{1, kNaN, 5}}))});
expected = makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 5:50}",
"{NaN:20}",
});
result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);

// Case 3: Map with Complex type as key.
// Map: { [{1, NaN,3}: 1, {4, 5}: 2], [{NaN, 3}: 3, {1, 2}: 4] }
data = makeRowVector({
makeMapVector(
{0, 2},
makeArrayVector<T>({{1, kNaN, 3}, {4, 5}, {kSNaN, 3}, {1, 2}}),
makeFlatVector<int32_t>({1, 2, 3, 4})),
makeNestedArrayVectorFromJson<T>({
"[[1, NaN, 3], [4, 5]]",
"[[1, 2, 3], [NaN, 3]]",
}),
});
expected = makeMapVector(
{0, 2},
makeArrayVectorFromJson<T>({
"[1, NaN, 3]",
"[4, 5]",
"[NaN, 3]",
}),
makeFlatVector<int32_t>({1, 2, 3}));

result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);
}
};

TEST_F(MapSubsetTest, bigintKey) {
auto data = makeRowVector({
Expand Down Expand Up @@ -133,5 +193,10 @@ TEST_F(MapSubsetTest, arrayKey) {
assertEqualVectors(expected, result);
}

TEST_F(MapSubsetTest, floatNaNs) {
testFloatNaNs<float>();
testFloatNaNs<double>();
}

} // namespace
} // namespace facebook::velox::functions
1 change: 1 addition & 0 deletions velox/vector/tests/utils/VectorMaker.h
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,7 @@ class VectorMaker {
folly::json::serialization_opts options;
options.convert_int_keys = true;
options.allow_non_string_keys = true;
options.allow_nan_inf = true;
folly::dynamic mapObject = folly::parseJson(jsonMap, options);
if (mapObject.isNull()) {
// Null map.
Expand Down

0 comments on commit a10bd49

Please sign in to comment.