Skip to content

Commit

Permalink
add type matcher to explicitly raise NotImplemented for unsupported t…
Browse files Browse the repository at this point in the history
…ypes
  • Loading branch information
kszucs committed Dec 19, 2024
1 parent 4ac6c81 commit d046d7a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
26 changes: 24 additions & 2 deletions cpp/src/arrow/compute/kernels/scalar_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,26 @@ struct FastHashScalar {
}
};

/// \brief Type matcher that accepts every data type except the ones it
/// explicitly rejects: unions, binary-view-like types, list-views and
/// run-end-encoded types.
class HashableMatcher : public TypeMatcher {
 public:
  HashableMatcher() = default;

  /// \brief Return true unless `type` belongs to one of the rejected families.
  bool Matches(const DataType& type) const override {
    // Each guard mirrors one rejected type family; anything that falls
    // through all of them is accepted.
    if (is_union(type)) {
      return false;
    }
    if (is_binary_view_like(type)) {
      return false;
    }
    if (is_list_view(type)) {
      return false;
    }
    if (type.id() == Type::RUN_END_ENCODED) {
      return false;
    }
    return true;
  }

  /// \brief Two matchers are equal when `other` is the same object or is
  /// itself a HashableMatcher (the matcher carries no state to compare).
  bool Equals(const TypeMatcher& other) const override {
    return this == &other ||
           dynamic_cast<const HashableMatcher*>(&other) != nullptr;
  }

  /// \brief Short name identifying this matcher.
  std::string ToString() const override { return "hashable"; }
};

const FunctionDoc hash32_doc{
"Construct a hash for every element of the input argument",
("This function is not suitable for cryptographic purposes.\n"
Expand All @@ -191,6 +211,7 @@ const FunctionDoc hash64_doc{
("This function is not suitable for cryptographic purposes.\n"
"Hash results are 64-bit and emitted for each row, including NULLs."),
{"hash_input"}};

} // namespace

void RegisterScalarHash(FunctionRegistry* registry) {
Expand All @@ -199,9 +220,10 @@ void RegisterScalarHash(FunctionRegistry* registry) {
auto hash64 = std::make_shared<ScalarFunction>("hash64", Arity::Unary(), hash64_doc);

// Add 32-bit and 64-bit kernels to hash32 and hash64 functions
ScalarKernel kernel32({InputType()}, OutputType(uint32()),
auto type_matcher = std::make_shared<HashableMatcher>();
ScalarKernel kernel32({InputType(type_matcher)}, OutputType(uint32()),
FastHashScalar<UInt32Type, Hashing32>::Exec);
ScalarKernel kernel64({InputType()}, OutputType(uint64()),
ScalarKernel kernel64({InputType(type_matcher)}, OutputType(uint64()),
FastHashScalar<UInt64Type, Hashing64>::Exec);
kernel32.null_handling = NullHandling::OUTPUT_NOT_NULL;
kernel64.null_handling = NullHandling::OUTPUT_NOT_NULL;
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_hash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,22 @@ TEST_F(TestScalarHash, RandomMap) {
}
}

// Verifies that hash32 and hash64 report NotImplemented for list-view,
// binary/string-view, union and run-end-encoded inputs.
//
// NOTE(review): the test name contains a typo ("Unsuppoerted"); it is kept
// as-is so existing --gtest_filter expressions keep working.
TEST_F(TestScalarHash, UnsuppoertedTypes) {
  auto rand = random::RandomArrayGenerator(kSeed);
  auto types = {list_view(int64()),
                large_list_view(int64()),
                binary_view(),
                utf8_view(),
                dense_union({field("a", int64()), field("b", binary())}),
                sparse_union({field("a", int64()), field("b", binary())}),
                run_end_encoded(int16(), utf8())};
  // Bind by const reference: copying a shared_ptr on every iteration only
  // adds atomic reference-count traffic.
  for (const auto& type : types) {
    // Name the type in the failure output so a failing assertion inside the
    // loop can be attributed to a specific input type.
    SCOPED_TRACE(type->ToString());
    auto arr = rand.ArrayOf(type, 1, 0);
    ASSERT_RAISES(NotImplemented, CallFunction("hash32", {arr}));
    ASSERT_RAISES(NotImplemented, CallFunction("hash64", {arr}));
  }
}

// copied from cpp/src/arrow/util/hashing_test.cc
template <typename Integer>
static std::unordered_set<Integer> MakeSequentialIntegers(int32_t n_values) {
Expand Down

0 comments on commit d046d7a

Please sign in to comment.