Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add murmur hash implementation #1239

Merged
merged 6 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions common/algorithms/hashes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Compiler for PHP (aka KPHP)
// Copyright (c) 2025 LLC «V Kontakte»
// Distributed under the GPL v3 License, see LICENSE.notice.txt

#include "common/algorithms/hashes.h"

#include <cstddef>
#include <cstdint>

namespace {

// Reads one size_t-sized word starting at p. The bytes are copied out rather
// than accessed through a size_t pointer, so p needs no particular alignment.
size_t unaligned_load(const char *p) {
  size_t word{0};
  __builtin_memcpy(&word, p, sizeof(size_t));
  return word;
}

// Packs the n bytes at p into an integer, where 1 <= n < 8.
// p[0] ends up in the least significant byte and p[n - 1] in the most
// significant occupied byte; each byte is treated as unsigned.
size_t load_bytes(const char *p, int n) {
  // Seed the accumulator with the last byte, then prepend the remaining
  // bytes while walking back towards p[0].
  size_t packed = static_cast<unsigned char>(p[n - 1]);
  for (int idx = n - 2; idx >= 0; --idx) {
    packed = (packed << 8) | static_cast<unsigned char>(p[idx]);
  }
  return packed;
}

// Mixing step: XORs v with a copy of itself shifted right by 47 bits.
size_t shift_mix(size_t v) {
  const size_t folded = v >> 47;
  return v ^ folded;
}

} // namespace

namespace vk {

// 64-bit Murmur hash; the algorithm was taken from libstdc++.
//
// Hashes the `len` bytes starting at `ptr`, mixing in `seed`. The input is
// consumed as 8-byte words (read via unaligned_load, so `ptr` needs no
// particular alignment), followed by a tail of 1..7 leftover bytes if `len`
// is not a multiple of 8.
template<>
uint64_t murmur_hash<uint64_t>(const void *ptr, size_t len, size_t seed) noexcept {
  // Same value as (0xc6a4a793 << 32) + 0x5bd1e995.
  static constexpr size_t mul = static_cast<size_t>(0xc6a4a7935bd1e995ULL);
  const char *const buf = static_cast<const char *>(ptr);
  // Round the length down to a multiple of 8 so the main loop can process the
  // data as 64-bit integers. This has to stay a size_t: narrowing it to int
  // yields a wrong `end` pointer for inputs of 2 GiB and more.
  const size_t len_aligned = len & ~static_cast<size_t>(0x7);
  const size_t tail_len = len & static_cast<size_t>(0x7);
  const char *const end = buf + len_aligned;
  size_t hash = seed ^ (len * mul);
  for (const char *p = buf; p != end; p += 8) {
    const size_t data = shift_mix(unaligned_load(p) * mul) * mul;
    hash ^= data;
    hash *= mul;
  }
  if (tail_len != 0) {
    // tail_len is in 1..7 here, which is exactly what load_bytes expects.
    const size_t data = load_bytes(end, static_cast<int>(tail_len));
    hash ^= data;
    hash *= mul;
  }
  // Final avalanche.
  hash = shift_mix(hash) * mul;
  hash = shift_mix(hash);
  return hash;
}

// 32-bit flavour: computes the 64-bit hash of the same input and folds it by
// XOR-ing its low and high 32-bit halves.
template<>
uint32_t murmur_hash<uint32_t>(const void *ptr, size_t len, size_t seed) noexcept {
  const uint64_t wide = murmur_hash<uint64_t>(ptr, len, seed);
  // Conversion to uint32_t keeps exactly the low 32 bits, so no mask is
  // needed. This also avoids `1UL << 32`, which is undefined where
  // unsigned long is only 32 bits wide.
  const auto low = static_cast<uint32_t>(wide);
  const auto high = static_cast<uint32_t>(wide >> 32);
  return low ^ high;
}

} // namespace vk
16 changes: 9 additions & 7 deletions common/algorithms/hashes.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@
#define ENGINE_HASHES_H

#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <numeric>
#include <utility>

#include "common/type_traits/range_value_type.h"
#include "common/wrappers/span.h"

namespace vk {

// Murmur hash of the `len` bytes starting at `ptr`, mixed with `seed`
// (defaults to 0xc70f6907).
// Only the declaration lives here; explicit specializations for uint64_t and
// uint32_t are defined in common/algorithms/hashes.cpp.
// NOTE(review): any other T presumably fails at link time — confirm.
template<typename T>
T murmur_hash(const void *ptr, size_t len, size_t seed = static_cast<size_t>(0xc70f6907UL)) noexcept;

inline void hash_combine(size_t &seed, size_t new_hash) {
const uint64_t m = 0xc6a4a7935bd1e995;
const uint32_t r = 47;
Expand Down Expand Up @@ -47,9 +50,8 @@ size_t hash_range(const Rng &range, Hasher hasher = Hasher()) {
template<class Rng, class Hasher = std::hash<range_value_type<Rng>>>
class range_hasher : Hasher {
public:
explicit range_hasher(Hasher hasher = Hasher()) :
Hasher(std::move(hasher)) {
}
explicit range_hasher(Hasher hasher = Hasher())
: Hasher(std::move(hasher)) {}

size_t operator()(const Rng &range) const {
return hash_range(range, static_cast<const Hasher &>(*this));
Expand All @@ -62,8 +64,8 @@ size_t std_hash(const T &obj) {
return std::hash<T>{}(obj);
}

template<class ...Ts>
size_t hash_sequence(const Ts &... val) {
template<class... Ts>
size_t hash_sequence(const Ts &...val) {
size_t res = 0;
auto hashes = std::array<size_t, sizeof...(Ts)>{vk::std_hash(val)...};
for (auto hash : hashes) {
Expand All @@ -79,7 +81,7 @@ namespace std {
// std::hash specialization for std::pair: delegates to
// vk::hash_sequence(pair.first, pair.second).
template<class T1, class T2>
class hash<std::pair<T1, T2>> {
public:
size_t operator()(const std::pair<T1, T2> & pair) const {
size_t operator()(const std::pair<T1, T2> &pair) const {
return vk::hash_sequence(pair.first, pair.second);
}
};
Expand Down
Loading