From 84ff83ba82c796400532a03b1de86fd19f9df63d Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Wed, 19 Feb 2025 20:01:39 +0300 Subject: [PATCH 1/6] add murmurhash implementation --- common/algorithms/hashes.cpp | 70 ++++++++++++++++++++++++++++++++++++ common/algorithms/hashes.h | 16 +++++---- 2 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 common/algorithms/hashes.cpp diff --git a/common/algorithms/hashes.cpp b/common/algorithms/hashes.cpp new file mode 100644 index 0000000000..938d11ed90 --- /dev/null +++ b/common/algorithms/hashes.cpp @@ -0,0 +1,70 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "common/algorithms/hashes.h" + +#include +#include + +namespace { + +size_t unaligned_load(const char *p) { + size_t result = 0; + __builtin_memcpy(&result, p, sizeof(result)); + return result; +} + +// Loads n bytes, where 1 <= n < 8. +size_t load_bytes(const char *p, int n) { + size_t result = 0; + --n; + do { + result = (result << 8) + static_cast(p[n]); + } while (--n >= 0); + return result; +} + +size_t shift_mix(size_t v) { + return v ^ (v >> 47); +} + +} // namespace + +namespace vk { + +// MurMur hash function was taken from libstdc++ +template<> +uint64_t murmur_hash(const void *ptr, size_t len, size_t seed) noexcept { + static const size_t mul = (static_cast(0xc6a4a793UL) << 32UL) + static_cast(0x5bd1e995UL); + const char *const buf = static_cast(ptr); + // Remove the bytes not divisible by the sizeof(size_t). This + // allows the main loop to process the data as 64-bit integers. + const int len_aligned = len & ~0x7; + const char *const end = buf + len_aligned; + size_t hash = seed ^ (len * mul); + for (const char *p = buf; p != end; p += 8) { + const size_t data = shift_mix(unaligned_load(p) * mul) * mul; + hash ^= data; + hash *= mul; + } + if ((len & 0x7) != 0) { + const size_t data = load_bytes(end, len & 0x7); + hash ^= data; + hash *= mul; + } + hash = shift_mix(hash) * mul; + hash = shift_mix(hash); + return hash; +} + +template<> +uint32_t murmur_hash(const void *ptr, size_t len, size_t seed) noexcept { + uint64_t res = murmur_hash(ptr, len, seed); + uint64_t mask = (1UL << 32UL) - 1; + auto head = static_cast(res & mask); + auto tail = static_cast((res >> 32UL) & (mask)); + return head ^ tail; +} + +} // namespace vk diff --git a/common/algorithms/hashes.h b/common/algorithms/hashes.h index 6da073f881..7ecde65f4e 100644 --- a/common/algorithms/hashes.h +++ b/common/algorithms/hashes.h @@ -6,10 +6,10 @@ #define ENGINE_HASHES_H #include +#include #include #include #include -#include #include #include "common/type_traits/range_value_type.h" @@ -17,6 +17,9 @@ namespace vk { +template +T murmur_hash(const void *ptr, size_t len, size_t seed = static_cast(0xc70f6907UL)) noexcept; + inline void hash_combine(size_t &seed, size_t new_hash) { const uint64_t m = 0xc6a4a7935bd1e995; const uint32_t r = 47; @@ -47,9 +50,8 @@ size_t hash_range(const Rng &range, Hasher hasher = Hasher()) { template>> class range_hasher : Hasher { public: - explicit range_hasher(Hasher hasher = Hasher()) : - Hasher(std::move(hasher)) { - } + explicit range_hasher(Hasher hasher = Hasher()) + : Hasher(std::move(hasher)) {} size_t operator()(const Rng &range) const { return hash_range(range, static_cast(*this)); @@ -62,8 +64,8 @@ size_t std_hash(const T &obj) { return std::hash{}(obj); } -template -size_t hash_sequence(const Ts &... val) { +template +size_t hash_sequence(const Ts &...val) { size_t res = 0; auto hashes = std::array{vk::std_hash(val)...}; for (auto hash : hashes) { @@ -79,7 +81,7 @@ namespace std { template class hash> { public: - size_t operator()(const std::pair & pair) const { + size_t operator()(const std::pair &pair) const { return vk::hash_sequence(pair.first, pair.second); } }; From f98c3a5340fe856b5b2b32d084095043713a0542 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Thu, 20 Feb 2025 16:17:28 +0300 Subject: [PATCH 2/6] fix copyright year --- common/algorithms/hashes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/algorithms/hashes.cpp b/common/algorithms/hashes.cpp index 938d11ed90..4717b7f38d 100644 --- a/common/algorithms/hashes.cpp +++ b/common/algorithms/hashes.cpp @@ -1,5 +1,5 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2025 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "common/algorithms/hashes.h" From b21a75efa78240dfc6e75b917783cd9367c12274 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Fri, 21 Feb 2025 13:01:29 +0300 Subject: [PATCH 3/6] replace static const variable with static constexpr --- common/algorithms/hashes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/algorithms/hashes.cpp b/common/algorithms/hashes.cpp index 4717b7f38d..944124cc7f 100644 --- a/common/algorithms/hashes.cpp +++ b/common/algorithms/hashes.cpp @@ -36,7 +36,7 @@ namespace vk { // MurMur hash function was taken from libstdc++ template<> uint64_t murmur_hash(const void *ptr, size_t len, size_t seed) noexcept { - static const size_t mul = (static_cast(0xc6a4a793UL) << 32UL) + static_cast(0x5bd1e995UL); + static constexpr size_t mul = (static_cast(0xc6a4a793UL) << 32UL) + static_cast(0x5bd1e995UL); const char *const buf = static_cast(ptr); // Remove the bytes not divisible by the sizeof(size_t). This // allows the main loop to process the data as 64-bit integers. From 2d345be5b173d825e3cf33e2678f1d8120342314 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Mon, 24 Feb 2025 18:22:58 +0300 Subject: [PATCH 4/6] mark some functions noexcept --- common/algorithms/hashes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/algorithms/hashes.cpp b/common/algorithms/hashes.cpp index 944124cc7f..a2799ff2de 100644 --- a/common/algorithms/hashes.cpp +++ b/common/algorithms/hashes.cpp @@ -9,14 +9,14 @@ namespace { -size_t unaligned_load(const char *p) { +size_t unaligned_load(const char *p) noexcept { size_t result = 0; __builtin_memcpy(&result, p, sizeof(result)); return result; } // Loads n bytes, where 1 <= n < 8. -size_t load_bytes(const char *p, int n) { +size_t load_bytes(const char *p, int n) noexcept { size_t result = 0; --n; do { @@ -25,7 +25,7 @@ size_t load_bytes(const char *p, int n) { return result; } -size_t shift_mix(size_t v) { +size_t shift_mix(size_t v) noexcept { return v ^ (v >> 47); } From 43002982226dc4a25d30e192a0e09f06d6e178f2 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Tue, 25 Feb 2025 13:32:05 +0300 Subject: [PATCH 5/6] rename hashes.cpp -> murmur-hash.cpp and change license --- .../{hashes.cpp => murmur-hash.cpp} | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) rename common/algorithms/{hashes.cpp => murmur-hash.cpp} (68%) diff --git a/common/algorithms/hashes.cpp b/common/algorithms/murmur-hash.cpp similarity index 68% rename from common/algorithms/hashes.cpp rename to common/algorithms/murmur-hash.cpp index a2799ff2de..2ae39f2059 100644 --- a/common/algorithms/hashes.cpp +++ b/common/algorithms/murmur-hash.cpp @@ -1,6 +1,19 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2025 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt +// Modified by LLC «V Kontakte», 2025 February 25 +// +// This file is part of the GNU C Library. +// Copyright (C) 2002-2024 Free Software Foundation, Inc. +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . #include "common/algorithms/hashes.h" From 5b426ca8f00f1b45b177210bcb69f14f53f014b4 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Thu, 27 Feb 2025 12:11:31 +0300 Subject: [PATCH 6/6] remove unnecessary diff --- common/algorithms/hashes.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/common/algorithms/hashes.h b/common/algorithms/hashes.h index 7ecde65f4e..773c2d3d56 100644 --- a/common/algorithms/hashes.h +++ b/common/algorithms/hashes.h @@ -50,8 +50,9 @@ size_t hash_range(const Rng &range, Hasher hasher = Hasher()) { template>> class range_hasher : Hasher { public: - explicit range_hasher(Hasher hasher = Hasher()) - : Hasher(std::move(hasher)) {} + explicit range_hasher(Hasher hasher = Hasher()) : + Hasher(std::move(hasher)) { + } size_t operator()(const Rng &range) const { return hash_range(range, static_cast(*this)); @@ -64,8 +65,8 @@ size_t std_hash(const T &obj) { return std::hash{}(obj); } -template -size_t hash_sequence(const Ts &...val) { +template +size_t hash_sequence(const Ts &... val) { size_t res = 0; auto hashes = std::array{vk::std_hash(val)...}; for (auto hash : hashes) { @@ -81,7 +82,7 @@ namespace std { template class hash> { public: - size_t operator()(const std::pair &pair) const { + size_t operator()(const std::pair & pair) const { return vk::hash_sequence(pair.first, pair.second); } };