From 868d4840e7ed48567cda6e09cb3bbbdd8988b2b5 Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Tue, 25 Jul 2023 23:43:53 +0000 Subject: [PATCH] Super fast binaries_search --- .secret_cpp/bench_psi.cpp | 147 ++ .secret_cpp/{not_main.cpp => category.cpp} | 1 - .secret_cpp/include/hopscotch_growth_policy.h | 405 ++++ .secret_cpp/include/psi.hpp | 35 + .secret_cpp/include/qpx.hpp | 49 +- .secret_cpp/include/tsl/hopscotch_hash.h | 1896 +++++++++++++++++ .secret_cpp/include/tsl/hopscotch_set.h | 595 ++++++ .secret_cpp/main.cpp | 109 - 8 files changed, 3108 insertions(+), 129 deletions(-) create mode 100644 .secret_cpp/bench_psi.cpp rename .secret_cpp/{not_main.cpp => category.cpp} (99%) create mode 100644 .secret_cpp/include/hopscotch_growth_policy.h create mode 100644 .secret_cpp/include/psi.hpp create mode 100644 .secret_cpp/include/tsl/hopscotch_hash.h create mode 100644 .secret_cpp/include/tsl/hopscotch_set.h delete mode 100644 .secret_cpp/main.cpp diff --git a/.secret_cpp/bench_psi.cpp b/.secret_cpp/bench_psi.cpp new file mode 100644 index 0000000..944ebdc --- /dev/null +++ b/.secret_cpp/bench_psi.cpp @@ -0,0 +1,147 @@ + +#include +#include +#include "qpx.hpp" +#include "psi.hpp" +#include +#include +#include +#include +#include +#include +#include + +spin_det_t parse_line(std::string s) { + spin_det_t sd; + for(int i = 0; i < s.size(); i++) { + if(s[i] == '-') sd.push_back(0); + if(s[i] == '+') sd.push_back(1); + } + return sd; +} + +std::vector setup() { + std::vector psi; + + std::ifstream fs("/home/applenco/QuantumEnvelope/data/c2_eq_hf_dz_15.101224det.wf"); + while(true) { + std::string coef_str_or_empty; + if(!std::getline(fs, coef_str_or_empty)) break; + + std::string alpha_str; + std::getline(fs, alpha_str); + parse_line(alpha_str); + std::string beta_str; + std::getline(fs, beta_str); + std::string emptyline_str; + std::getline(fs, emptyline_str); + + psi.push_back({parse_line(alpha_str), parse_line(beta_str)}); + } + return psi; +} + +bool get_element_naive(det_t target, std::vector& psi) { + for(auto& det : psi) { + if(det == target) return true; + } + return false; +} + +static void NaiveLookup(benchmark::State& state) { + auto psi = setup(); + std::sort(psi.begin(), psi.end()); + + + std::vector psi_random{psi.begin(), psi.end()}; + std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); + + for(auto _ : state) { + for(auto d : psi_random) { + bool f = get_element_naive(d, psi); + benchmark::DoNotOptimize(f); + } + } + state.counters["LookupRate"] = + benchmark::Counter(psi.size(), benchmark::Counter::kIsIterationInvariantRate); +} + +static void BinarySearchLookup(benchmark::State& state) { + auto psi = setup(); + std::sort(psi.begin(), psi.end()); + + + std::vector psi_random{psi.begin(), psi.end()}; + std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); + + for(auto _ : state) { + for(auto d : psi_random) { + bool f = std::binary_search(psi.begin(), psi.end(), d); + benchmark::DoNotOptimize(f); + } + } + state.counters["LookupRate"] = + benchmark::Counter(psi.size(), benchmark::Counter::kIsIterationInvariantRate); +} + +static void BinarySearchesLookup(benchmark::State& state) { + auto psi = setup(); + std::sort(psi.begin(), psi.end()); + + std::vector vals{psi.begin(), psi.end()}; + std::vector result(vals.size()); + + for(auto _ : state) { + binary_searchs(psi.begin(), psi.end(), vals.begin(), vals.end(), vals.begin() + vals.size() / 2, + result.begin() + vals.size() / 2); + } + + state.counters["LookupRate"] = + benchmark::Counter(psi.size(), benchmark::Counter::kIsIterationInvariantRate); +} + + +static void stdHashLookup(benchmark::State& state) { + auto psi = setup(); + std::unordered_set psi_s; + for(auto d : psi) { psi_s.insert(d); } + + std::vector psi_random{psi.begin(), psi.end()}; + std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); + + for(auto _ : state) { + for(auto d : psi_random) { + bool f = (psi_s.find(d) != psi_s.end()); + benchmark::DoNotOptimize(f); + } + } + state.counters["LookupRate"] = + benchmark::Counter(psi.size(), benchmark::Counter::kIsIterationInvariantRate); +} + +static void tslHashLookup(benchmark::State& state) { + auto psi = setup(); + tsl::hopscotch_set psi_s; + for(auto d : psi) { psi_s.insert(d); } + + std::vector psi_random{psi.begin(), psi.end()}; + std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); + + for(auto _ : state) { + for(auto d : psi_random) { + bool f = (psi_s.find(d) != psi_s.end()); + benchmark::DoNotOptimize(f); + } + } + state.counters["LookupRate"] = + benchmark::Counter(psi.size(), benchmark::Counter::kIsIterationInvariantRate); +} + +// Register the function as a benchmark +//BENCHMARK(NaiveLookup); +BENCHMARK(BinarySearchLookup); +BENCHMARK(BinarySearchesLookup); +BENCHMARK(stdHashLookup); +BENCHMARK(tslHashLookup); +// Run the benchmark +BENCHMARK_MAIN(); diff --git a/.secret_cpp/not_main.cpp b/.secret_cpp/category.cpp similarity index 99% rename from .secret_cpp/not_main.cpp rename to .secret_cpp/category.cpp index 68a3c70..1a65f0c 100644 --- a/.secret_cpp/not_main.cpp +++ b/.secret_cpp/category.cpp @@ -52,7 +52,6 @@ spin_det_t vec_to_spin_det(std::vector idx, size_t n_orb) { // Phase - spin_det_t get_phase_mask(spin_det_t p) { size_t i = 0; while(true) { diff --git a/.secret_cpp/include/hopscotch_growth_policy.h b/.secret_cpp/include/hopscotch_growth_policy.h new file mode 100644 index 0000000..80b5b01 --- /dev/null +++ b/.secret_cpp/include/hopscotch_growth_policy.h @@ -0,0 +1,405 @@ +/** + * MIT License + * + * Copyright (c) 2018 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H +#define TSL_HOPSCOTCH_GROWTH_POLICY_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Only activate tsl_hh_assert if TSL_DEBUG is defined. + * This way we avoid the performance hit when NDEBUG is not defined with assert + * as tsl_hh_assert is used a lot (people usually compile with "-O3" and not + * "-O3 -DNDEBUG"). + */ +#ifdef TSL_DEBUG +#define tsl_hh_assert(expr) assert(expr) +#else +#define tsl_hh_assert(expr) (static_cast(0)) +#endif + +/** + * If exceptions are enabled, throw the exception passed in parameter, otherwise + * call std::terminate. + */ +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \ + (defined(_MSC_VER) && defined(_CPPUNWIND))) && \ + !defined(TSL_NO_EXCEPTIONS) +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg) +#else +#define TSL_HH_NO_EXCEPTIONS +#include +#ifdef TSL_DEBUG +#include +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) \ + do { \ + std::cerr << msg << std::endl; \ + std::terminate(); \ + } while (0) +#else +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate() +#endif +#endif + +namespace tsl { +namespace hh { + +/** + * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a + * power of two. It allows the table to use a mask operation instead of a modulo + * operation to map a hash to a bucket. + * + * GrowthFactor must be a power of two >= 2. + */ +template +class power_of_two_growth_policy { + public: + /** + * Called on the hash table creation and on rehash. The number of buckets for + * the table is passed in parameter. This number is a minimum, the policy may + * update this value with a higher value if needed (but not lower). + * + * If 0 is given, min_bucket_count_in_out must still be 0 after the policy + * creation and bucket_for_hash must always return 0 in this case. + */ + explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = + round_up_to_power_of_two(min_bucket_count_in_out); + m_mask = min_bucket_count_in_out - 1; + } else { + m_mask = 0; + } + } + + /** + * Return the bucket [0, bucket_count()) to which the hash belongs. + * If bucket_count() is 0, it must always return 0. + */ + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash & m_mask; + } + + /** + * Return the bucket count to use when the bucket array grows on rehash. + */ + std::size_t next_bucket_count() const { + if ((m_mask + 1) > max_bucket_count() / GrowthFactor) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + return (m_mask + 1) * GrowthFactor; + } + + /** + * Return the maximum number of buckets supported by the policy. + */ + std::size_t max_bucket_count() const { + // Largest power of two. + return (std::numeric_limits::max() / 2) + 1; + } + + /** + * Reset the growth policy as if it was created with a bucket count of 0. + * After a clear, the policy must always return 0 when bucket_for_hash is + * called. + */ + void clear() noexcept { m_mask = 0; } + + private: + static std::size_t round_up_to_power_of_two(std::size_t value) { + if (is_power_of_two(value)) { + return value; + } + + if (value == 0) { + return 1; + } + + --value; + for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; + } + + static constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; + } + + private: + static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, + "GrowthFactor must be a power of two >= 2."); + + std::size_t m_mask; +}; + +/** + * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo + * to map a hash to a bucket. Slower but it can be useful if you want a slower + * growth. + */ +template > +class mod_growth_policy { + public: + explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + m_mod = min_bucket_count_in_out; + } else { + m_mod = 1; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash % m_mod; + } + + std::size_t next_bucket_count() const { + if (m_mod == max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + const double next_bucket_count = + std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); + if (!std::isnormal(next_bucket_count)) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (next_bucket_count > double(max_bucket_count())) { + return max_bucket_count(); + } else { + return std::size_t(next_bucket_count); + } + } + + std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; } + + void clear() noexcept { m_mod = 1; } + + private: + static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = + 1.0 * GrowthFactor::num / GrowthFactor::den; + static const std::size_t MAX_BUCKET_COUNT = + std::size_t(double(std::numeric_limits::max() / + REHASH_SIZE_MULTIPLICATION_FACTOR)); + + static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, + "Growth factor should be >= 1.1."); + + std::size_t m_mod; +}; + +namespace detail { + +#if SIZE_MAX >= ULLONG_MAX +#define TSL_HH_NB_PRIMES 51 +#elif SIZE_MAX >= ULONG_MAX +#define TSL_HH_NB_PRIMES 40 +#else +#define TSL_HH_NB_PRIMES 23 +#endif + +static constexpr const std::array PRIMES = {{ + 1u, + 5u, + 17u, + 29u, + 37u, + 53u, + 67u, + 79u, + 97u, + 131u, + 193u, + 257u, + 389u, + 521u, + 769u, + 1031u, + 1543u, + 2053u, + 3079u, + 6151u, + 12289u, + 24593u, + 49157u, +#if SIZE_MAX >= ULONG_MAX + 98317ul, + 196613ul, + 393241ul, + 786433ul, + 1572869ul, + 3145739ul, + 6291469ul, + 12582917ul, + 25165843ul, + 50331653ul, + 100663319ul, + 201326611ul, + 402653189ul, + 805306457ul, + 1610612741ul, + 3221225473ul, + 4294967291ul, +#endif +#if SIZE_MAX >= ULLONG_MAX + 6442450939ull, + 12884901893ull, + 25769803751ull, + 51539607551ull, + 103079215111ull, + 206158430209ull, + 412316860441ull, + 824633720831ull, + 1649267441651ull, + 3298534883309ull, + 6597069766657ull, +#endif +}}; + +template +static constexpr std::size_t mod(std::size_t hash) { + return hash % PRIMES[IPrime]; +} + +// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for +// faster modulo as the compiler can optimize the modulo code better with a +// constant known at the compilation. +static constexpr const std::array + MOD_PRIME = {{ + &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, + &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, &mod<11>, + &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, + &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, +#if SIZE_MAX >= ULONG_MAX + &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, + &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>, + &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>, +#endif +#if SIZE_MAX >= ULLONG_MAX + &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, + &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>, +#endif + }}; + +} // namespace detail + +/** + * Grow the hash table by using prime numbers as bucket count. Slower than + * tsl::hh::power_of_two_growth_policy in general but will probably distribute + * the values around better in the buckets with a poor hash function. + * + * To allow the compiler to optimize the modulo operation, a lookup table is + * used with constant primes numbers. + * + * With a switch the code would look like: + * \code + * switch(iprime) { // iprime is the current prime of the hash table + * case 0: hash % 5ul; + * break; + * case 1: hash % 17ul; + * break; + * case 2: hash % 29ul; + * break; + * ... + * } + * \endcode + * + * Due to the constant variable in the modulo the compiler is able to optimize + * the operation by a series of multiplications, substractions and shifts. + * + * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) + * * 5' in a 64 bits environment. + */ +class prime_growth_policy { + public: + explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { + auto it_prime = std::lower_bound( + detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out); + if (it_prime == detail::PRIMES.end()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + m_iprime = static_cast( + std::distance(detail::PRIMES.begin(), it_prime)); + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = *it_prime; + } else { + min_bucket_count_in_out = 0; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return detail::MOD_PRIME[m_iprime](hash); + } + + std::size_t next_bucket_count() const { + if (m_iprime + 1 >= detail::PRIMES.size()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + return detail::PRIMES[m_iprime + 1]; + } + + std::size_t max_bucket_count() const { return detail::PRIMES.back(); } + + void clear() noexcept { m_iprime = 0; } + + private: + unsigned int m_iprime; + + static_assert(std::numeric_limits::max() >= + detail::PRIMES.size(), + "The type of m_iprime is not big enough."); +}; + +} // namespace hh +} // namespace tsl + +#endif diff --git a/.secret_cpp/include/psi.hpp b/.secret_cpp/include/psi.hpp new file mode 100644 index 0000000..1aa6d22 --- /dev/null +++ b/.secret_cpp/include/psi.hpp @@ -0,0 +1,35 @@ +#pragma once + +template +void binary_searchs(ForwardIterator first, ForwardIterator last, ForwardIterator v_first, + ForwardIterator v_last, ForwardIterator v, ForwardIteratorBool i) { + // Return: `*v` ∈? [first, last) + // Precondition: `v` ∈ `v_first`, `v_last`. + + // pos of `*v` inside `v` array using binary search + auto pos = std::lower_bound(first, last, *v); + // Find it or not find it? + *i = (pos != last && !(*v < *pos)); + // Find new values to search for + // v_first v v_last + // |-----------|----------------| + { + // Botom Half [v_first, v) + // End the recursion when v_first == v, we search for all our values + // if distance == 1, need to search for v_first + auto d = std::distance(v_first, v); + if(pos != first && d > 1) binary_searchs(first, pos, v_first, v - 1, v - d / 2, i - d / 2); + } + { + // Top Half (v, v_last) + // We cannot deference v_last, so we stop the recursion when d == 1 + auto d = std::distance(v, v_last); + if(pos != last && d > 1) binary_searchs(pos, last, v + 1, v_last, v + d / 2, i + d / 2); + } +} + +template +void binary_searchs(T1& enumerable, T2& values, T3& result) { + binary_searchs(enumerable.begin(), enumerable.end(), values.begin(), values.end(), + values.begin() + values.size() / 2, result.begin() + values.size() / 2); +} diff --git a/.secret_cpp/include/qpx.hpp b/.secret_cpp/include/qpx.hpp index da29ca1..534f72d 100644 --- a/.secret_cpp/include/qpx.hpp +++ b/.secret_cpp/include/qpx.hpp @@ -1,7 +1,10 @@ +#pragma once + #include #include -#include - +#include +#include +#include typedef sul::dynamic_bitset<> spin_det_t; @@ -11,28 +14,37 @@ struct std::hash { return std::hash()(s.to_string()); } }; + #define N_SPIN_SPECIES 2 -//typedef std::array det_t; -class det_t { // The class +class det_t { // The class - public: // Access specifier - spin_det_t alpha; - spin_det_t beta; +public: // Access specifier + spin_det_t alpha; + spin_det_t beta; - det_t(spin_det_t _alpha, spin_det_t _beta) { - alpha = _alpha; - beta = _beta; - } + det_t(spin_det_t _alpha, spin_det_t _beta) { + alpha = _alpha; + beta = _beta; + } - bool operator < ( const det_t &b ) const { - return (alpha < b.alpha) && (beta < b.beta) ; - } + bool operator<(const det_t& b) const { + if(alpha == b.alpha) return (beta < b.beta); + return (alpha < b.alpha); + } - bool operator == ( const det_t &b ) const { - return (alpha == b.alpha) && (beta == b.beta) ; - } + bool operator==(const det_t& b) const { return (alpha == b.alpha) && (beta == b.beta); } + spin_det_t& operator[](unsigned i) { + assert(i < N_SPIN_SPECIES); + switch(i) { + case 0: return alpha; + // Avoid `non-void function does not return a value in all control paths` + default: return beta; + } + } + // https://stackoverflow.com/a/27830679/7674852 seem to recommand doing the other way arround + const spin_det_t& operator[](unsigned i) const { return (*this)[i]; } }; template<> @@ -43,6 +55,7 @@ struct std::hash { return h1 ^ (h2 << 1); } }; + std::ostream& operator<<(std::ostream& os, const det_t& obj) { return os << "(" << obj.alpha << "," << obj.beta << ")"; } @@ -54,5 +67,3 @@ typedef sul::dynamic_bitset<> spin_unoccupancy_mask_t; typedef std::array unoccupancy_mask_t; typedef std::array eri_4idx_t; - - diff --git a/.secret_cpp/include/tsl/hopscotch_hash.h b/.secret_cpp/include/tsl/hopscotch_hash.h new file mode 100644 index 0000000..ed5f858 --- /dev/null +++ b/.secret_cpp/include/tsl/hopscotch_hash.h @@ -0,0 +1,1896 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_HASH_H +#define TSL_HOPSCOTCH_HASH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_growth_policy.h" + +#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)) +#define TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR +#endif + +namespace tsl { +namespace detail_hopscotch_hash { + +template +struct make_void { + using type = void; +}; + +template +struct has_is_transparent : std::false_type {}; + +template +struct has_is_transparent::type> + : std::true_type {}; + +template +struct has_key_compare : std::false_type {}; + +template +struct has_key_compare::type> + : std::true_type {}; + +template +struct is_power_of_two_policy : std::false_type {}; + +template +struct is_power_of_two_policy> + : std::true_type {}; + +template +static T numeric_cast(U value, + const char* error_message = "numeric_cast() failed.") { + T ret = static_cast(value); + if (static_cast(ret) != value) { + TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message); + } + + const bool is_same_signedness = + (std::is_unsigned::value && std::is_unsigned::value) || + (std::is_signed::value && std::is_signed::value); + if (!is_same_signedness && (ret < T{}) != (value < U{})) { + TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message); + } + + return ret; +} + +/* + * smallest_type_for_min_bits::type returns the smallest type that can fit + * MinBits. + */ +static const std::size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64; +template +class smallest_type_for_min_bits {}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 0) && (MinBits <= 8)>::type> { + public: + using type = std::uint_least8_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 8) && (MinBits <= 16)>::type> { + public: + using type = std::uint_least16_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 16) && (MinBits <= 32)>::type> { + public: + using type = std::uint_least32_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 32) && (MinBits <= 64)>::type> { + public: + using type = std::uint_least64_t; +}; + +/* + * Each bucket may store up to three elements: + * - An aligned storage to store a value_type object with placement-new. + * - An (optional) hash of the value in the bucket. + * - An unsigned integer of type neighborhood_bitmap used to tell us which + * buckets in the neighborhood of the current bucket contain a value with a hash + * belonging to the current bucket. + * + * For a bucket 'bct', a bit 'i' (counting from 0 and from the least significant + * bit to the most significant) set to 1 means that the bucket 'bct + i' + * contains a value with a hash belonging to bucket 'bct'. The bits used for + * that, start from the third least significant bit. The two least significant + * bits are reserved: + * - The least significant bit is set to 1 if there is a value in the bucket + * storage. + * - The second least significant bit is set to 1 if there is an overflow. More + * than NeighborhoodSize values give the same hash, all overflow values are + * stored in the m_overflow_elements list of the map. + * + * Details regarding hopscotch hashing an its implementation can be found here: + * https://tessil.github.io/2016/08/29/hopscotch-hashing.html + */ +static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; + +using truncated_hash_type = std::uint_least32_t; + +/** + * Helper class that stores a truncated hash if StoreHash is true and nothing + * otherwise. + */ +template +class hopscotch_bucket_hash { + public: + bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { return true; } + + truncated_hash_type truncated_bucket_hash() const noexcept { return 0; } + + protected: + void copy_hash(const hopscotch_bucket_hash&) noexcept {} + + void set_hash(truncated_hash_type /*hash*/) noexcept {} +}; + +template <> +class hopscotch_bucket_hash { + public: + bool bucket_hash_equal(std::size_t hash) const noexcept { + return m_hash == truncated_hash_type(hash); + } + + truncated_hash_type truncated_bucket_hash() const noexcept { return m_hash; } + + protected: + void copy_hash(const hopscotch_bucket_hash& bucket) noexcept { + m_hash = bucket.m_hash; + } + + void set_hash(truncated_hash_type hash) noexcept { m_hash = hash; } + + private: + truncated_hash_type m_hash; +}; + +template +class hopscotch_bucket : public hopscotch_bucket_hash { + private: + static const std::size_t MIN_NEIGHBORHOOD_SIZE = 4; + static const std::size_t MAX_NEIGHBORHOOD_SIZE = + SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; + + static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4."); + // We can't put a variable in the message, ensure coherence + static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); + + static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); + + static_assert(!StoreHash || NeighborhoodSize <= 30, + "NeighborhoodSize should be <= 30 if StoreHash is true."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, ""); + + using bucket_hash = hopscotch_bucket_hash; + + public: + using value_type = ValueType; + using neighborhood_bitmap = typename smallest_type_for_min_bits< + NeighborhoodSize + NB_RESERVED_BITS_IN_NEIGHBORHOOD>::type; + + hopscotch_bucket() noexcept : bucket_hash(), m_neighborhood_infos(0) { + tsl_hh_assert(empty()); + } + + hopscotch_bucket(const hopscotch_bucket& bucket) noexcept( + std::is_nothrow_copy_constructible::value) + : bucket_hash(bucket), m_neighborhood_infos(0) { + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket(hopscotch_bucket&& bucket) noexcept( + std::is_nothrow_move_constructible::value) + : bucket_hash(std::move(bucket)), m_neighborhood_infos(0) { + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(std::move(bucket.value())); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket& operator=(const hopscotch_bucket& bucket) noexcept( + std::is_nothrow_copy_constructible::value) { + if (this != &bucket) { + remove_value(); + + bucket_hash::operator=(bucket); + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + return *this; + } + + hopscotch_bucket& operator=(hopscotch_bucket&&) = delete; + + ~hopscotch_bucket() noexcept { + if (!empty()) { + destroy_value(); + } + } + + neighborhood_bitmap neighborhood_infos() const noexcept { + return neighborhood_bitmap(m_neighborhood_infos >> + NB_RESERVED_BITS_IN_NEIGHBORHOOD); + } + + void set_overflow(bool has_overflow) noexcept { + if (has_overflow) { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2); + } else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2); + } + } + + bool has_overflow() const noexcept { return (m_neighborhood_infos & 2) != 0; } + + bool empty() const noexcept { return (m_neighborhood_infos & 1) == 0; } + + void toggle_neighbor_presence(std::size_t ineighbor) noexcept { + tsl_hh_assert(ineighbor <= NeighborhoodSize); + m_neighborhood_infos = neighborhood_bitmap( + m_neighborhood_infos ^ + (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD))); + } + + bool check_neighbor_presence(std::size_t ineighbor) const noexcept { + tsl_hh_assert(ineighbor <= NeighborhoodSize); + if (((m_neighborhood_infos >> + (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & + 1) == 1) { + return true; + } + + return false; + } + + value_type& value() noexcept { + tsl_hh_assert(!empty()); +#if defined(__cplusplus) && __cplusplus >= 201703L + return *std::launder( + reinterpret_cast(std::addressof(m_value))); +#else + return *reinterpret_cast(std::addressof(m_value)); +#endif + } + + const value_type& value() const noexcept { + tsl_hh_assert(!empty()); +#if defined(__cplusplus) && __cplusplus >= 201703L + return *std::launder( + reinterpret_cast(std::addressof(m_value))); +#else + return *reinterpret_cast(std::addressof(m_value)); +#endif + } + + template + void set_value_of_empty_bucket(truncated_hash_type hash, + Args&&... value_type_args) { + tsl_hh_assert(empty()); + + ::new (static_cast(std::addressof(m_value))) + value_type(std::forward(value_type_args)...); + set_empty(false); + this->set_hash(hash); + } + + void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) { + tsl_hh_assert(empty_bucket.empty()); + if (!empty()) { + ::new (static_cast(std::addressof(empty_bucket.m_value))) + value_type(std::move(value())); + empty_bucket.copy_hash(*this); + empty_bucket.set_empty(false); + + destroy_value(); + set_empty(true); + } + } + + void remove_value() noexcept { + if (!empty()) { + destroy_value(); + set_empty(true); + } + } + + void clear() noexcept { + if (!empty()) { + destroy_value(); + } + + m_neighborhood_infos = 0; + tsl_hh_assert(empty()); + } + + static truncated_hash_type truncate_hash(std::size_t hash) noexcept { + return truncated_hash_type(hash); + } + + private: + void set_empty(bool is_empty) noexcept { + if (is_empty) { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1); + } else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1); + } + } + + void destroy_value() noexcept { + tsl_hh_assert(!empty()); + value().~value_type(); + } + + private: + using storage = typename std::aligned_storage::type; + + neighborhood_bitmap m_neighborhood_infos; + storage m_value; +}; + +/** + * Internal common class used by (b)hopscotch_map and (b)hopscotch_set. + * + * ValueType is what will be stored by hopscotch_hash (usually std::pair + * for a map and Key for a set). + * + * KeySelect should be a FunctionObject which takes a ValueType in parameter and + * returns a reference to the key. + * + * ValueSelect should be a FunctionObject which takes a ValueType in parameter + * and returns a reference to the value. ValueSelect should be void if there is + * no value (in a set for example). + * + * OverflowContainer will be used as containers for overflown elements. Usually + * it should be a list or a set/map. + */ +template +class hopscotch_hash : private Hash, private KeyEqual, private GrowthPolicy { + private: + template + using has_mapped_type = + typename std::integral_constant::value>; + + static_assert( + noexcept(std::declval().bucket_for_hash(std::size_t(0))), + "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), + "GrowthPolicy::clear must be noexcept."); + + public: + template + class hopscotch_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = hopscotch_iterator; + using const_iterator = hopscotch_iterator; + + private: + using hopscotch_bucket = + tsl::detail_hopscotch_hash::hopscotch_bucket; + using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap; + + using buckets_allocator = typename std::allocator_traits< + allocator_type>::template rebind_alloc; + using buckets_container_type = + std::vector; + + using overflow_container_type = OverflowContainer; + + static_assert(std::is_same::value, + "OverflowContainer should have ValueType as type."); + + static_assert(std::is_same::value, + "Invalid allocator, not the same type as the value_type."); + + using iterator_buckets = typename buckets_container_type::iterator; + using const_iterator_buckets = + typename buckets_container_type::const_iterator; + + using iterator_overflow = typename overflow_container_type::iterator; + using const_iterator_overflow = + typename overflow_container_type::const_iterator; + + public: + /** + * The `operator*()` and `operator->()` methods return a const reference and + * const pointer respectively to the stored value type. + * + * In case of a map, to get a modifiable reference to the value associated to + * a key (the `.second` in the stored pair), you have to call `value()`. + */ + template + class hopscotch_iterator { + friend class hopscotch_hash; + + private: + using iterator_bucket = typename std::conditional< + IsConst, typename hopscotch_hash::const_iterator_buckets, + typename hopscotch_hash::iterator_buckets>::type; + using iterator_overflow = typename std::conditional< + IsConst, typename hopscotch_hash::const_iterator_overflow, + typename hopscotch_hash::iterator_overflow>::type; + + hopscotch_iterator(iterator_bucket buckets_iterator, + iterator_bucket buckets_end_iterator, + iterator_overflow overflow_iterator) noexcept + : m_buckets_iterator(buckets_iterator), + m_buckets_end_iterator(buckets_end_iterator), + m_overflow_iterator(overflow_iterator) {} + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename hopscotch_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + hopscotch_iterator() noexcept {} + + // Copy constructor from iterator to const_iterator. + template ::type* = nullptr> + hopscotch_iterator(const hopscotch_iterator& other) noexcept + : m_buckets_iterator(other.m_buckets_iterator), + m_buckets_end_iterator(other.m_buckets_end_iterator), + m_overflow_iterator(other.m_overflow_iterator) {} + + hopscotch_iterator(const hopscotch_iterator& other) = default; + hopscotch_iterator(hopscotch_iterator&& other) = default; + hopscotch_iterator& operator=(const hopscotch_iterator& other) = default; + hopscotch_iterator& operator=(hopscotch_iterator&& other) = default; + + const typename hopscotch_hash::key_type& key() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return KeySelect()(m_buckets_iterator->value()); + } + + return KeySelect()(*m_overflow_iterator); + } + + template < + class U = ValueSelect, + typename std::enable_if::value>::type* = nullptr> + typename std::conditional::type + value() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return U()(m_buckets_iterator->value()); + } + + return U()(*m_overflow_iterator); + } + + reference operator*() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return m_buckets_iterator->value(); + } + + return *m_overflow_iterator; + } + + pointer operator->() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return std::addressof(m_buckets_iterator->value()); + } + + return std::addressof(*m_overflow_iterator); + } + + hopscotch_iterator& operator++() { + if (m_buckets_iterator == m_buckets_end_iterator) { + ++m_overflow_iterator; + return *this; + } + + do { + ++m_buckets_iterator; + } while (m_buckets_iterator != m_buckets_end_iterator && + m_buckets_iterator->empty()); + + return *this; + } + + hopscotch_iterator operator++(int) { + hopscotch_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const hopscotch_iterator& lhs, + const hopscotch_iterator& rhs) { + return lhs.m_buckets_iterator == rhs.m_buckets_iterator && + lhs.m_overflow_iterator == rhs.m_overflow_iterator; + } + + friend bool operator!=(const hopscotch_iterator& lhs, + const hopscotch_iterator& rhs) { + return !(lhs == rhs); + } + + private: + iterator_bucket m_buckets_iterator; + iterator_bucket m_buckets_end_iterator; + iterator_overflow m_overflow_iterator; + }; + + public: + template < + class OC = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, const Hash& hash, + const KeyEqual& equal, const Allocator& alloc, + float max_load_factor) + : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) { + if (bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The map exceeds its maximum size."); + } + + if (bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow " + "move constructible."); + } + + template < + class OC = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, const Hash& hash, + const KeyEqual& equal, const Allocator& alloc, + float max_load_factor, const typename OC::key_compare& comp) + : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(comp, alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) { + if (bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The map exceeds its maximum size."); + } + + if (bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow " + "move constructible."); + } + + hopscotch_hash(const hopscotch_hash& other) + : hopscotch_hash(other, other.get_allocator()) {} + + hopscotch_hash(const hopscotch_hash& other, const Allocator& alloc) + : Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_buckets_data(other.m_buckets_data, alloc), + m_overflow_elements(other.m_overflow_elements), + m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() + : m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) {} + + hopscotch_hash(hopscotch_hash&& other) noexcept( + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& std:: + is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible< + overflow_container_type>::value) + : Hash(std::move(static_cast(other))), + KeyEqual(std::move(static_cast(other))), + GrowthPolicy(std::move(static_cast(other))), + m_buckets_data(std::move(other.m_buckets_data)), + m_overflow_elements(std::move(other.m_overflow_elements)), + m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() + : m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) { + other.GrowthPolicy::clear(); + other.m_buckets_data.clear(); + other.m_overflow_elements.clear(); + other.m_buckets = static_empty_bucket_ptr(); + other.m_nb_elements = 0; + other.m_min_load_threshold_rehash = 0; + other.m_max_load_threshold_rehash = 0; + } + + hopscotch_hash& operator=(const hopscotch_hash& other) { + if (&other != this) { + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + m_buckets_data = other.m_buckets_data; + m_overflow_elements = other.m_overflow_elements; + m_buckets = m_buckets_data.empty() ? static_empty_bucket_ptr() + : m_buckets_data.data(); + m_nb_elements = other.m_nb_elements; + + m_min_load_threshold_rehash = other.m_min_load_threshold_rehash; + m_max_load_threshold_rehash = other.m_max_load_threshold_rehash; + m_max_load_factor = other.m_max_load_factor; + } + + return *this; + } + + hopscotch_hash& operator=(hopscotch_hash&& other) { + other.swap(*this); + other.clear(); + + return *this; + } + + allocator_type get_allocator() const { + return m_buckets_data.get_allocator(); + } + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_buckets_data.begin(); + while (begin != m_buckets_data.end() && begin->empty()) { + ++begin; + } + + return iterator(begin, m_buckets_data.end(), m_overflow_elements.begin()); + } + + const_iterator begin() const noexcept { return cbegin(); } + + const_iterator cbegin() const noexcept { + auto begin = m_buckets_data.cbegin(); + while (begin != m_buckets_data.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin, m_buckets_data.cend(), + m_overflow_elements.cbegin()); + } + + iterator end() noexcept { + return iterator(m_buckets_data.end(), m_buckets_data.end(), + m_overflow_elements.end()); + } + + const_iterator end() const noexcept { return cend(); } + + const_iterator cend() const noexcept { + return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), + m_overflow_elements.cend()); + } + + /* + * Capacity + */ + bool empty() const noexcept { return m_nb_elements == 0; } + + size_type size() const noexcept { return m_nb_elements; } + + size_type max_size() const noexcept { return m_buckets_data.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { + for (auto& bucket : m_buckets_data) { + bucket.clear(); + } + + m_overflow_elements.clear(); + m_nb_elements = 0; + } + + std::pair insert(const value_type& value) { + return insert_impl(value); + } + + template ::value>::type* = nullptr> + std::pair insert(P&& value) { + return insert_impl(value_type(std::forward

(value))); + } + + std::pair insert(value_type&& value) { + return insert_impl(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + if (hint != cend() && + compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(value).first; + } + + template ::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return emplace_hint(hint, std::forward

(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + if (hint != cend() && + compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(std::move(value)).first; + } + + template + void insert(InputIt first, InputIt last) { + if (std::is_base_of< + std::forward_iterator_tag, + typename std::iterator_traits::iterator_category>::value) { + const auto nb_elements_insert = std::distance(first, last); + const std::size_t nb_elements_in_buckets = + m_nb_elements - m_overflow_elements.size(); + const std::size_t nb_free_buckets = + m_max_load_threshold_rehash - nb_elements_in_buckets; + tsl_hh_assert(m_nb_elements >= m_overflow_elements.size()); + tsl_hh_assert(m_max_load_threshold_rehash >= nb_elements_in_buckets); + + if (nb_elements_insert > 0 && + nb_free_buckets < std::size_t(nb_elements_insert)) { + reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert)); + } + } + + for (; first != last; ++first) { + insert(*first); + } + } + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return insert_or_assign_impl(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return insert_or_assign_impl(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(k, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::move(k), std::forward(obj)).first; + } + + template + std::pair emplace(Args&&... args) { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return insert(hint, value_type(std::forward(args)...)); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return try_emplace_impl(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return try_emplace_impl(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(k, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(std::move(k), std::forward(args)...).first; + } + + /** + * Here to avoid `template size_type erase(const K& key)` being used + * when we use an iterator instead of a const_iterator. + */ + iterator erase(iterator pos) { return erase(const_iterator(pos)); } + + iterator erase(const_iterator pos) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key())); + + if (pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + auto it_bucket = + m_buckets_data.begin() + + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + erase_from_bucket(*it_bucket, ibucket_for_hash); + + return ++iterator(it_bucket, m_buckets_data.end(), + m_overflow_elements.begin()); + } else { + auto it_next_overflow = + erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash); + return iterator(m_buckets_data.end(), m_buckets_data.end(), + it_next_overflow); + } + } + + iterator erase(const_iterator first, const_iterator last) { + if (first == last) { + return mutable_iterator(first); + } + + auto to_delete = erase(first); + while (to_delete != last) { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template + size_type erase(const K& key) { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K& key, std::size_t hash) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, m_buckets + ibucket_for_hash); + if (bucket_found != nullptr) { + erase_from_bucket(*bucket_found, ibucket_for_hash); + + return 1; + } + + if (m_buckets[ibucket_for_hash].has_overflow()) { + auto it_overflow = find_in_overflow(key); + if (it_overflow != m_overflow_elements.end()) { + erase_from_overflow(it_overflow, ibucket_for_hash); + + return 1; + } + } + + return 0; + } + + void swap(hopscotch_hash& other) { + using std::swap; + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(m_buckets_data, other.m_buckets_data); + swap(m_overflow_elements, other.m_overflow_elements); + swap(m_buckets, other.m_buckets); + swap(m_nb_elements, other.m_nb_elements); + swap(m_min_load_threshold_rehash, other.m_min_load_threshold_rehash); + swap(m_max_load_threshold_rehash, other.m_max_load_threshold_rehash); + swap(m_max_load_factor, other.m_max_load_factor); + } + + /* + * Lookup + */ + template ::value>::type* = nullptr> + typename U::value_type& at(const K& key) { + return at(key, hash_key(key)); + } + + template ::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) { + return const_cast( + static_cast(this)->at(key, hash)); + } + + template ::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const { + return at(key, hash_key(key)); + } + + template ::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const { + using T = typename U::value_type; + + const T* value = + find_value_impl(key, hash, m_buckets + bucket_for_hash(hash)); + if (value == nullptr) { + TSL_HH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); + } else { + return *value; + } + } + + template ::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) { + using T = typename U::value_type; + + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + T* value = find_value_impl(key, hash, m_buckets + ibucket_for_hash); + if (value != nullptr) { + return *value; + } else { + return insert_value(ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple()) + .first.value(); + } + } + + template + size_type count(const K& key) const { + return count(key, hash_key(key)); + } + + template + size_type count(const K& key, std::size_t hash) const { + return count_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template + iterator find(const K& key) { + return find(key, hash_key(key)); + } + + template + iterator find(const K& key, std::size_t hash) { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template + const_iterator find(const K& key) const { + return find(key, hash_key(key)); + } + + template + const_iterator find(const K& key, std::size_t hash) const { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template + bool contains(const K& key) const { + return contains(key, hash_key(key)); + } + + template + bool contains(const K& key, std::size_t hash) const { + return count(key, hash) != 0; + } + + template + std::pair equal_range(const K& key) { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end()) ? it : std::next(it)); + } + + template + std::pair equal_range(const K& key) const { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range( + const K& key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend()) ? it : std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { + /* + * So that the last bucket can have NeighborhoodSize neighbors, the size of + * the bucket array is a little bigger than the real number of buckets when + * not empty. We could use some of the buckets at the beginning, but it is + * faster this way as we avoid extra checks. + */ + if (m_buckets_data.empty()) { + return 0; + } + + return m_buckets_data.size() - NeighborhoodSize + 1; + } + + size_type max_bucket_count() const { + const std::size_t max_bucket_count = + std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); + return max_bucket_count - NeighborhoodSize + 1; + } + + /* + * Hash policy + */ + float load_factor() const { + if (bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements) / float(bucket_count()); + } + + float max_load_factor() const { return m_max_load_factor; } + + void max_load_factor(float ml) { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); + m_min_load_threshold_rehash = + size_type(float(bucket_count()) * MIN_LOAD_FACTOR_FOR_REHASH); + m_max_load_threshold_rehash = + size_type(float(bucket_count()) * m_max_load_factor); + } + + void rehash(size_type count_) { + count_ = std::max(count_, + size_type(std::ceil(float(size()) / max_load_factor()))); + rehash_impl(count_); + } + + void reserve(size_type count_) { + rehash(size_type(std::ceil(float(count_) / max_load_factor()))); + } + + /* + * Observers + */ + hasher hash_function() const { return static_cast(*this); } + + key_equal key_eq() const { return static_cast(*this); } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + if (pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + // Get a non-const iterator + auto it = m_buckets_data.begin() + + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + return iterator(it, m_buckets_data.end(), m_overflow_elements.begin()); + } else { + // Get a non-const iterator + auto it = mutable_overflow_iterator(pos.m_overflow_iterator); + return iterator(m_buckets_data.end(), m_buckets_data.end(), it); + } + } + + size_type overflow_size() const noexcept { + return m_overflow_elements.size(); + } + + template ::value>::type* = nullptr> + typename U::key_compare key_comp() const { + return m_overflow_elements.key_comp(); + } + + private: + template + std::size_t hash_key(const K& key) const { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1& key1, const K2& key2) const { + return KeyEqual::operator()(key1, key2); + } + + std::size_t bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_hh_assert(bucket < m_buckets_data.size() || + (bucket == 0 && m_buckets_data.empty())); + + return bucket; + } + + template ::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + if (!m_overflow_elements.empty()) { + new_map.m_overflow_elements.swap(m_overflow_elements); + new_map.m_nb_elements += new_map.m_overflow_elements.size(); + + for (const value_type& value : new_map.m_overflow_elements) { + const std::size_t ibucket_for_hash = + new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value))); + new_map.m_buckets[ibucket_for_hash].set_overflow(true); + } + } + +#ifndef TSL_HH_NO_EXCEPTIONS + try { +#endif + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (auto it_bucket = m_buckets_data.begin(); + it_bucket != m_buckets_data.end(); ++it_bucket) { + if (it_bucket->empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? it_bucket->truncated_bucket_hash() + : new_map.hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, + std::move(it_bucket->value())); + + erase_from_bucket(*it_bucket, bucket_for_hash(hash)); + } +#ifndef TSL_HH_NO_EXCEPTIONS + } + /* + * The call to insert_value may throw an exception if an element is added to + * the overflow list and the memory allocation fails. Rollback the elements + * in this case. + */ + catch (...) { + m_overflow_elements.swap(new_map.m_overflow_elements); + + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (auto it_bucket = new_map.m_buckets_data.begin(); + it_bucket != new_map.m_buckets_data.end(); ++it_bucket) { + if (it_bucket->empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? it_bucket->truncated_bucket_hash() + : hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // The elements we insert were not in the overflow list before the + // switch. They will not be go in the overflow list if we rollback the + // switch. + insert_value(ibucket_for_hash, hash, std::move(it_bucket->value())); + } + + throw; + } +#endif + + new_map.swap(*this); + } + + template ::value && + !std::is_nothrow_move_constructible::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (const hopscotch_bucket& bucket : m_buckets_data) { + if (bucket.empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? bucket.truncated_bucket_hash() + : new_map.hash_key(KeySelect()(bucket.value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, bucket.value()); + } + + for (const value_type& value : m_overflow_elements) { + const std::size_t hash = new_map.hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, value); + } + + new_map.swap(*this); + } + +#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return std::next(m_overflow_elements.begin(), + std::distance(m_overflow_elements.cbegin(), it)); + } +#else + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return m_overflow_elements.erase(it, it); + } +#endif + + // iterator is in overflow list + iterator_overflow erase_from_overflow(const_iterator_overflow pos, + std::size_t ibucket_for_hash) { +#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR + auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos)); +#else + auto it_next = m_overflow_elements.erase(pos); +#endif + m_nb_elements--; + + // Check if we can remove the overflow flag + tsl_hh_assert(m_buckets[ibucket_for_hash].has_overflow()); + for (const value_type& value : m_overflow_elements) { + const std::size_t bucket_for_value = + bucket_for_hash(hash_key(KeySelect()(value))); + if (bucket_for_value == ibucket_for_hash) { + return it_next; + } + } + + m_buckets[ibucket_for_hash].set_overflow(false); + return it_next; + } + + /** + * bucket_for_value is the bucket in which the value is. + * ibucket_for_hash is the bucket where the value belongs. + */ + void erase_from_bucket(hopscotch_bucket& bucket_for_value, + std::size_t ibucket_for_hash) noexcept { + const std::size_t ibucket_for_value = + std::distance(m_buckets_data.data(), &bucket_for_value); + tsl_hh_assert(ibucket_for_value >= ibucket_for_hash); + + bucket_for_value.remove_value(); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_value - + ibucket_for_hash); + m_nb_elements--; + } + + template + std::pair insert_or_assign_impl(K&& key, M&& obj) { + auto it = try_emplace_impl(std::forward(key), std::forward(obj)); + if (!it.second) { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + std::pair try_emplace_impl(P&& key, Args&&... args_value) { + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = find_impl(key, hash, m_buckets + ibucket_for_hash); + if (it_find != end()) { + return std::make_pair(it_find, false); + } + + return insert_value( + ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward

(key)), + std::forward_as_tuple(std::forward(args_value)...)); + } + + template + std::pair insert_impl(P&& value) { + const std::size_t hash = hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = + find_impl(KeySelect()(value), hash, m_buckets + ibucket_for_hash); + if (it_find != end()) { + return std::make_pair(it_find, false); + } + + return insert_value(ibucket_for_hash, hash, std::forward

(value)); + } + + template + std::pair insert_value(std::size_t ibucket_for_hash, + std::size_t hash, + Args&&... value_type_args) { + if ((m_nb_elements - m_overflow_elements.size()) >= + m_max_load_threshold_rehash) { + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + } + + std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash); + if (ibucket_empty < m_buckets_data.size()) { + do { + tsl_hh_assert(ibucket_empty >= ibucket_for_hash); + + // Empty bucket is in range of NeighborhoodSize, use it + if (ibucket_empty - ibucket_for_hash < NeighborhoodSize) { + auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, hash, + std::forward(value_type_args)...); + return std::make_pair( + iterator(it, m_buckets_data.end(), m_overflow_elements.begin()), + true); + } + } + // else, try to swap values to get a closer empty bucket + while (swap_empty_bucket_closer(ibucket_empty)); + } + + // Load factor is too low or a rehash will not change the neighborhood, put + // the value in overflow list + if (size() < m_min_load_threshold_rehash || + !will_neighborhood_change_on_rehash(ibucket_for_hash)) { + auto it = insert_in_overflow(ibucket_for_hash, + std::forward(value_type_args)...); + return std::make_pair( + iterator(m_buckets_data.end(), m_buckets_data.end(), it), true); + } + + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + + return insert_value(ibucket_for_hash, hash, + std::forward(value_type_args)...); + } + + /* + * Return true if a rehash will change the position of a key-value in the + * neighborhood of ibucket_neighborhood_check. In this case a rehash is needed + * instead of puting the value in overflow list. + */ + bool will_neighborhood_change_on_rehash( + size_t ibucket_neighborhood_check) const { + std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count(); + GrowthPolicy expand_growth_policy(expand_bucket_count); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(expand_bucket_count); + for (size_t ibucket = ibucket_neighborhood_check; + ibucket < m_buckets_data.size() && + (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; + ++ibucket) { + tsl_hh_assert(!m_buckets[ibucket].empty()); + + const size_t hash = + use_stored_hash ? m_buckets[ibucket].truncated_bucket_hash() + : hash_key(KeySelect()(m_buckets[ibucket].value())); + if (bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) { + return true; + } + } + + return false; + } + + /* + * Return the index of an empty bucket in m_buckets_data. + * If none, the returned index equals m_buckets_data.size() + */ + std::size_t find_empty_bucket(std::size_t ibucket_start) const { + const std::size_t limit = std::min( + ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets_data.size()); + for (; ibucket_start < limit; ibucket_start++) { + if (m_buckets[ibucket_start].empty()) { + return ibucket_start; + } + } + + return m_buckets_data.size(); + } + + /* + * Insert value in ibucket_empty where value originally belongs to + * ibucket_for_hash + * + * Return bucket iterator to ibucket_empty + */ + template + iterator_buckets insert_in_bucket(std::size_t ibucket_empty, + std::size_t ibucket_for_hash, + std::size_t hash, + Args&&... value_type_args) { + tsl_hh_assert(ibucket_empty >= ibucket_for_hash); + tsl_hh_assert(m_buckets[ibucket_empty].empty()); + m_buckets[ibucket_empty].set_value_of_empty_bucket( + hopscotch_bucket::truncate_hash(hash), + std::forward(value_type_args)...); + + tsl_hh_assert(!m_buckets[ibucket_for_hash].empty()); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - + ibucket_for_hash); + m_nb_elements++; + + return m_buckets_data.begin() + ibucket_empty; + } + + template < + class... Args, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, + Args&&... value_type_args) { + auto it = m_overflow_elements.emplace( + m_overflow_elements.end(), std::forward(value_type_args)...); + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + template ::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, + Args&&... value_type_args) { + auto it = + m_overflow_elements.emplace(std::forward(value_type_args)...) + .first; + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + /* + * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it + * while keeping the neighborhood conditions correct. + * + * If a swap was possible, the position of ibucket_empty_in_out will be closer + * to 0 and true will re returned. + */ + bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) { + tsl_hh_assert(ibucket_empty_in_out >= NeighborhoodSize); + const std::size_t neighborhood_start = + ibucket_empty_in_out - NeighborhoodSize + 1; + + for (std::size_t to_check = neighborhood_start; + to_check < ibucket_empty_in_out; to_check++) { + neighborhood_bitmap neighborhood_infos = + m_buckets[to_check].neighborhood_infos(); + std::size_t to_swap = to_check; + + while (neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) { + if ((neighborhood_infos & 1) == 1) { + tsl_hh_assert(m_buckets[ibucket_empty_in_out].empty()); + tsl_hh_assert(!m_buckets[to_swap].empty()); + + m_buckets[to_swap].swap_value_into_empty_bucket( + m_buckets[ibucket_empty_in_out]); + + tsl_hh_assert(!m_buckets[to_check].check_neighbor_presence( + ibucket_empty_in_out - to_check)); + tsl_hh_assert( + m_buckets[to_check].check_neighbor_presence(to_swap - to_check)); + + m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - + to_check); + m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check); + + ibucket_empty_in_out = to_swap; + + return true; + } + + to_swap++; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + } + + return false; + } + + template ::value>::type* = nullptr> + typename U::value_type* find_value_impl(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + return const_cast( + static_cast(this)->find_value_impl( + key, hash, bucket_for_hash)); + } + + /* + * Avoid the creation of an iterator to just get the value for operator[] and + * at() in maps. Faster this way. + * + * Return null if no value for the key (TODO use std::optional when + * available). + */ + template ::value>::type* = nullptr> + const typename U::value_type* find_value_impl( + const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + const hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return std::addressof(ValueSelect()(bucket_found->value())); + } + + if (bucket_for_hash->has_overflow()) { + auto it_overflow = find_in_overflow(key); + if (it_overflow != m_overflow_elements.end()) { + return std::addressof(ValueSelect()(*it_overflow)); + } + } + + return nullptr; + } + + template + size_type count_impl(const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + if (find_in_buckets(key, hash, bucket_for_hash) != nullptr) { + return 1; + } else if (bucket_for_hash->has_overflow() && + find_in_overflow(key) != m_overflow_elements.cend()) { + return 1; + } else { + return 0; + } + } + + template + iterator find_impl(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return iterator(m_buckets_data.begin() + + std::distance(m_buckets_data.data(), bucket_found), + m_buckets_data.end(), m_overflow_elements.begin()); + } + + if (!bucket_for_hash->has_overflow()) { + return end(); + } + + return iterator(m_buckets_data.end(), m_buckets_data.end(), + find_in_overflow(key)); + } + + template + const_iterator find_impl(const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + const hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return const_iterator( + m_buckets_data.cbegin() + + std::distance(m_buckets_data.data(), bucket_found), + m_buckets_data.cend(), m_overflow_elements.cbegin()); + } + + if (!bucket_for_hash->has_overflow()) { + return cend(); + } + + return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), + find_in_overflow(key)); + } + + template + hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + const hopscotch_bucket* bucket_found = + static_cast(this)->find_in_buckets( + key, hash, bucket_for_hash); + return const_cast(bucket_found); + } + + /** + * Return a pointer to the bucket which has the value, nullptr otherwise. + */ + template + const hopscotch_bucket* find_in_buckets( + const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + (void)hash; // Avoid warning of unused variable when StoreHash is false; + + // TODO Try to optimize the function. + // I tried to use ffs and __builtin_ffs functions but I could not reduce + // the time the function takes with -march=native + + neighborhood_bitmap neighborhood_infos = + bucket_for_hash->neighborhood_infos(); + while (neighborhood_infos != 0) { + if ((neighborhood_infos & 1) == 1) { + // Check StoreHash before calling bucket_hash_equal. Functionally it + // doesn't change anythin. If StoreHash is false, bucket_hash_equal is a + // no-op. Avoiding the call is there to help GCC optimizes `hash` + // parameter away, it seems to not be able to do without this hint. + if ((!StoreHash || bucket_for_hash->bucket_hash_equal(hash)) && + compare_keys(KeySelect()(bucket_for_hash->value()), key)) { + return bucket_for_hash; + } + } + + ++bucket_for_hash; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + + return nullptr; + } + + template < + class K, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template < + class K, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return std::find_if(m_overflow_elements.cbegin(), + m_overflow_elements.cend(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template ::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return m_overflow_elements.find(key); + } + + template ::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return m_overflow_elements.find(key); + } + + template < + class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), + static_cast(*this), get_allocator(), + m_max_load_factor); + } + + template ::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), + static_cast(*this), get_allocator(), + m_max_load_factor, m_overflow_elements.key_comp()); + } + + public: + static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = + (NeighborhoodSize <= 30) ? 0.8f : 0.9f; + + private: + static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12 * NeighborhoodSize; + static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f; + + /** + * We can only use the hash on rehash if the size of the hash type is the same + * as the stored one or if we use a power of two modulo. In the case of the + * power of two modulo, we just mask the least significant bytes, we just have + * to check that the truncated_hash_type didn't truncated too much bytes. + */ + template ::value>::type* = nullptr> + static bool USE_STORED_HASH_ON_REHASH(size_type /*bucket_count*/) { + return StoreHash; + } + + template ::value>::type* = nullptr> + static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { + (void)bucket_count; + if (StoreHash && is_power_of_two_policy::value) { + tsl_hh_assert(bucket_count > 0); + return (bucket_count - 1) <= + std::numeric_limits::max(); + } else { + return false; + } + } + + /** + * Return an always valid pointer to an static empty hopscotch_bucket. + */ + hopscotch_bucket* static_empty_bucket_ptr() { + static hopscotch_bucket empty_bucket; + return &empty_bucket; + } + + private: + buckets_container_type m_buckets_data; + overflow_container_type m_overflow_elements; + + /** + * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points + * to static_empty_bucket_ptr. This variable is useful to avoid the cost of + * checking if m_buckets_data is empty when trying to find an element. + * + * TODO Remove m_buckets_data and only use a pointer+size instead of a + * pointer+vector to save some space in the hopscotch_hash object. + */ + hopscotch_bucket* m_buckets; + + size_type m_nb_elements; + + /** + * Min size of the hash table before a rehash can occurs automatically (except + * if m_max_load_threshold_rehash os reached). If the neighborhood of a bucket + * is full before the min is reacher, the elements are put into + * m_overflow_elements. + */ + size_type m_min_load_threshold_rehash; + + /** + * Max size of the hash table before a rehash occurs automatically to grow the + * table. + */ + size_type m_max_load_threshold_rehash; + + float m_max_load_factor; +}; + +} // end namespace detail_hopscotch_hash + +} // end namespace tsl + +#endif diff --git a/.secret_cpp/include/tsl/hopscotch_set.h b/.secret_cpp/include/tsl/hopscotch_set.h new file mode 100644 index 0000000..05fac58 --- /dev/null +++ b/.secret_cpp/include/tsl/hopscotch_set.h @@ -0,0 +1,595 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_SET_H +#define TSL_HOPSCOTCH_SET_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_hash.h" + +namespace tsl { + +/** + * Implementation of a hash set using the hopscotch hashing algorithm. + * + * The Key must be either nothrow move-constructible, copy-constructible or + * both. + * + * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if + * StoreHash is false. When StoreHash is true, 32-bits of the hash will be + * stored alongside the neighborhood limiting the NeighborhoodSize to <= 30. + * There is no memory usage difference between 'NeighborhoodSize 62; StoreHash + * false' and 'NeighborhoodSize 30; StoreHash true'. + * + * Storing the hash may improve performance on insert during the rehash process + * if the hash takes time to compute. It may also improve read performance if + * the KeyEqual function takes time (or incurs a cache-miss). If used with + * simple Hash and KeyEqual it may slow things down. + * + * StoreHash can only be set if the GrowthPolicy is set to + * tsl::power_of_two_growth_policy. + * + * GrowthPolicy defines how the set grows and consequently how a hash value is + * mapped to a bucket. By default the set uses tsl::power_of_two_growth_policy. + * This policy keeps the number of buckets to a power of two and uses a mask to + * set the hash to a bucket instead of the slow modulo. You may define your own + * growth policy, check tsl::power_of_two_growth_policy for the interface. + * + * If the destructor of Key throws an exception, behaviour of the class is + * undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective + * insert, invalidate the iterators if a displacement is needed to resolve a + * collision (which mean that most of the time, insert will invalidate the + * iterators). Or if there is a rehash. + * - erase: iterator on the erased element is the only one which become + * invalid. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class hopscotch_set { + private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const { return key; } + + key_type& operator()(Key& key) { return key; } + }; + + using overflow_container_type = std::list; + using ht = detail_hopscotch_hash::hopscotch_hash< + Key, KeySelect, void, Hash, KeyEqual, Allocator, NeighborhoodSize, + StoreHash, GrowthPolicy, overflow_container_type>; + + public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {} + + explicit hopscotch_set(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} + + hopscotch_set(size_type bucket_count, const Allocator& alloc) + : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) {} + + hopscotch_set(size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : hopscotch_set(bucket_count, hash, KeyEqual(), alloc) {} + + hopscotch_set(const hopscotch_set& other, const Allocator& alloc) + : m_ht(other.m_ht, alloc) {} + + explicit hopscotch_set(const Allocator& alloc) + : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {} + + template + hopscotch_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_set(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + hopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Allocator& alloc) + : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + hopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + hopscotch_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, + alloc) {} + + hopscotch_set(std::initializer_list init, size_type bucket_count, + const Allocator& alloc) + : hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), + KeyEqual(), alloc) {} + + hopscotch_set(std::initializer_list init, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + hopscotch_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to insert(hint, + * value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if (it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::hopscotch_set`. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false> +using hopscotch_pg_set = + hopscotch_set; + +} // end namespace tsl + +#endif diff --git a/.secret_cpp/main.cpp b/.secret_cpp/main.cpp deleted file mode 100644 index 98f0abe..0000000 --- a/.secret_cpp/main.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include -#include -#include -#include -#include "qpx.hpp" -#include -#include - -spin_det_t parse_line(std::string s) { - spin_det_t sd; - for(int i = 0; i < s.size(); i++) { - if(s[i] == '-') sd.push_back(0); - if(s[i] == '+') sd.push_back(1); - } - return sd; -} - -std::vector setup() { - std::vector psi; - - std::ifstream fs("/home/applenco/QuantumEnvelope/data/c2_eq_hf_dz_15.101224det.wf"); - while(true) { - std::string coef_str_or_empty; - if(!std::getline(fs, coef_str_or_empty)) break; - - std::string alpha_str; - std::getline(fs, alpha_str); - parse_line(alpha_str); - std::string beta_str; - std::getline(fs, beta_str); - std::string emptyline_str; - std::getline(fs, emptyline_str); - - psi.push_back({parse_line(alpha_str), parse_line(beta_str)}); - } - return psi; -} - -bool get_element_naive(det_t target, std::vector& psi) { - for (auto &det : psi) { - if (det == target) - return true; - } - return false; -} - -bool get_element_binary_search(det_t target, std::vector &psi) { - return std::binary_search(psi.begin(), psi.end(), target); -} - -bool get_element_hash(det_t target, std::unordered_set &psi) { - return psi.find(target) != psi.end(); -} - -static void NaiveLookup(benchmark::State& state) { - auto psi = setup(); - std::sort(psi.begin(), psi.end()); - - - std::vector psi_random{psi.begin(), psi.end()}; - std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); - - for (auto _ : state) { - for (auto d: psi_random) - get_element_naive(d, psi); - } - state.counters["LookupRate"] = benchmark::Counter(psi.size(), benchmark::Counter::kIsRate); - -} - -static void BinarySearchLookup(benchmark::State& state) { - auto psi = setup(); - std::sort(psi.begin(), psi.end()); - - - std::vector psi_random{psi.begin(), psi.end()}; - std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); - - for (auto _ : state) { - for (auto d: psi_random) - get_element_binary_search(d, psi); - } - state.counters["LookupRate"] = benchmark::Counter(psi.size(), benchmark::Counter::kIsRate); -} - -static void HashLookup(benchmark::State& state) { - auto psi = setup(); - std::unordered_set psi_m; - for (auto d: psi) { - psi_m.insert(d); - } - - std::vector psi_random{psi.begin(), psi.end()}; - std::shuffle(psi_random.begin(), psi_random.end(), std::default_random_engine{}); - - for (auto _ : state) { - for (auto d: psi_random) - get_element_hash(d, psi_m); - } - state.counters["LookupRate"] = benchmark::Counter(psi.size(), benchmark::Counter::kIsRate); -} - -// Register the function as a benchmark -BENCHMARK(NaiveLookup); -BENCHMARK(BinarySearchLookup); -BENCHMARK(HashLookup); -// Run the benchmark -BENCHMARK_MAIN(); -