diff --git a/immer/detail/hamts/bits.hpp b/immer/detail/hamts/bits.hpp index 7ab413fb..24359797 100644 --- a/immer/detail/hamts/bits.hpp +++ b/immer/detail/hamts/bits.hpp @@ -20,7 +20,6 @@ namespace detail { namespace hamts { using size_t = std::size_t; -using hash_t = std::size_t; using bits_t = std::uint32_t; using count_t = std::uint32_t; using shift_t = std::uint32_t; @@ -53,14 +52,14 @@ struct get_bitmap_type<4u> template constexpr T branches = T{1u} << B; -template +template constexpr T mask = branches - 1u; -template +template constexpr T max_depth = (sizeof(hash_t) * 8u + B - 1u) / B; -template -constexpr T max_shift = max_depth * B; +template +constexpr T max_shift = max_depth * B; #define IMMER_HAS_BUILTIN_POPCOUNT 1 diff --git a/immer/detail/hamts/champ.hpp b/immer/detail/hamts/champ.hpp index da2cd6e7..005c4613 100644 --- a/immer/detail/hamts/champ.hpp +++ b/immer/detail/hamts/champ.hpp @@ -131,6 +131,7 @@ struct champ using edit_t = typename MemoryPolicy::transience_t::edit; using owner_t = typename MemoryPolicy::transience_t::owner; using bitmap_t = typename get_bitmap_type::type; + using hash_t = typename node_t::hash_t; static_assert(branches <= sizeof(bitmap_t) * 8, ""); @@ -202,7 +203,7 @@ struct champ size_t hash_mask) const { auto result = std::size_t{}; - if (depth < max_depth) { + if (depth < max_depth) { auto nodemap = node->nodemap(); if (nodemap) { auto fst = node->children(); @@ -213,7 +214,7 @@ struct champ do_check_champ(child, depth + 1, path_hash | (idx << (B * depth)), - (hash_mask << B) | mask); + (hash_mask << B) | mask); } } } @@ -248,7 +249,7 @@ struct champ // due some value being moved out of the champ when it should have not. bool check_champ() const { - auto r = do_check_champ(root, 0, 0, mask); + auto r = do_check_champ(root, 0, 0, mask); // assert(r == size); return r == size; } @@ -258,7 +259,7 @@ struct champ node_t* node, count_t depth) const { - if (depth < max_depth) { + if (depth < max_depth) { ++stats.inner_node_count; stats.inner_node_w_value_count += node->data_count() > 0; stats.inner_node_w_child_count += node->children_count() > 0; @@ -317,7 +318,7 @@ struct champ void for_each_chunk_traversal(const node_t* node, count_t depth, Fn&& fn) const { - if (depth < max_depth) { + if (depth < max_depth) { auto datamap = node->datamap(); if (datamap) fn(node->values(), node->values() + node->data_count()); @@ -348,7 +349,7 @@ struct champ { if (old_node == new_node) return; - if (depth < max_depth) { + if (depth < max_depth) { auto old_nodemap = old_node->nodemap(); auto new_nodemap = new_node->nodemap(); auto old_datamap = old_node->datamap(); @@ -537,8 +538,8 @@ struct champ { auto node = root; auto hash = Hash{}(k); - for (auto i = count_t{}; i < max_depth; ++i) { - auto bit = bitmap_t{1u} << (hash & mask); + for (auto i = count_t{}; i < max_depth; ++i) { + auto bit = bitmap_t{1u} << (hash & mask); if (node->nodemap() & bit) { auto offset = node->children_count(bit); node = node->children()[offset]; @@ -571,7 +572,7 @@ struct champ add_result do_add(node_t* node, T v, hash_t hash, shift_t shift) const { assert(node); - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -581,7 +582,7 @@ struct champ false}; return {node_t::copy_collision_insert(node, std::move(v)), true}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -644,7 +645,7 @@ struct champ do_add_mut(edit_t e, node_t* node, T v, hash_t hash, shift_t shift) const { assert(node); - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -663,7 +664,7 @@ struct champ : node_t::copy_collision_insert(node, std::move(v)); return {node_t::owned(r, e), true, mutate}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -756,7 +757,7 @@ struct champ update_result do_update(node_t* node, K&& k, Fn&& fn, hash_t hash, shift_t shift) const { - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -774,7 +775,7 @@ struct champ std::forward(fn)(Default{}()))), true}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -851,7 +852,7 @@ struct champ node_t* do_update_if_exists( node_t* node, K&& k, Fn&& fn, hash_t hash, shift_t shift) const { - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -864,7 +865,7 @@ struct champ Project{}(detail::as_const(*fst))))); return nullptr; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -929,7 +930,7 @@ struct champ hash_t hash, shift_t shift) const { - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -956,7 +957,7 @@ struct champ : node_t::copy_collision_insert(node, std::move(v)); return {node_t::owned(r, e), true, mutate}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -1068,7 +1069,7 @@ struct champ hash_t hash, shift_t shift) const { - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (; fst != lst; ++fst) @@ -1090,7 +1091,7 @@ struct champ } return {nullptr, false}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -1204,7 +1205,7 @@ struct champ sub_result do_sub(node_t* node, const K& k, hash_t hash, shift_t shift) const { - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (auto cur = fst; cur != lst; ++cur) @@ -1227,7 +1228,7 @@ struct champ #endif #endif } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -1350,7 +1351,7 @@ struct champ void* store) const { auto mutate = node->can_mutate(e); - if (shift == max_shift) { + if (shift == max_shift) { auto fst = node->collisions(); auto lst = fst + node->collision_count(); for (auto cur = fst; cur != lst; ++cur) { @@ -1374,7 +1375,7 @@ struct champ } return {}; } else { - auto idx = (hash & (mask << shift)) >> shift; + auto idx = (hash & (mask << shift)) >> shift; auto bit = bitmap_t{1u} << idx; if (node->nodemap() & bit) { auto offset = node->children_count(bit); @@ -1511,7 +1512,7 @@ struct champ { if (a == b) return true; - else if (depth == max_depth) { + else if (depth == max_depth) { auto nv = a->collision_count(); return nv == b->collision_count() && equals_collisions(a->collisions(), b->collisions(), nv); diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index 5ff31060..c7d811fe 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -26,6 +26,7 @@ struct champ_iterator { using tree_t = champ; using node_t = typename tree_t::node_t; + using hash_t = typename node_t::hash_t; champ_iterator() = default; @@ -67,7 +68,7 @@ struct champ_iterator T* cur_; T* end_; count_t depth_; - node_t* const* path_[max_depth + 1] = { + node_t* const* path_[max_depth + 1] = { 0, }; @@ -79,7 +80,7 @@ struct champ_iterator bool step_down() { - if (depth_ < max_depth) { + if (depth_ < max_depth) { auto parent = *path_[depth_]; assert(parent); if (parent->nodemap()) { @@ -87,7 +88,7 @@ struct champ_iterator path_[depth_] = parent->children(); auto child = *path_[depth_]; assert(child); - if (depth_ < max_depth) { + if (depth_ < max_depth) { if (child->datamap()) { cur_ = child->values(); end_ = cur_ + child->data_count(); @@ -112,7 +113,7 @@ struct champ_iterator path_[depth_] = next; auto child = *path_[depth_]; assert(child); - if (depth_ < max_depth) { + if (depth_ < max_depth) { if (child->datamap()) { cur_ = child->values(); end_ = cur_ + child->data_count(); diff --git a/immer/detail/hamts/node.hpp b/immer/detail/hamts/node.hpp index 11398dd8..0959789e 100644 --- a/immer/detail/hamts/node.hpp +++ b/immer/detail/hamts/node.hpp @@ -47,6 +47,7 @@ struct node using edit_t = typename transience::edit; using value_t = T; using bitmap_t = typename get_bitmap_type::type; + using hash_t = decltype(Hash{}(std::declval())); enum class kind_t { @@ -991,9 +992,9 @@ struct node static node_t* make_merged(shift_t shift, T v1, hash_t hash1, T v2, hash_t hash2) { - if (shift < max_shift) { - auto idx1 = hash1 & (mask << shift); - auto idx2 = hash2 & (mask << shift); + if (shift < max_shift) { + auto idx1 = hash1 & (mask << shift); + auto idx2 = hash2 & (mask << shift); if (idx1 == idx2) { auto merged = make_merged( shift + B, std::move(v1), hash1, std::move(v2), hash2); @@ -1020,9 +1021,9 @@ struct node static node_t* make_merged_e( edit_t e, shift_t shift, T v1, hash_t hash1, T v2, hash_t hash2) { - if (shift < max_shift) { - auto idx1 = hash1 & (mask << shift); - auto idx2 = hash2 & (mask << shift); + if (shift < max_shift) { + auto idx1 = hash1 & (mask << shift); + auto idx2 = hash2 & (mask << shift); if (idx1 == idx2) { auto merged = make_merged_e( e, shift + B, std::move(v1), hash1, std::move(v2), hash2); @@ -1097,7 +1098,7 @@ struct node static void delete_deep(node_t* p, shift_t s) { - if (s == max_depth) + if (s == max_depth) delete_collision(p); else { auto fst = p->children(); @@ -1111,7 +1112,7 @@ struct node static void delete_deep_shift(node_t* p, shift_t s) { - if (s == max_shift) + if (s == max_shift) delete_collision(p); else { auto fst = p->children(); diff --git a/immer/extra/persist/detail/champ/output.hpp b/immer/extra/persist/detail/champ/output.hpp index 42b82309..462637de 100644 --- a/immer/extra/persist/detail/champ/output.hpp +++ b/immer/extra/persist/detail/champ/output.hpp @@ -61,8 +61,9 @@ struct output_pool_builder void visit(const Node* node, immer::detail::hamts::count_t depth) { using immer::detail::hamts::max_depth; + using hash_t = typename Node::hash_t; - if (depth < max_depth) { + if (depth < max_depth) { visit_inner(node, depth); } else { visit_collision(node); diff --git a/immer/extra/persist/detail/rbts/pool.hpp b/immer/extra/persist/detail/rbts/pool.hpp index 0c828f25..b25affff 100644 --- a/immer/extra/persist/detail/rbts/pool.hpp +++ b/immer/extra/persist/detail/rbts/pool.hpp @@ -15,14 +15,14 @@ namespace immer::persist::rbts { -struct inner_node +struct rbts_info { - immer::vector children; - bool relaxed = {}; + node_id root; + node_id tail; - auto tie() const { return std::tie(children, relaxed); } + auto tie() const { return std::tie(root, tail); } - friend bool operator==(const inner_node& left, const inner_node& right) + friend bool operator==(const rbts_info& left, const rbts_info& right) { return left.tie() == right.tie(); } @@ -30,18 +30,40 @@ struct inner_node template void serialize(Archive& ar) { - ar(CEREAL_NVP(children), CEREAL_NVP(relaxed)); + ar(CEREAL_NVP(root), CEREAL_NVP(tail)); } }; -struct rbts_info +} // namespace immer::persist::rbts + +template <> +struct std::hash { - node_id root; - node_id tail; + auto operator()(const immer::persist::rbts::rbts_info& x) const + { + const auto boost_combine = [](std::size_t& seed, std::size_t hash) { + seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); + }; - auto tie() const { return std::tie(root, tail); } + auto seed = std::size_t{}; + boost_combine(seed, + hash{}(x.root.value)); + boost_combine(seed, + hash{}(x.tail.value)); + return seed; + } +}; - friend bool operator==(const rbts_info& left, const rbts_info& right) +namespace immer::persist::rbts { + +struct inner_node +{ + immer::vector children; + bool relaxed = {}; + + auto tie() const { return std::tie(children, relaxed); } + + friend bool operator==(const inner_node& left, const inner_node& right) { return left.tie() == right.tie(); } @@ -49,7 +71,7 @@ struct rbts_info template void serialize(Archive& ar) { - ar(CEREAL_NVP(root), CEREAL_NVP(tail)); + ar(CEREAL_NVP(children), CEREAL_NVP(relaxed)); } }; @@ -172,25 +194,3 @@ input_pool to_input_pool(output_pool ar) } } // namespace immer::persist::rbts - -namespace std { - -template <> -struct hash -{ - auto operator()(const immer::persist::rbts::rbts_info& x) const - { - const auto boost_combine = [](std::size_t& seed, std::size_t hash) { - seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); - }; - - auto seed = std::size_t{}; - boost_combine(seed, - hash{}(x.root.value)); - boost_combine(seed, - hash{}(x.tail.value)); - return seed; - } -}; - -} // namespace std diff --git a/immer/extra/persist/xxhash/xxhash.hpp b/immer/extra/persist/xxhash/xxhash.hpp index 45d765ba..bab4ffe6 100644 --- a/immer/extra/persist/xxhash/xxhash.hpp +++ b/immer/extra/persist/xxhash/xxhash.hpp @@ -14,15 +14,15 @@ namespace immer::persist { template struct xx_hash { - std::size_t operator()(const T& val) const { return xx_hash_value(val); } + std::uint64_t operator()(const T& val) const { return xx_hash_value(val); } }; -std::size_t xx_hash_value_string(const std::string& str); +std::uint64_t xx_hash_value_string(const std::string& str); template <> struct xx_hash { - std::size_t operator()(const std::string& val) const + std::uint64_t operator()(const std::string& val) const { return xx_hash_value_string(val); } diff --git a/immer/extra/persist/xxhash/xxhash_64.cpp b/immer/extra/persist/xxhash/xxhash_64.cpp index 8111b310..68ee183a 100644 --- a/immer/extra/persist/xxhash/xxhash_64.cpp +++ b/immer/extra/persist/xxhash/xxhash_64.cpp @@ -4,10 +4,7 @@ namespace immer::persist { -static_assert(sizeof(std::size_t) == 8); // 64 bits -static_assert(sizeof(XXH64_hash_t) == sizeof(std::size_t)); - -std::size_t xx_hash_value_string(const std::string& str) +std::uint64_t xx_hash_value_string(const std::string& str) { return XXH3_64bits(str.c_str(), str.size()); } diff --git a/immer/table.hpp b/immer/table.hpp index 068075a2..9306fb09 100644 --- a/immer/table.hpp +++ b/immer/table.hpp @@ -150,13 +150,10 @@ class table struct hash_key { - std::size_t operator()(const value_t& v) const - { - return Hash{}(KeyFn{}(v)); - } + auto operator()(const value_t& v) const { return Hash{}(KeyFn{}(v)); } template - std::size_t operator()(const Key& v) const + auto operator()(const Key& v) const { return Hash{}(v); } diff --git a/test/extra/persist/CMakeLists.txt b/test/extra/persist/CMakeLists.txt index d2986db3..2e479be2 100644 --- a/test/extra/persist/CMakeLists.txt +++ b/test/extra/persist/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable( test_table_box_recursive.cpp test_for_docs.cpp test_containers_cereal.cpp + test_hash_size.cpp ${PROJECT_SOURCE_DIR}/immer/extra/persist/xxhash/xxhash_64.cpp) target_precompile_headers( persist-tests PRIVATE @@ -34,8 +35,9 @@ target_link_libraries(persist-tests PRIVATE fmt::fmt Catch2::Catch2WithMain target_compile_options(persist-tests PRIVATE -O1 -fno-optimize-sibling-calls -g -fno-omit-frame-pointer) -target_compile_options(persist-tests PRIVATE -Wno-unused-function - -Wno-c++20-designator) +target_compile_options( + persist-tests PRIVATE -Wno-unused-function -Wno-c++20-designator + -Wimplicit-int-conversion) if(ENABLE_ASAN) target_compile_options( @@ -43,7 +45,8 @@ if(ENABLE_ASAN) target_link_options(persist-tests PRIVATE -fsanitize=address) endif() target_compile_definitions(persist-tests PRIVATE BOOST_USE_ASAN=1) -target_compile_definitions(persist-tests PRIVATE IMMER_NO_FREE_LIST=1) +target_compile_definitions(persist-tests PRIVATE IMMER_NO_FREE_LIST=1 + IMMER_DEBUG_STATS=1) install(TARGETS persist-tests DESTINATION bin) install(FILES valgrind.supp DESTINATION bin) diff --git a/test/extra/persist/test_hash_size.cpp b/test/extra/persist/test_hash_size.cpp new file mode 100644 index 00000000..b4f354ce --- /dev/null +++ b/test/extra/persist/test_hash_size.cpp @@ -0,0 +1,149 @@ +#include + +#include + +namespace { + +template +struct my_hash_t +{ + rep_t data{}; + + my_hash_t() = default; + + constexpr explicit my_hash_t(rep_t data_) + : data{data_} + { + } + + explicit operator bool() { return data; } + + my_hash_t& operator=(rep_t data_) + { + data = data_; + return *this; + } + + friend bool operator==(const my_hash_t& left, const my_hash_t& right) + { + return left.data == right.data; + } + + template + friend constexpr auto operator<<(my_hash_t left, const T& other) + { + auto result = static_cast(left.data << other); + return my_hash_t{result}; + } + + // NOTE: Here we escape from the my_hash_t + template + friend constexpr auto operator<<(const T& other, my_hash_t right) + { + return other << right.data; + } + + // NOTE: Here we escape from the my_hash_t + template + friend constexpr auto operator>>(my_hash_t left, const T& other) + { + return left.data >> other; + } + + template + friend constexpr auto operator-(my_hash_t left, const T& other) + { + auto result = static_cast(left.data - other); + return my_hash_t{result}; + } + + template + friend constexpr auto operator&(my_hash_t left, const my_hash_t& other) + { + auto result = static_cast(left.data & other.data); + return my_hash_t{result}; + } + + template + friend constexpr std::enable_if_t, + my_hash_t> + operator&(const T& other, my_hash_t right) + { + auto result = static_cast(other & right.data); + return my_hash_t{result}; + } +}; + +struct small_hash +{ + my_hash_t<> operator()(const std::string& str) const + { + return my_hash_t{ + static_cast(XXH3_64bits(str.c_str(), str.size()))}; + } +}; + +auto gen_strings() +{ + auto result = std::vector{}; + for (int i = 0; i < 1000; ++i) { + result.push_back(fmt::format("__{}__", i)); + } + return result; +} + +struct table_item +{ + std::string id; + int data; +}; + +} // namespace + +TEST_CASE("Test hash size for set") +{ + const auto strings = gen_strings(); + REQUIRE(strings.size() == 1000); + + const auto set = + immer::set{strings.begin(), strings.end()}; + REQUIRE(set.size() == strings.size()); + for (const auto& item : strings) { + REQUIRE(set.count(item)); + } +} + +TEST_CASE("Test hash size for map") +{ + const auto strings = gen_strings(); + REQUIRE(strings.size() == 1000); + + auto map = immer::map{}; + for (const auto& [index, item] : boost::adaptors::index(strings)) { + map = std::move(map).set(item, index); + } + + REQUIRE(map.size() == strings.size()); + for (const auto& [index, item] : boost::adaptors::index(strings)) { + REQUIRE(map[item] == index); + } +} + +TEST_CASE("Test hash size for table") +{ + const auto strings = gen_strings(); + REQUIRE(strings.size() == 1000); + + auto table = immer::table{}; + for (const auto& [index, item] : boost::adaptors::index(strings)) { + table = std::move(table).insert(table_item{ + .id = item, + .data = static_cast(index), + }); + } + + REQUIRE(table.size() == strings.size()); + for (const auto& [index, item] : boost::adaptors::index(strings)) { + REQUIRE(table[item].data == index); + } +}