From af5ca43c579fadd3e9ff182c2dbaec7c9978be86 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Fri, 15 Oct 2021 15:05:05 -0500 Subject: [PATCH 1/7] store extra information in iterator needed to find base of allocation When the user intends to use an explicit, relocating garbage collector in conjunction with the champ type, it becomes necessary to be able to adjust the pointers inside a champ_iterator to point to the new address of the relocated collection node that is currently on the iterator's path. In order to do this, it is necessary to be able to compute the base pointer of the allocation for the node containing the buffer of node pointers into which that particular path entry is pointing. However, this is not currently deducible from the information stored inside the iterator itself. Here, at the cost of an additional max_depth bytes (which amounts to 16 bytes by default including padding) per iterator created, we maintain that information in the form of an integer offset for each nonzero entry in the path. The garbage collector can then take each node** in the path and subtract the corresponding offset from it using pointer arithmetic in order to obtain the address corresponding to the start of the children buffer, from which the base of the allocation is a known constant offset. In order to save space, we assume that each node can have at most 256 children. This seems to me to be a safe assumption, because the B value that is a template parameter appears to trigger a static assertion if it is ever set greater than 6. By the same logic, we also allocate an additional 4 bytes in order to perform the same set of steps with the pointers to the values. By default, on 64-bit machines, these four bytes live inside what was previously padding between `depth_` and `path_`, thus they do not actually affect the overall size of the iterator. This may not be true on 32-bit machines, however. --- immer/detail/hamts/champ_iterator.hpp | 37 +++++++++++++++++++-------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index 373198bb..cf948f12 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -32,6 +32,7 @@ struct champ_iterator champ_iterator(const tree_t& v) : depth_{0} + , cur_off_{0} { if (v.root->datamap()) { cur_ = v.root->values(); @@ -47,6 +48,7 @@ struct champ_iterator : cur_{nullptr} , end_{nullptr} , depth_{0} + , cur_off_{0} { path_[0] = &v.root; } @@ -55,6 +57,7 @@ struct champ_iterator : cur_{other.cur_} , end_{other.end_} , depth_{other.depth_} + , cur_off_{other.cur_off_} { std::copy(other.path_, other.path_ + depth_ + 1, path_); } @@ -65,13 +68,18 @@ struct champ_iterator T* cur_; T* end_; count_t depth_; + count_t cur_off_; node_t* const* path_[max_depth + 1] = { 0, }; + std::uint8_t path_off_[max_depth] = { + 0, + }; void increment() { ++cur_; + ++cur_off_; ensure_valid_(); } @@ -82,17 +90,20 @@ struct champ_iterator assert(parent); if (parent->nodemap()) { ++depth_; - path_[depth_] = parent->children(); - auto child = *path_[depth_]; + path_[depth_] = parent->children(); + path_off_[depth_ - 1] = 0; + auto child = *path_[depth_]; assert(child); if (depth_ < max_depth) { if (child->datamap()) { - cur_ = child->values(); - end_ = cur_ + child->data_count(); + cur_ = child->values(); + end_ = cur_ + child->data_count(); + cur_off_ = 0; } } else { - cur_ = child->collisions(); - end_ = cur_ + child->collision_count(); + cur_ = child->collisions(); + end_ = cur_ + child->collision_count(); + cur_off_ = 0; } return true; } @@ -108,16 +119,19 @@ struct champ_iterator auto next = path_[depth_] + 1; if (next < last) { path_[depth_] = next; - auto child = *path_[depth_]; + ++path_off_[depth_ - 1]; + auto child = *path_[depth_]; assert(child); if (depth_ < max_depth) { if (child->datamap()) { - cur_ = child->values(); - end_ = cur_ + child->data_count(); + cur_ = child->values(); + end_ = cur_ + child->data_count(); + cur_off_ = 0; } } else { - cur_ = child->collisions(); - end_ = cur_ + child->collision_count(); + cur_ = child->collisions(); + end_ = cur_ + child->collision_count(); + cur_off_ = 0; } return true; } @@ -136,6 +150,7 @@ struct champ_iterator // end of sequence assert(depth_ == 0); cur_ = end_ = nullptr; + cur_off_ = 0; return; } } From b26d2cbf61ee969089ca7886205e6a6a038f8d44 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Fri, 5 Nov 2021 13:03:04 -0500 Subject: [PATCH 2/7] create a new memory policy: relocation_policy This commit refactors the previous implementation of this PR so that both the memory and compute overhead of tracking relocation information in the iterator is only performed if a specific memory_policy is enabled. We create a new relocation_policy policy and track two different implementations: no_relocation_policy, which is the default in all cases, and gc_relocation_policy, which should be enabled if the user is using the library with a relocating garbage collector. Because so much information about how to track relocation information is type-specific, the policy acts merely as a marker, and each type that needs to track relocation information is responsible for defining a template that does nothing and has no members if relocation is disabled, and otherwise stores the necessary information and maintains it if that information is required. We then instrument the class that needs to track data by inserting calls to methods on the relocation information template, which will be responsible for updating the information as necessary. --- immer/detail/hamts/champ_iterator.hpp | 80 +++++++++++++++-------- immer/memory_policy.hpp | 5 +- immer/relocation/gc_relocation_policy.hpp | 22 +++++++ immer/relocation/no_relocation_policy.hpp | 22 +++++++ 4 files changed, 101 insertions(+), 28 deletions(-) create mode 100644 immer/relocation/gc_relocation_policy.hpp create mode 100644 immer/relocation/no_relocation_policy.hpp diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index cf948f12..3343c11a 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -10,11 +10,38 @@ #include #include +#include namespace immer { namespace detail { namespace hamts { +template +struct relocation_info_t +{ + void reset() {} + void increment() {} + void step_right(count_t) {} + void step_down(count_t) {} +}; + +template +struct relocation_info_t +{ + count_t cur_off_; + std::uint8_t path_off_[max_depth] = { + 0, + }; + + void reset() { cur_off_ = 0; } + + void increment() { cur_off_++; } + + void step_right(count_t depth) { ++path_off_[depth - 1]; } + + void step_down(count_t depth) { path_off_[depth - 1] = 0; } +}; + template struct champ_iterator : iterator_facade, @@ -24,15 +51,17 @@ struct champ_iterator std::ptrdiff_t, const T*> { - using tree_t = champ; - using node_t = typename tree_t::node_t; + using memory = MP; + using relocation = typename memory::relocation; + using tree_t = champ; + using node_t = typename tree_t::node_t; struct end_t {}; champ_iterator(const tree_t& v) : depth_{0} - , cur_off_{0} + , relocation_info_{} { if (v.root->datamap()) { cur_ = v.root->values(); @@ -48,7 +77,7 @@ struct champ_iterator : cur_{nullptr} , end_{nullptr} , depth_{0} - , cur_off_{0} + , relocation_info_{} { path_[0] = &v.root; } @@ -57,7 +86,7 @@ struct champ_iterator : cur_{other.cur_} , end_{other.end_} , depth_{other.depth_} - , cur_off_{other.cur_off_} + , relocation_info_{other.relocation_info_} { std::copy(other.path_, other.path_ + depth_ + 1, path_); } @@ -68,18 +97,15 @@ struct champ_iterator T* cur_; T* end_; count_t depth_; - count_t cur_off_; node_t* const* path_[max_depth + 1] = { 0, }; - std::uint8_t path_off_[max_depth] = { - 0, - }; + relocation_info_t relocation_info_; void increment() { ++cur_; - ++cur_off_; + relocation_info_.increment(); ensure_valid_(); } @@ -90,20 +116,20 @@ struct champ_iterator assert(parent); if (parent->nodemap()) { ++depth_; - path_[depth_] = parent->children(); - path_off_[depth_ - 1] = 0; - auto child = *path_[depth_]; + path_[depth_] = parent->children(); + relocation_info_.step_down(depth_); + auto child = *path_[depth_]; assert(child); if (depth_ < max_depth) { if (child->datamap()) { - cur_ = child->values(); - end_ = cur_ + child->data_count(); - cur_off_ = 0; + cur_ = child->values(); + end_ = cur_ + child->data_count(); + relocation_info_.reset(); } } else { - cur_ = child->collisions(); - end_ = cur_ + child->collision_count(); - cur_off_ = 0; + cur_ = child->collisions(); + end_ = cur_ + child->collision_count(); + relocation_info_.reset(); } return true; } @@ -119,19 +145,19 @@ struct champ_iterator auto next = path_[depth_] + 1; if (next < last) { path_[depth_] = next; - ++path_off_[depth_ - 1]; + relocation_info_.step_right(depth_); auto child = *path_[depth_]; assert(child); if (depth_ < max_depth) { if (child->datamap()) { - cur_ = child->values(); - end_ = cur_ + child->data_count(); - cur_off_ = 0; + cur_ = child->values(); + end_ = cur_ + child->data_count(); + relocation_info_.reset(); } } else { - cur_ = child->collisions(); - end_ = cur_ + child->collision_count(); - cur_off_ = 0; + cur_ = child->collisions(); + end_ = cur_ + child->collision_count(); + relocation_info_.reset(); } return true; } @@ -150,7 +176,7 @@ struct champ_iterator // end of sequence assert(depth_ == 0); cur_ = end_ = nullptr; - cur_off_ = 0; + relocation_info_.reset(); return; } } diff --git a/immer/memory_policy.hpp b/immer/memory_policy.hpp index d8556b94..75ad038e 100644 --- a/immer/memory_policy.hpp +++ b/immer/memory_policy.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -90,13 +91,15 @@ template , bool UseTransientRValues = - get_use_transient_rvalues_v> + get_use_transient_rvalues_v, + typename RelocationPolicy = no_relocation_policy> struct memory_policy { using heap = HeapPolicy; using refcount = RefcountPolicy; using transience = TransiencePolicy; using lock = LockPolicy; + using relocation = RelocationPolicy; static constexpr bool prefer_fewer_bigger_objects = PreferFewerBiggerObjects; diff --git a/immer/relocation/gc_relocation_policy.hpp b/immer/relocation/gc_relocation_policy.hpp new file mode 100644 index 00000000..b2428daa --- /dev/null +++ b/immer/relocation/gc_relocation_policy.hpp @@ -0,0 +1,22 @@ +// +// immer: immutable data structures for C++ +// Copyright (C) 2016, 2017, 2018 Juan Pedro Bolivar Puente +// +// This software is distributed under the Boost Software License, Version 1.0. +// See accompanying file LICENSE or copy at http://boost.org/LICENSE_1_0.txt +// + +#pragma once + +namespace immer { + +/*! + * Enables relocation information tracking; used when a relocating garbage + * collector is present in the final system. + */ +struct gc_relocation_policy +{ + gc_relocation_policy(){}; +}; + +} // namespace immer diff --git a/immer/relocation/no_relocation_policy.hpp b/immer/relocation/no_relocation_policy.hpp new file mode 100644 index 00000000..4a6cb95b --- /dev/null +++ b/immer/relocation/no_relocation_policy.hpp @@ -0,0 +1,22 @@ +// +// immer: immutable data structures for C++ +// Copyright (C) 2016, 2017, 2018 Juan Pedro Bolivar Puente +// +// This software is distributed under the Boost Software License, Version 1.0. +// See accompanying file LICENSE or copy at http://boost.org/LICENSE_1_0.txt +// + +#pragma once + +namespace immer { + +/*! + * Disables relocation information tracking; used when a relocating garbage + * collector is not present in the final system. + */ +struct no_relocation_policy +{ + no_relocation_policy(){}; +}; + +} // namespace immer From 6f5da97cc5506fd411c9b3013d84118455618a4e Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Mon, 8 Nov 2021 12:57:22 -0600 Subject: [PATCH 3/7] use boolean flag instead of typename for gc tracking --- immer/detail/hamts/champ_iterator.hpp | 13 +++++-------- immer/memory_policy.hpp | 7 ++++--- immer/relocation/gc_relocation_policy.hpp | 22 ---------------------- immer/relocation/no_relocation_policy.hpp | 22 ---------------------- 4 files changed, 9 insertions(+), 55 deletions(-) delete mode 100644 immer/relocation/gc_relocation_policy.hpp delete mode 100644 immer/relocation/no_relocation_policy.hpp diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index 3343c11a..3008c6cd 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -10,13 +10,12 @@ #include #include -#include namespace immer { namespace detail { namespace hamts { -template +template struct relocation_info_t { void reset() {} @@ -26,7 +25,7 @@ struct relocation_info_t }; template -struct relocation_info_t +struct relocation_info_t { count_t cur_off_; std::uint8_t path_off_[max_depth] = { @@ -51,10 +50,8 @@ struct champ_iterator std::ptrdiff_t, const T*> { - using memory = MP; - using relocation = typename memory::relocation; - using tree_t = champ; - using node_t = typename tree_t::node_t; + using tree_t = champ; + using node_t = typename tree_t::node_t; struct end_t {}; @@ -100,7 +97,7 @@ struct champ_iterator node_t* const* path_[max_depth + 1] = { 0, }; - relocation_info_t relocation_info_; + relocation_info_t relocation_info_; void increment() { diff --git a/immer/memory_policy.hpp b/immer/memory_policy.hpp index 75ad038e..2a26f089 100644 --- a/immer/memory_policy.hpp +++ b/immer/memory_policy.hpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -92,20 +91,22 @@ template , bool UseTransientRValues = get_use_transient_rvalues_v, - typename RelocationPolicy = no_relocation_policy> + bool TrackBaseOffsetOfPointers = false> struct memory_policy { using heap = HeapPolicy; using refcount = RefcountPolicy; using transience = TransiencePolicy; using lock = LockPolicy; - using relocation = RelocationPolicy; static constexpr bool prefer_fewer_bigger_objects = PreferFewerBiggerObjects; static constexpr bool use_transient_rvalues = UseTransientRValues; + static constexpr bool track_base_offset_of_pointers = + TrackBaseOffsetOfPointers; + using transience_t = typename transience::template apply::type; }; diff --git a/immer/relocation/gc_relocation_policy.hpp b/immer/relocation/gc_relocation_policy.hpp deleted file mode 100644 index b2428daa..00000000 --- a/immer/relocation/gc_relocation_policy.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// immer: immutable data structures for C++ -// Copyright (C) 2016, 2017, 2018 Juan Pedro Bolivar Puente -// -// This software is distributed under the Boost Software License, Version 1.0. -// See accompanying file LICENSE or copy at http://boost.org/LICENSE_1_0.txt -// - -#pragma once - -namespace immer { - -/*! - * Enables relocation information tracking; used when a relocating garbage - * collector is present in the final system. - */ -struct gc_relocation_policy -{ - gc_relocation_policy(){}; -}; - -} // namespace immer diff --git a/immer/relocation/no_relocation_policy.hpp b/immer/relocation/no_relocation_policy.hpp deleted file mode 100644 index 4a6cb95b..00000000 --- a/immer/relocation/no_relocation_policy.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// immer: immutable data structures for C++ -// Copyright (C) 2016, 2017, 2018 Juan Pedro Bolivar Puente -// -// This software is distributed under the Boost Software License, Version 1.0. -// See accompanying file LICENSE or copy at http://boost.org/LICENSE_1_0.txt -// - -#pragma once - -namespace immer { - -/*! - * Disables relocation information tracking; used when a relocating garbage - * collector is not present in the final system. - */ -struct no_relocation_policy -{ - no_relocation_policy(){}; -}; - -} // namespace immer From 4e79e91d015c1b941dc5dccdf0f274343384b504 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Mon, 8 Nov 2021 14:54:01 -0600 Subject: [PATCH 4/7] expose relocation info as public After giving careful thought, I decided that the relocation information of a class probably ought to be public since the garbage collector is expected to need to be able to read this information. --- immer/detail/hamts/champ_iterator.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index 3008c6cd..5d306e81 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -88,6 +88,8 @@ struct champ_iterator std::copy(other.path_, other.path_ + depth_ + 1, path_); } + const auto relocation_info() const { return relocation_info_; } + private: friend iterator_core_access; From 17e4b4b26d0def3c43100f019143041053169d96 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Mon, 8 Nov 2021 14:54:39 -0600 Subject: [PATCH 5/7] unit test for relocation info for champ_iterator This test is slightly fragile in that it probably will not work if the depth of the tree is uneven across different nodes. However, in order to make it completely robust irrespective of the platform-specific details of the hash function and the number of elements in the set being tested, it would be necessary to re-implement a lot of the functionality for walking the tree in the test, which seems counterproductive to me as we would end up just testing the business logic against itself rather than against a flat set of expectations. So intead, I ended up just implementing a partial check for the relocation information which tests the major behavior of the step_right and step_down functions in the iterator, rather than testing the entire set exhaustively. This allows the expected results of the test to be specified declaratively, which makes the test more appropriate in terms of what functionality it is actually testing. --- test/set/relocation.cpp | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 test/set/relocation.cpp diff --git a/test/set/relocation.cpp b/test/set/relocation.cpp new file mode 100644 index 00000000..152d8bf2 --- /dev/null +++ b/test/set/relocation.cpp @@ -0,0 +1,52 @@ +// +// immer: immutable data structures for C++ +// Copyright (C) 2016, 2017, 2018 Juan Pedro Bolivar Puente +// +// This software is distributed under the Boost Software License, Version 1.0. +// See accompanying file LICENSE or copy at http://boost.org/LICENSE_1_0.txt +// + +#include +#include + +using relocation_memory = immer::memory_policy; + +template , + typename Eq = std::equal_to> +using test_set_t = immer::set; + +TEST_CASE("relocation info") +{ + auto v = test_set_t{}; + for (auto i = 0u; i < 512u; ++i) + v = v.insert(i); + auto iter = v.begin(); + test_set_t::iterator::node_t *node = v.impl().root, *parent; + unsigned depth = 0; + + while (node->nodemap()) { + parent = node; + node = node->children()[0]; + ++depth; + } + CHECK(depth >= 2); + for (auto i = 0u; i < parent->children_count(); ++i) { + for (auto j = 0u; j < node->data_count(); ++j) { + CHECK(iter.relocation_info().cur_off_ == j); + CHECK(iter.relocation_info().path_off_[depth] == 0); + CHECK(iter.relocation_info().path_off_[depth - 1] == i); + CHECK(iter.relocation_info().path_off_[depth - 2] == 0); + ++iter; + } + if (i + 1 < parent->children_count()) { + node = parent->children()[i + 1]; + } + } +} From 99741d7c73e5dbf03dfd0526c218327f72f6e780 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Mon, 8 Nov 2021 15:06:15 -0600 Subject: [PATCH 6/7] remove ignored qualifier --- immer/detail/hamts/champ_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/immer/detail/hamts/champ_iterator.hpp b/immer/detail/hamts/champ_iterator.hpp index 5d306e81..594ad750 100644 --- a/immer/detail/hamts/champ_iterator.hpp +++ b/immer/detail/hamts/champ_iterator.hpp @@ -88,7 +88,7 @@ struct champ_iterator std::copy(other.path_, other.path_ + depth_ + 1, path_); } - const auto relocation_info() const { return relocation_info_; } + auto relocation_info() const { return relocation_info_; } private: friend iterator_core_access; From 71ba3b56ed0b4dafe159dc45a6c08a1acb83d8fb Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Mon, 8 Nov 2021 15:17:18 -0600 Subject: [PATCH 7/7] fix warning about uninitialized variable --- test/set/relocation.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/set/relocation.cpp b/test/set/relocation.cpp index 152d8bf2..15caa113 100644 --- a/test/set/relocation.cpp +++ b/test/set/relocation.cpp @@ -27,9 +27,10 @@ TEST_CASE("relocation info") auto v = test_set_t{}; for (auto i = 0u; i < 512u; ++i) v = v.insert(i); - auto iter = v.begin(); - test_set_t::iterator::node_t *node = v.impl().root, *parent; - unsigned depth = 0; + auto iter = v.begin(); + test_set_t::iterator::node_t *node = v.impl().root, + *parent = nullptr; + unsigned depth = 0; while (node->nodemap()) { parent = node;