diff --git a/doc/persist.rst b/doc/persist.rst index 845ac78b..442922a0 100644 --- a/doc/persist.rst +++ b/doc/persist.rst @@ -154,10 +154,10 @@ The most straightforward way would be to simply create new containers with the n function over each element. However, this approach has some disadvantages: - All new containers will be independent, no structural sharing will be preserved and the same data would be stored - multiple times. + multiple times. - The transformation would be applied more times than necessary when some of the data is shared. Example: one vector - is built by appending elements to the other vector. Transforming shared elements multiple times could be - unnecessary. + is built by appending elements to the other vector. Transforming shared elements multiple times could be + unnecessary. Let's look at a simple case using the document from the :ref:`first-example`. The desired transformation would be to multiply each element of the ``immer::vector`` by 10. @@ -218,6 +218,111 @@ the ``new_value`` and inspect the JSON: And indeed, we can see in the JSON that the node ``{"key": 2, "value": [10, 20]}`` is reused in both vectors. +Transformation into a different type +------------------------------------ + +The transforming function can even return a different type. In the following example ``vector<int>`` is transformed into ``vector<std::string>``. +The first two steps are the same as in the previous example: + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: intro/start-prepare-value + :end-before: intro/end-prepare-value + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-get_auto_pool + :end-before: end-get_auto_pool + +Only this time the transforming function will convert an integer into a string: + +.. 
literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-conversion_map-string + :end-before: end-conversion_map-string + +Then we convert the two vectors the same way as before: + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-convert-vectors-of-strings + :end-before: end-convert-vectors-of-strings + +And in order to confirm that the structural sharing has been preserved, we can introduce a new document type with +the two vectors being ``vector<std::string>``. + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-document_str + :end-before: end-document_str + +And serialize it with pools: + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-save-new_value-str + :end-before: end-save-new_value-str + +In the resulting JSON we can confirm that the node ``{"key": 2, "value": ["_1_", "_2_"]}`` is reused for both vectors. + + +Transforming hash-based containers +---------------------------------- + +As shown above, converting vectors is conceptually simple: the transforming function is applied to each element of +each node, producing a new node with the transformed elements. When it comes to the hash-based containers, that is `set +<https://sinusoid.es/immer/containers.html#set>`_, `map <https://sinusoid.es/immer/containers.html#map>`_ and `table <https://sinusoid.es/immer/containers.html#table>`_, their structure is defined +by the hash function used, so defining the transformation may become a bit more verbose. + +In the following example we'll start with a simple case transforming a map. For a map, only the hash of the key matters and we will not modify the key yet. +We will focus on transformations here and not on the structural sharing within the document, so we will use the ``immer`` container itself as the document. +Let's define the following policy to say that we want to use pools only for our container: + +.. 
literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-direct_container_policy + :end-before: end-direct_container_policy + +By default, ``immer`` uses ``std::hash`` for the hash-based containers. While sufficient for runtime use, this hash can't be used for persistence, as +the `C++ reference <https://en.cppreference.com/w/cpp/utility/hash>`_ notes: + +.. note:: + Hash functions are only required to produce the same result for the same input within a single execution of a program + +We will use `xxHash <https://xxhash.com/>`_ as the hash for this example. Let's create a small map like this: + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-prepare-int-map + :end-before: end-prepare-int-map + +Our goal is to convert the value from ``int`` to ``std::string``. Let's create the ``conversion_map`` like this: + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-prepare-conversion_map + :end-before: end-prepare-conversion_map + +A few important details to note: + +- For maps, the transforming function accepts a pair of key and value, ``std::pair<std::string, int>``. +- The transforming function must also support being called with an argument of type + ``immer::persist::target_container_type_request``; we achieve this here by using ``hana::overload`` to tie two lambdas + into one callable value. When called with that argument, it should return an empty container of the type we're + transforming to. It has to be explicit like this since there is no good way to automatically determine the hash + algorithm for the new container. Even though in this case the type of the key doesn't change (and so the hash stays + the same), in other scenarios it might. + +Once the ``conversion_map`` is defined, the actual conversion is done as before: + +.. 
literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: start-transform-map + :end-before: end-transform-map + +And we can see that the original map's values have been transformed into strings. + + Policy ------ diff --git a/test/extra/persist/test_for_docs.cpp b/test/extra/persist/test_for_docs.cpp index 5e0c168c..5657fbb5 100644 --- a/test/extra/persist/test_for_docs.cpp +++ b/test/extra/persist/test_for_docs.cpp @@ -267,7 +267,27 @@ TEST_CASE("Custom policy", "[docs]") REQUIRE(value == loaded_value); } -TEST_CASE("Transform into same type", "[docs]") +namespace { +// include:start-document_str +struct document_str +{ + BOOST_HANA_DEFINE_STRUCT(document_str, + (vector_str, str), + (vector_str, str2) // + ); + + friend bool operator==(const document_str&, const document_str&) = default; + + template + void serialize(Archive& ar) + { + ar(CEREAL_NVP(str), CEREAL_NVP(str2)); + } +}; +// include:end-document_str +} // namespace + +TEST_CASE("Transformations", "[docs]") { const auto v1 = vector_one{1, 2, 3}; const auto v2 = v1.push_back(4).push_back(5).push_back(6); @@ -277,57 +297,169 @@ TEST_CASE("Transform into same type", "[docs]") const auto pools = immer::persist::get_auto_pool(value); // include:end-get_auto_pool - // include:start-conversion_map - namespace hana = boost::hana; - const auto conversion_map = hana::make_map(hana::make_pair( - hana::type_c, [](int val) { return val * 10; })); - // include:end-conversion_map + namespace hana = boost::hana; - // include:start-transformed_pools - auto transformed_pools = - immer::persist::transform_output_pool(pools, conversion_map); - // include:end-transformed_pools - - // include:start-convert-containers - const auto new_v1 = - immer::persist::convert_container(pools, transformed_pools, v1); - const auto expected_new_v1 = vector_one{10, 20, 30}; - REQUIRE(new_v1 == expected_new_v1); - - const auto new_v2 = - immer::persist::convert_container(pools, transformed_pools, v2); - const 
auto expected_new_v2 = vector_one{10, 20, 30, 40, 50, 60}; - REQUIRE(new_v2 == expected_new_v2); - - const auto new_value = document{new_v1, new_v2}; - // include:end-convert-containers - - // include:start-save-new_value - const auto policy = - immer::persist::hana_struct_auto_member_name_policy(document{}); - const auto str = immer::persist::cereal_save_with_pools(new_value, policy); - const auto expected_json = json_t::parse(R"( -{ - "pools": { - "ints": { - "B": 5, - "BL": 1, - "inners": [ - {"key": 0, "value": {"children": [2], "relaxed": false}}, - {"key": 3, "value": {"children": [2, 5], "relaxed": false}} - ], - "leaves": [ - {"key": 1, "value": [30]}, - {"key": 2, "value": [10, 20]}, - {"key": 4, "value": [50, 60]}, - {"key": 5, "value": [30, 40]} - ], - "vectors": [{"root": 0, "tail": 1}, {"root": 3, "tail": 4}] + SECTION("Into same type") + { + // include:start-conversion_map + const auto conversion_map = hana::make_map(hana::make_pair( + hana::type_c, [](int val) { return val * 10; })); + // include:end-conversion_map + + // include:start-transformed_pools + auto transformed_pools = + immer::persist::transform_output_pool(pools, conversion_map); + // include:end-transformed_pools + + // include:start-convert-containers + const auto new_v1 = + immer::persist::convert_container(pools, transformed_pools, v1); + const auto expected_new_v1 = vector_one{10, 20, 30}; + REQUIRE(new_v1 == expected_new_v1); + + const auto new_v2 = + immer::persist::convert_container(pools, transformed_pools, v2); + const auto expected_new_v2 = vector_one{10, 20, 30, 40, 50, 60}; + REQUIRE(new_v2 == expected_new_v2); + + const auto new_value = document{new_v1, new_v2}; + // include:end-convert-containers + + // include:start-save-new_value + const auto policy = + immer::persist::hana_struct_auto_member_name_policy(document{}); + const auto str = + immer::persist::cereal_save_with_pools(new_value, policy); + const auto expected_json = json_t::parse(R"( + { + "pools": { + "ints": { 
+ "B": 5, + "BL": 1, + "inners": [ + {"key": 0, "value": {"children": [2], "relaxed": false}}, + {"key": 3, "value": {"children": [2, 5], "relaxed": false}} + ], + "leaves": [ + {"key": 1, "value": [30]}, + {"key": 2, "value": [10, 20]}, + {"key": 4, "value": [50, 60]}, + {"key": 5, "value": [30, 40]} + ], + "vectors": [{"root": 0, "tail": 1}, {"root": 3, "tail": 4}] + } + }, + "value0": {"ints": 0, "ints2": 1} + } + )"); + REQUIRE(json_t::parse(str) == expected_json); + // include:end-save-new_value + } + + SECTION("Into a different type") + { + // include:start-conversion_map-string + const auto conversion_map = hana::make_map(hana::make_pair( + hana::type_c, + [](int val) -> std::string { return fmt::format("_{}_", val); })); + // include:end-conversion_map-string + + // include:start-convert-vectors-of-strings + auto transformed_pools = + immer::persist::transform_output_pool(pools, conversion_map); + + const auto new_v1 = + immer::persist::convert_container(pools, transformed_pools, v1); + const auto expected_new_v1 = vector_str{"_1_", "_2_", "_3_"}; + REQUIRE(new_v1 == expected_new_v1); + + const auto new_v2 = + immer::persist::convert_container(pools, transformed_pools, v2); + const auto expected_new_v2 = + vector_str{"_1_", "_2_", "_3_", "_4_", "_5_", "_6_"}; + REQUIRE(new_v2 == expected_new_v2); + // include:end-convert-vectors-of-strings + + // include:start-save-new_value-str + const auto new_value = document_str{new_v1, new_v2}; + const auto policy = + immer::persist::hana_struct_auto_member_name_policy(document_str{}); + const auto str = + immer::persist::cereal_save_with_pools(new_value, policy); + const auto expected_json = json_t::parse(R"( + { + "pools": { + "str": { + "B": 5, + "BL": 1, + "inners": [ + {"key": 0, "value": {"children": [2], "relaxed": false}}, + {"key": 3, "value": {"children": [2, 5], "relaxed": false}} + ], + "leaves": [ + {"key": 1, "value": ["_3_"]}, + {"key": 2, "value": ["_1_", "_2_"]}, + {"key": 4, "value": ["_5_", "_6_"]}, 
+ {"key": 5, "value": ["_3_", "_4_"]} + ], + "vectors": [{"root": 0, "tail": 1}, {"root": 3, "tail": 4}] + } + }, + "value0": {"str": 0, "str2": 1} + } + )"); + REQUIRE(json_t::parse(str) == expected_json); + // include:end-save-new_value-str } - }, - "value0": {"ints": 0, "ints2": 1} } - )"); - REQUIRE(json_t::parse(str) == expected_json); - // include:end-save-new_value + +namespace { +// include:start-direct_container_policy +template +struct direct_container_policy : immer::persist::value0_serialize_t +{ + auto get_pool_types(const auto&) const + { + return boost::hana::to_set(boost::hana::tuple_t); + } +}; +// include:end-direct_container_policy +} // namespace + +TEST_CASE("Transform hash-based containers", "[docs]") +{ + // include:start-prepare-int-map + using int_map_t = + immer::map>; + + const auto value = int_map_t{{"one", 1}, {"two", 2}}; + const auto pools = immer::persist::get_auto_pool( + value, direct_container_policy{}); + // include:end-prepare-int-map + + // include:start-prepare-conversion_map + namespace hana = boost::hana; + using string_map_t = immer:: + map>; + + const auto conversion_map = hana::make_map(hana::make_pair( + hana::type_c, + hana::overload( + [](const std::pair& item) { + return std::make_pair(item.first, + fmt::format("_{}_", item.second)); + }, + [](immer::persist::target_container_type_request) { + return string_map_t{}; + }))); + // include:end-prepare-conversion_map + + // include:start-transform-map + auto transformed_pools = + immer::persist::transform_output_pool(pools, conversion_map); + const auto new_value = + immer::persist::convert_container(pools, transformed_pools, value); + const auto expected_new = string_map_t{{"one", "_1_"}, {"two", "_2_"}}; + REQUIRE(new_value == expected_new); + // include:end-transform-map }