diff --git a/doc/doxygen.config b/doc/doxygen.config index b6002b8d..53e911b5 100644 --- a/doc/doxygen.config +++ b/doc/doxygen.config @@ -9,7 +9,8 @@ INPUT = \ ../immer \ ../immer/heap \ ../immer/refcount \ - ../immer/transience + ../immer/transience \ + ../immer/extra/persist/json INCLUDE_PATH = .. QUIET = YES diff --git a/immer/extra/persist/README.rst b/immer/extra/persist/README.rst index c593ff61..4824f3ec 100644 --- a/immer/extra/persist/README.rst +++ b/immer/extra/persist/README.rst @@ -3,3 +3,61 @@ Persist =============== This library allows to preserve structural sharing of immer containers while serializing and deserializing them. + + +Motivation: serialization +------------------------- + +Structural sharing allows immer containers to be efficient. At runtime, two distinct containers can be operated on independently, but internally they share nodes and +use memory efficiently in that way. But when such containers are serialized in a simple direct way, for example, as lists, this sharing is lost: they become truly +independent, and the same data is stored multiple times on disk and later, when it is read from disk, in memory. + +This library operates on the internal structure of immer containers, allowing it to be serialized and deserialized (and also transformed). That allows for more efficient +storage (especially when a lot of nodes are reused) and, even more importantly, for preserving structural sharing after deserializing the containers. + + +Motivation: transformation +-------------------------- + +Imagine this scenario: an application has a document type that uses an immer container internally in multiple places, for example, a vector of strings. Some of these vectors +would be completely identical, some would differ in just a few elements (stored in an undo history, for example). And we want to run a transformation function +over these vectors. 
+ +A direct approach would be to take each vector and create a new vector by applying the transformation function to each element. But after this, all the structural sharing +of the original containers would be lost: we would have multiple independent vectors without any structural sharing. + +This library allows applying the transformation function directly to the nodes, which preserves structural sharing. Additionally, no matter how many times +a node is reused, the transformation needs to be performed only once. + + +First example +------------- + +For this example, we'll use a `document` type that contains two immer vectors. + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: intro/start-types + :end-before: intro/end-types + +**Without immer-persist** + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: intro/start-no-persist + :end-before: intro/end-no-persist + +**With immer-persist** + +.. literalinclude:: ../test/extra/persist/test_for_docs.cpp + :language: c++ + :start-after: intro/start-with-persist + :end-before: intro/end-with-persist + + +API Overview +------------ + +.. doxygengroup:: persist-api + :project: immer + :content-only: diff --git a/immer/extra/persist/json/json_with_pool.hpp b/immer/extra/persist/json/json_with_pool.hpp index 4c83b2cb..2d63cb55 100644 --- a/immer/extra/persist/json/json_with_pool.hpp +++ b/immer/extra/persist/json/json_with_pool.hpp @@ -10,6 +10,16 @@ namespace immer::persist { +/** + * @defgroup persist-api + * @{ + */ + +/** + * @brief Serialize the provided value using the provided policy to JSON. 
+ * + * @see Policy + */ template Policy = default_policy> @@ -179,4 +189,6 @@ auto convert_container(const detail::output_pools& old_save_pools, return result; } +/** @} */ // group: persist-api + } // namespace immer::persist diff --git a/immer/extra/persist/json/policy.hpp b/immer/extra/persist/json/policy.hpp index e8b37038..b880d201 100644 --- a/immer/extra/persist/json/policy.hpp +++ b/immer/extra/persist/json/policy.hpp @@ -5,6 +5,19 @@ namespace immer::persist { +/** + * @brief Policy is a type that describes certain aspects of serialization for + * immer-persist. + * - How to call into the cereal archive to save and load the + * user-provided value. Can be used to serialize the value inline (without the + * "value0" node) by taking a dependency on + * https://github.com/LowCostCustoms/cereal-inline, for example. + * - Types of immer containers that will be serialized using pools. One + * pool contains nodes of only one immer container type. + * - Names for each per-type pool. + * + * @ingroup persist-api + */ template concept Policy = requires(Value value, T policy) { policy.get_pool_types(value); }; diff --git a/test/extra/persist/CMakeLists.txt b/test/extra/persist/CMakeLists.txt index ab30a659..d41ee3a6 100644 --- a/test/extra/persist/CMakeLists.txt +++ b/test/extra/persist/CMakeLists.txt @@ -19,6 +19,7 @@ add_executable( test_conversion.cpp test_circular_dependency_conversion.cpp test_table_box_recursive.cpp + test_for_docs.cpp ${PROJECT_SOURCE_DIR}/immer/extra/persist/xxhash/xxhash_64.cpp) target_precompile_headers(persist-tests PRIVATE ) diff --git a/test/extra/persist/test_for_docs.cpp b/test/extra/persist/test_for_docs.cpp new file mode 100644 index 00000000..118f1a3c --- /dev/null +++ b/test/extra/persist/test_for_docs.cpp @@ -0,0 +1,140 @@ +#include + +#include + +#include "utils.hpp" +#include + +namespace { + +// include:intro/start-types +// Set the BL constant to 1, so that only 2 elements are stored in leaves. 
+// This makes it possible to demonstrate structural sharing even in vectors with +// just a few elements. +using vector_one = + immer::vector; + +struct document +{ + // Make it a boost::hana Struct. + // This allows the persist library to determine what pool types are needed + // and also to name the pools. + BOOST_HANA_DEFINE_STRUCT(document, + (vector_one, ints), + (vector_one, ints2) // + ); + + friend bool operator==(const document&, const document&) = default; + + // Make the struct serializable with cereal as usual; nothing here is + // specific to immer-persist. + template + void serialize(Archive& ar) + { + ar(CEREAL_NVP(ints), CEREAL_NVP(ints2)); + } +}; + +using json_t = nlohmann::json; +// include:intro/end-types + +} // namespace + +TEST_CASE("Docs save with immer-persist", "[docs]") +{ + // include:intro/start-no-persist + const auto v1 = vector_one{1, 2, 3}; + const auto v2 = v1.push_back(4).push_back(5).push_back(6); + // Vector v2 uses structural sharing to reuse the nodes that store the + // values of v1. + const auto value = document{v1, v2}; + + SECTION("Without immer-persist") + { + // Vectors are serialized directly as lists: notably, as independent + // lists that repeat the shared elements. 
+ const auto expected_json = json_t::parse(R"( +{"value0": {"ints": [1, 2, 3], "ints2": [1, 2, 3, 4, 5, 6]}} +)"); + const auto str = [&] { + auto os = std::ostringstream{}; + { + auto ar = cereal::JSONOutputArchive{os}; + ar(value); + } + return os.str(); + }(); + REQUIRE(json_t::parse(str) == expected_json); + + const auto loaded_value = [&] { + auto is = std::istringstream{str}; + auto ar = cereal::JSONInputArchive{is}; + auto r = document{}; + ar(r); + return r; + }(); + + REQUIRE(value == loaded_value); + } + // include:intro/end-no-persist + + SECTION("With immer-persist") + { + // include:intro/start-with-persist + // Immer-persist uses policies to control certain aspects of + // serialization: + // - types of pools that should be used + // - names of those pools + const auto policy = + immer::persist::hana_struct_auto_member_name_policy(document{}); + const auto str = immer::persist::to_json_with_pool(value, policy); + + // The resulting JSON looks much more complicated for this little + // example, but the more structural sharing is used inside the serialized + // value, the bigger the benefit from using immer-persist. + // + // Notable points for the structure of this JSON: + // - vectors "ints" and "ints2" are serialized as integers, referring to + // the vectors inside the pools + // - there is a "pools" object serialized next to the value itself + // - the "pools" object contains one pool per container type; in + // this example only one, for `immer::vector` + // + // The vector pool contains: + // - B and BL constants for the corresponding `immer::vector` type + // - "inners" and "leaves" maps that store the actual nodes of the + // vector + // - "vectors" list that stores the root and tail of each vector + // structure, so that a vector can be referred to by one integer: + // `{"ints": 0, "ints2": 1}`: 0 and 1 refer to the indices of this + // array. 
+ const auto expected_json = json_t::parse(R"( +{ + "value0": {"ints": 0, "ints2": 1}, + "pools": { + "ints": { + "B": 5, + "BL": 1, + "inners": [ + {"key": 0, "value": {"children": [2], "relaxed": false}}, + {"key": 3, "value": {"children": [2, 5], "relaxed": false}} + ], + "leaves": [ + {"key": 1, "value": [3]}, + {"key": 2, "value": [1, 2]}, + {"key": 4, "value": [5, 6]}, + {"key": 5, "value": [3, 4]} + ], + "vectors": [{"root": 0, "tail": 1}, {"root": 3, "tail": 4}] + } + } +} + )"); + REQUIRE(json_t::parse(str) == expected_json); + + const auto loaded_value = + immer::persist::from_json_with_pool(str, policy); + REQUIRE(value == loaded_value); + // include:intro/end-with-persist + } +}