diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp index 3b3f3715..03a7348b 100644 --- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp +++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp @@ -149,6 +149,8 @@ class hierarchical_interleaved_bloom_filter //!\brief Manages counting ranges of values for the seqan::hibf::hierarchical_interleaved_bloom_filter. template class counting_agent_type; + //!\brief Contains information about the parent IBF and bin index. + struct previous_ibf_id_pair; /*!\name Constructors, destructor and assignment * \{ @@ -212,6 +214,17 @@ class hierarchical_interleaved_bloom_filter */ std::vector> next_ibf_id; + /*!\brief Stores for each IBF of the HIBF the ID of the parent IBF and the bin index of the parent IBF. + * \details + * Assume we look up an IBF `i`, i.e. `prev_ibf_id[i]`. + * The returned value contains two members: + * * `ibf_idx` is the index of the parent IBF in `ibf_vector`. + * * `bin_idx` is the index of the bin in the parent IBF of which IBF `i` is the child. + * + * The root/top-level IBF has no parent and `ibf_idx == bin_idx == 0`. + */ + std::vector prev_ibf_id; + /*!\brief Stores for each bin in each IBF of the HIBF the user bin ID. * \details * Assume we look up a bin `b` in IBF `i`, i.e. `ibf_bin_to_user_bin_id[i][b]`. @@ -233,11 +246,15 @@ class hierarchical_interleaved_bloom_filter /*!\name Comparison operators * \{ */ - HIBF_CONSTEXPR_VECTOR bool operator==(hierarchical_interleaved_bloom_filter const &) const = default; + HIBF_CONSTEXPR_VECTOR bool operator==(hierarchical_interleaved_bloom_filter const &) const; //!\} /*!\cond DEV - * \brief Serialisation support function. + * \brief The version of the HIBF. + */ + static constexpr uint32_t version{1}; + + /*!\brief Serialisation support function. * \tparam archive_t Type of `archive`; must satisfy seqan::hibf::cereal_archive. 
* \param[in] archive The archive being serialised from/to. * @@ -247,9 +264,12 @@ class hierarchical_interleaved_bloom_filter template void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) { + uint32_t parsed_version{version}; + archive(cereal::make_nvp("version", parsed_version)); archive(number_of_user_bins); archive(ibf_vector); archive(next_ibf_id); + archive(prev_ibf_id); archive(ibf_bin_to_user_bin_id); } @@ -269,6 +289,25 @@ class hierarchical_interleaved_bloom_filter //!\endcond }; +//!\brief Contains information about the parent IBF and bin index. +struct hierarchical_interleaved_bloom_filter::previous_ibf_id_pair +{ + size_t ibf_idx{}; //!< The index of the parent IBF in `ibf_vector`. + size_t bin_idx{}; //!< The index of the bin in the parent IBF of which an IBF is the child. + + friend constexpr auto operator<=>(previous_ibf_id_pair const &, previous_ibf_id_pair const &) = default; + + template + void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) + { + archive(ibf_idx); + archive(bin_idx); + } +}; + +HIBF_CONSTEXPR_VECTOR bool +hierarchical_interleaved_bloom_filter::operator==(hierarchical_interleaved_bloom_filter const &) const = default; + class hierarchical_interleaved_bloom_filter::membership_agent_type { private: diff --git a/include/hibf/interleaved_bloom_filter.hpp b/include/hibf/interleaved_bloom_filter.hpp index fef0dfdc..0508f694 100644 --- a/include/hibf/interleaved_bloom_filter.hpp +++ b/include/hibf/interleaved_bloom_filter.hpp @@ -415,7 +415,11 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector //!\} /*!\cond DEV - * \brief Serialisation support function. + * \brief The version of the IBF. + */ + static constexpr uint32_t version{1}; + + /*!\brief Serialisation support function. * \tparam archive_t Type of `archive`; must satisfy seqan::hibf::cereal_archive. * \param[in] archive The archive being serialised from/to. 
* @@ -424,6 +428,8 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector template void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) { + uint32_t parsed_version{version}; + archive(cereal::make_nvp("version", parsed_version)); archive(bins); archive(technical_bins); archive(bin_size_); diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index de7b6da2..90eef301 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -42,7 +42,12 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, robin_hood::unordered_flat_set & parent_kmers, layout::graph::node const & current_node, build::build_data & data, - bool is_root) + bool is_root, + size_t const parent_ibf_idx = 0u) { size_t const ibf_pos{data.request_ibf_idx()}; + + // Record which IBF and bin this IBF hangs below. The root has no parent and keeps its value-initialised entry. + if (!is_root) + hibf.prev_ibf_id[ibf_pos] = {.ibf_idx = parent_ibf_idx, .bin_idx = current_node.parent_bin_index}; @@ -68,7 +73,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, kmers, current_node.children[current_node.favourite_child_idx.value()], data, - false); + false, + ibf_pos); return 1; } else // max bin is not a merged bin @@ -124,8 +130,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, auto & child = children[index]; robin_hood::unordered_flat_set local_kmers{}; - size_t const local_ibf_pos = hierarchical_build(hibf, local_kmers, child, data, false); + size_t const local_ibf_pos = hierarchical_build(hibf, local_kmers, child, data, false, ibf_pos); auto parent_bin_index = child.parent_bin_index; { size_t const mutex_id{parent_bin_index / 64}; std::lock_guard guard{local_ibf_mutex[mutex_id]}; @@ -184,6 +190,7 @@ void build_index(hierarchical_interleaved_bloom_filter & hibf, hibf.ibf_vector.resize(number_of_ibfs); hibf.ibf_bin_to_user_bin_id.resize(number_of_ibfs); + hibf.prev_ibf_id.resize(number_of_ibfs); hibf.next_ibf_id.resize(number_of_ibfs); build::build_data 
data{.config = config, .ibf_graph = {hibf_layout}};