Skip to content

Commit

Permalink
Merge pull request #254 from eseiler/feature/reverse_ibf_map
Browse files Browse the repository at this point in the history
[FEATURE] Add prev_ibf_id
  • Loading branch information
eseiler authored Dec 12, 2024
2 parents 239b446 + 30752ed commit 75856d1
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 6 deletions.
43 changes: 41 additions & 2 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ class hierarchical_interleaved_bloom_filter
//!\brief Manages counting ranges of values for the seqan::hibf::hierarchical_interleaved_bloom_filter.
template <std::integral value_t>
class counting_agent_type;
//!\brief Contains information about the parent IBF and bin index.
struct previous_ibf_id_pair;

/*!\name Constructors, destructor and assignment
* \{
Expand Down Expand Up @@ -212,6 +214,17 @@ class hierarchical_interleaved_bloom_filter
*/
std::vector<std::vector<uint64_t>> next_ibf_id;

/*!\brief Stores for each IBF of the HIBF the ID of the parent IBF and the bin index of the parent IBF.
* \details
* Assume we look up an IBF `i`, i.e. `prev_ibf_id[i]`.
* The returned value contains two members:
* * `ibf_idx` is the index of the parent IBF in `ibf_vector`.
* * `bin_idx` is the index of the bin in the parent IBF that IBF `i` is the child of.
*
* The root/top-level IBF has no parent and `ibf_idx == bin_idx == 0`.
*/
std::vector<previous_ibf_id_pair> prev_ibf_id;

/*!\brief Stores for each bin in each IBF of the HIBF the user bin ID.
* \details
* Assume we look up a bin `b` in IBF `i`, i.e. `ibf_bin_to_user_bin_id[i][b]`.
Expand All @@ -233,11 +246,15 @@ class hierarchical_interleaved_bloom_filter
/*!\name Comparison operators
* \{
*/
HIBF_CONSTEXPR_VECTOR bool operator==(hierarchical_interleaved_bloom_filter const &) const = default;
HIBF_CONSTEXPR_VECTOR bool operator==(hierarchical_interleaved_bloom_filter const &) const;
//!\}

/*!\cond DEV
* \brief Serialisation support function.
* \brief The version of the HIBF.
*/
static constexpr uint32_t version{1};

/*!\brief Serialisation support function.
* \tparam archive_t Type of `archive`; must satisfy seqan::hibf::cereal_archive.
* \param[in] archive The archive being serialised from/to.
*
Expand All @@ -247,9 +264,12 @@ class hierarchical_interleaved_bloom_filter
template <seqan::hibf::cereal_archive archive_t>
void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
{
    // Starts out as the current `version`, so that is the value handed to the archive when writing.
    // The variable is not inspected again after the archive call below.
    uint32_t parsed_version{version};

    // One variadic call; the arguments are processed left to right, so the archive layout stays
    // version, number_of_user_bins, ibf_vector, next_ibf_id, prev_ibf_id, ibf_bin_to_user_bin_id.
    archive(cereal::make_nvp("version", parsed_version),
            number_of_user_bins,
            ibf_vector,
            next_ibf_id,
            prev_ibf_id,
            ibf_bin_to_user_bin_id);
}

Expand All @@ -269,6 +289,25 @@ class hierarchical_interleaved_bloom_filter
//!\endcond
};

/*!\brief Identifies the parent of an IBF: the parent's position and the bin inside that parent.
 * \details
 * Both members are value-initialised, i.e. a default-constructed pair holds `ibf_idx == bin_idx == 0`.
 */
struct hierarchical_interleaved_bloom_filter::previous_ibf_id_pair
{
    //!\brief The index of the parent IBF in `ibf_vector`.
    size_t ibf_idx{};
    //!\brief The index of the bin in the parent IBF whose child the IBF is.
    size_t bin_idx{};

    //!\brief Defaulted three-way comparison; members are compared in declaration order (`ibf_idx`, then `bin_idx`).
    friend constexpr auto operator<=>(previous_ibf_id_pair const &, previous_ibf_id_pair const &) = default;

    //!\brief Serialisation support function; archives `ibf_idx` followed by `bin_idx`.
    template <seqan::hibf::cereal_archive archive_t>
    void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
    {
        archive(ibf_idx, bin_idx);
    }
};

// Defaulted here rather than inside the class, presumably because the member-wise comparison includes
// `prev_ibf_id` (a std::vector<previous_ibf_id_pair>) and previous_ibf_id_pair is only forward-declared
// in the class body; at this point the struct is a complete type.
HIBF_CONSTEXPR_VECTOR bool
hierarchical_interleaved_bloom_filter::operator==(hierarchical_interleaved_bloom_filter const &) const = default;

class hierarchical_interleaved_bloom_filter::membership_agent_type
{
private:
Expand Down
8 changes: 7 additions & 1 deletion include/hibf/interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,11 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
//!\}

/*!\cond DEV
* \brief Serialisation support function.
* \brief The version of the IBF.
*/
static constexpr uint32_t version{1};

/*!\brief Serialisation support function.
* \tparam archive_t Type of `archive`; must satisfy seqan::hibf::cereal_archive.
* \param[in] archive The archive being serialised from/to.
*
Expand All @@ -424,6 +428,8 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
template <cereal_archive archive_t>
void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
{
uint32_t parsed_version{version};
archive(cereal::make_nvp("version", parsed_version));
archive(bins);
archive(technical_bins);
archive(bin_size_);
Expand Down
10 changes: 7 additions & 3 deletions src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
layout::graph::node const & current_node,
build::build_data & data,
bool is_root)
bool is_root,
size_t const parent_ibf_idx = 0u)
{
size_t const ibf_pos{data.request_ibf_idx()};

Expand All @@ -68,7 +69,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
kmers,
current_node.children[current_node.favourite_child_idx.value()],
data,
false);
false,
ibf_pos);
return 1;
}
else // max bin is not a merged bin
Expand Down Expand Up @@ -124,8 +126,9 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
auto & child = children[index];

robin_hood::unordered_flat_set<uint64_t> local_kmers{};
size_t const local_ibf_pos = hierarchical_build(hibf, local_kmers, child, data, false);
size_t const local_ibf_pos = hierarchical_build(hibf, local_kmers, child, data, false, ibf_pos);
auto parent_bin_index = child.parent_bin_index;
hibf.prev_ibf_id[local_ibf_pos] = {.ibf_idx = parent_ibf_idx, .bin_idx = parent_bin_index};
{
size_t const mutex_id{parent_bin_index / 64};
std::lock_guard<std::mutex> guard{local_ibf_mutex[mutex_id]};
Expand Down Expand Up @@ -184,6 +187,7 @@ void build_index(hierarchical_interleaved_bloom_filter & hibf,

hibf.ibf_vector.resize(number_of_ibfs);
hibf.ibf_bin_to_user_bin_id.resize(number_of_ibfs);
hibf.prev_ibf_id.resize(number_of_ibfs);
hibf.next_ibf_id.resize(number_of_ibfs);

build::build_data data{.config = config, .ibf_graph = {hibf_layout}};
Expand Down

0 comments on commit 75856d1

Please sign in to comment.