Skip to content

Commit

Permalink
[FEATURE] Add layout timers to the HIBF.
Browse files Browse the repository at this point in the history
  • Loading branch information
smehringer authored and eseiler committed Nov 7, 2023
1 parent 737a231 commit 59dc8bd
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 3 deletions.
4 changes: 4 additions & 0 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,10 @@ class hierarchical_interleaved_bloom_filter
* \brief Only contains values after the HIBF has been constructed.
* \{
*/
concurrent_timer layout_compute_sketches_timer{};
concurrent_timer layout_union_estimation_timer{};
concurrent_timer layout_rearrangement_timer{};
concurrent_timer layout_dp_algorithm_timer{};
concurrent_timer index_allocation_timer{};
concurrent_timer user_bin_io_timer{};
concurrent_timer merge_kmers_timer{};
Expand Down
8 changes: 8 additions & 0 deletions include/hibf/layout/compute_layout.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <hibf/config.hpp> // for config
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/timer.hpp> // for concurrent, timer
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog

namespace seqan::hibf::layout
Expand All @@ -21,6 +22,13 @@ namespace seqan::hibf::layout
* \param[in] sketches The vector that will store the sketches.
* \returns layout
*/
layout compute_layout(config const & config,
std::vector<size_t> const & kmer_counts,
std::vector<sketch::hyperloglog> const & sketches,
timer<concurrent::yes> & union_estimation_timer,
timer<concurrent::yes> & rearrangement_timer);

//!\overload
layout compute_layout(config const & config,
std::vector<size_t> const & kmer_counts,
std::vector<sketch::hyperloglog> const & sketches);
Expand Down
6 changes: 6 additions & 0 deletions include/hibf/layout/hierarchical_binning.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <hibf/build/bin_size_in_bits.hpp> // for bin_size_in_bits
#include <hibf/config.hpp> // for config
#include <hibf/layout/data_store.hpp> // for data_store
#include <hibf/misc/timer.hpp> // for concurrent, timer
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY

namespace seqan::hibf::layout
Expand Down Expand Up @@ -124,6 +125,11 @@ class hierarchical_binning
//!\brief Executes the hierarchical binning algorithm and layouts user bins into technical bins.
size_t execute();

//!\brief Tracks the time the algorithm spends on estimating the union of user bins (merged bins).
timer<concurrent::yes> union_estimation_timer{};
//!\brief Tracks the time the algorithm spends on rearranging the user bins.
timer<concurrent::yes> rearrangement_timer{};

private:
/*!\brief Returns the number of technical bins given a number of user bins.
* \param[in] requested_num_ub The number of user bins.
Expand Down
11 changes: 10 additions & 1 deletion src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con

std::vector<size_t> kmer_counts{};
std::vector<sketch::hyperloglog> sketches{};
layout_compute_sketches_timer.start();
sketch::compute_sketches(configuration, kmer_counts, sketches);
layout_compute_sketches_timer.stop();

// If rearrangement is enabled, i.e. seqan::hibf::config::disable_rearrangement is false:
// `min_id == none` in seqan::hibf::sketch::toolbox::cluster_bins -> std::out_of_range "key not found"
Expand All @@ -216,7 +218,14 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con
return count == 0u;
}));

auto layout = layout::compute_layout(configuration, kmer_counts, sketches);
layout_dp_algorithm_timer.start();
auto layout = layout::compute_layout(configuration,
kmer_counts,
sketches,
layout_union_estimation_timer,
layout_rearrangement_timer);
layout_dp_algorithm_timer.stop();

number_of_user_bins = configuration.number_of_user_bins;
build_index(*this, configuration, layout);
}
Expand Down
19 changes: 17 additions & 2 deletions src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ namespace seqan::hibf::layout

layout compute_layout(config const & config,
std::vector<size_t> const & kmer_counts,
std::vector<sketch::hyperloglog> const & sketches)
std::vector<sketch::hyperloglog> const & sketches,
timer<concurrent::yes> & union_estimation_timer,
timer<concurrent::yes> & rearrangement_timer)
{
layout resulting_layout{};

Expand All @@ -39,7 +41,10 @@ layout compute_layout(config const & config,
.hash_count = config.number_of_hash_functions,
.t_max = config.tmax});

store.hibf_layout->top_level_max_bin_id = seqan::hibf::layout::hierarchical_binning{store, config}.execute();
seqan::hibf::layout::hierarchical_binning dp_algorithm{store, config};
store.hibf_layout->top_level_max_bin_id = dp_algorithm.execute();
union_estimation_timer = dp_algorithm.union_estimation_timer;
rearrangement_timer = dp_algorithm.rearrangement_timer;

// sort records ascending by the number of bin indices (corresponds to the IBF levels)
// GCOVR_EXCL_START
Expand All @@ -53,4 +58,14 @@ layout compute_layout(config const & config,
return *store.hibf_layout;
}

/*!\brief Overload of compute_layout for callers that do not need timing information.
 * \param[in] config The configuration that is forwarded unchanged.
 * \param[in] kmer_counts The k-mer counts that are forwarded unchanged.
 * \param[in] sketches The sketches that are forwarded unchanged.
 * \returns The layout produced by the overload that additionally takes two timers.
 * \details Creates two function-local timers, hands them to the timer-taking overload, and drops them on return.
 */
layout compute_layout(config const & config,
                      std::vector<size_t> const & kmer_counts,
                      std::vector<sketch::hyperloglog> const & sketches)
{
    // The timer-taking overload needs two timers to write into; these are never read afterwards.
    timer<concurrent::yes> discarded_union_estimation_timer;
    timer<concurrent::yes> discarded_rearrangement_timer;

    return compute_layout(config,
                          kmer_counts,
                          sketches,
                          discarded_union_estimation_timer,
                          discarded_rearrangement_timer);
}

} // namespace seqan::hibf::layout
8 changes: 8 additions & 0 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ size_t hierarchical_binning::execute()
if (!config.disable_estimate_union && !config.disable_rearrangement)
{
assert(data->sketches != nullptr);
rearrangement_timer.start();
sketch::toolbox::rearrange_bins(*data->sketches,
*data->kmer_counts,
data->positions,
config.max_rearrangement_ratio,
config.threads);
rearrangement_timer.stop();
}

data->user_bins_arranged = true;
Expand Down Expand Up @@ -105,10 +107,12 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
size_t sum = (*data->kmer_counts)[data->positions[0]];
if (!config.disable_estimate_union)
{
union_estimation_timer.start();
sketch::toolbox::precompute_initial_union_estimates(data->union_estimates,
*data->sketches,
*data->kmer_counts,
data->positions);
union_estimation_timer.stop();

for (size_t j = 1; j < num_user_bins; ++j)
{
Expand Down Expand Up @@ -153,11 +157,15 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
double const ub_cardinality = static_cast<double>(current_weight);

if (!config.disable_estimate_union)
{
union_estimation_timer.start();
sketch::toolbox::precompute_union_estimates_for(data->union_estimates,
*data->sketches,
*data->kmer_counts,
data->positions,
j);
union_estimation_timer.stop();
}

for (size_t i = 1; i < num_technical_bins; ++i)
{
Expand Down

0 comments on commit 59dc8bd

Please sign in to comment.