From 98440954932bb1c367303d84696055dce9569c89 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Tue, 7 Nov 2023 09:14:05 +0100 Subject: [PATCH] [FEATURE] Add layout timers to the HIBF. --- .../hierarchical_interleaved_bloom_filter.hpp | 4 ++++ include/hibf/layout/compute_layout.hpp | 8 ++++++++ include/hibf/layout/hierarchical_binning.hpp | 6 ++++++ src/hierarchical_interleaved_bloom_filter.cpp | 11 ++++++++++- src/layout/compute_layout.cpp | 19 +++++++++++++++++-- src/layout/hierarchical_binning.cpp | 8 ++++++++ 6 files changed, 53 insertions(+), 3 deletions(-) diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp index 12b9bc47..e8cee376 100644 --- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp +++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp @@ -249,6 +249,10 @@ class hierarchical_interleaved_bloom_filter * \brief Only contains values after the HIBF has been constructed. * \{ */ + concurrent_timer layout_compute_sketches_timer{}; + concurrent_timer layout_union_estimation_timer{}; + concurrent_timer layout_rearrangement_timer{}; + concurrent_timer layout_dp_algorithm_timer{}; concurrent_timer index_allocation_timer{}; concurrent_timer user_bin_io_timer{}; concurrent_timer merge_kmers_timer{}; diff --git a/include/hibf/layout/compute_layout.hpp b/include/hibf/layout/compute_layout.hpp index 2ba637a8..92f2d213 100644 --- a/include/hibf/layout/compute_layout.hpp +++ b/include/hibf/layout/compute_layout.hpp @@ -9,6 +9,7 @@ #include // for config #include // for layout +#include // for concurrent, timer #include // for hyperloglog namespace seqan::hibf::layout @@ -21,6 +22,13 @@ namespace seqan::hibf::layout * \param[in] sketches The vector that will store the sketches. 
* \returns layout */ +layout compute_layout(config const & config, + std::vector const & kmer_counts, + std::vector const & sketches, + concurrent_timer & union_estimation_timer, + concurrent_timer & rearrangement_timer); + +//!\overload layout compute_layout(config const & config, std::vector const & kmer_counts, std::vector const & sketches); diff --git a/include/hibf/layout/hierarchical_binning.hpp b/include/hibf/layout/hierarchical_binning.hpp index 720159e9..4d310a2a 100644 --- a/include/hibf/layout/hierarchical_binning.hpp +++ b/include/hibf/layout/hierarchical_binning.hpp @@ -12,6 +12,7 @@ #include // for bin_size_in_bits #include // for config #include // for data_store +#include // for concurrent, timer #include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY namespace seqan::hibf::layout @@ -124,6 +125,11 @@ class hierarchical_binning //!\brief Executes the hierarchical binning algorithm and layouts user bins into technical bins. size_t execute(); + //!\brief Tracks the time the algorithm spends on estimating the union of user bins (merged bins). + concurrent_timer union_estimation_timer{}; + //!\brief Tracks the time the algorithm spends on rearranging the user bins. + concurrent_timer rearrangement_timer{}; + private: /*!\brief Returns the number of technical bins given a number of user bins. * \param[in] requested_num_ub The number of user bins. diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index 4ef2b5bc..0044c5aa 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -204,7 +204,9 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con std::vector kmer_counts{}; std::vector sketches{}; + layout_compute_sketches_timer.start(); sketch::compute_sketches(configuration, kmer_counts, sketches); + layout_compute_sketches_timer.stop(); // If rearrangement is enabled, i.e.
seqan::hibf::config::disable_rearrangement is false: // `min_id == none` in seqan::hibf::sketch::toolbox::cluster_bins -> std::out_of_range "key not found" @@ -216,7 +218,14 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con return count == 0u; })); - auto layout = layout::compute_layout(configuration, kmer_counts, sketches); + layout_dp_algorithm_timer.start(); + auto layout = layout::compute_layout(configuration, + kmer_counts, + sketches, + layout_union_estimation_timer, + layout_rearrangement_timer); + layout_dp_algorithm_timer.stop(); + number_of_user_bins = configuration.number_of_user_bins; build_index(*this, configuration, layout); } diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp index 47866f6e..376799c7 100644 --- a/src/layout/compute_layout.cpp +++ b/src/layout/compute_layout.cpp @@ -22,7 +22,9 @@ namespace seqan::hibf::layout layout compute_layout(config const & config, std::vector const & kmer_counts, - std::vector const & sketches) + std::vector const & sketches, + concurrent_timer & union_estimation_timer, + concurrent_timer & rearrangement_timer) { layout resulting_layout{}; @@ -39,7 +41,10 @@ layout compute_layout(config const & config, .hash_count = config.number_of_hash_functions, .t_max = config.tmax}); - store.hibf_layout->top_level_max_bin_id = seqan::hibf::layout::hierarchical_binning{store, config}.execute(); + seqan::hibf::layout::hierarchical_binning dp_algorithm{store, config}; + store.hibf_layout->top_level_max_bin_id = dp_algorithm.execute(); + union_estimation_timer = dp_algorithm.union_estimation_timer; + rearrangement_timer = dp_algorithm.rearrangement_timer; // sort records ascending by the number of bin indices (corresponds to the IBF levels) // GCOVR_EXCL_START @@ -53,4 +58,14 @@ layout compute_layout(config const & config, return *store.hibf_layout; } +layout compute_layout(config const & config, + std::vector const & kmer_counts, + std::vector const & sketches) +{ + 
concurrent_timer union_estimation_timer; + concurrent_timer rearrangement_timer; + + return compute_layout(config, kmer_counts, sketches, union_estimation_timer, rearrangement_timer); +} + } // namespace seqan::hibf::layout diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp index 20d78601..03828ad0 100644 --- a/src/layout/hierarchical_binning.cpp +++ b/src/layout/hierarchical_binning.cpp @@ -40,11 +40,13 @@ size_t hierarchical_binning::execute() if (!config.disable_estimate_union && !config.disable_rearrangement) { assert(data->sketches != nullptr); + rearrangement_timer.start(); sketch::toolbox::rearrange_bins(*data->sketches, *data->kmer_counts, data->positions, config.max_rearrangement_ratio, config.threads); + rearrangement_timer.stop(); } data->user_bins_arranged = true; @@ -105,10 +107,12 @@ void hierarchical_binning::initialization(std::vector> & mat size_t sum = (*data->kmer_counts)[data->positions[0]]; if (!config.disable_estimate_union) { + union_estimation_timer.start(); sketch::toolbox::precompute_initial_union_estimates(data->union_estimates, *data->sketches, *data->kmer_counts, data->positions); + union_estimation_timer.stop(); for (size_t j = 1; j < num_user_bins; ++j) { @@ -153,11 +157,15 @@ void hierarchical_binning::recursion(std::vector> & matrix, double const ub_cardinality = static_cast(current_weight); if (!config.disable_estimate_union) + { + union_estimation_timer.start(); sketch::toolbox::precompute_union_estimates_for(data->union_estimates, *data->sketches, *data->kmer_counts, data->positions, j); + union_estimation_timer.stop(); + } for (size_t i = 1; i < num_technical_bins; ++i) {