diff --git a/include/hibf/build/insert_into_ibf.hpp b/include/hibf/build/insert_into_ibf.hpp index e8d41853..85d6d419 100644 --- a/include/hibf/build/insert_into_ibf.hpp +++ b/include/hibf/build/insert_into_ibf.hpp @@ -21,7 +21,8 @@ namespace seqan::hibf::build * \details * Automatically does naive splitting if number_of_bins > 1. */ -void insert_into_ibf(robin_hood::unordered_flat_set const & kmers, +void insert_into_ibf(build_data const & data, + robin_hood::unordered_flat_set const & kmers, size_t const number_of_bins, size_t const bin_index, seqan::hibf::interleaved_bloom_filter & ibf, diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index d44e7cca..ce91f853 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -55,7 +55,7 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s local_index_allocation_timer.stop(); data.index_allocation_timer += local_index_allocation_timer; - insert_into_ibf(kmers, number_of_bins, ibf_node.max_bin_index, ibf, data.fill_ibf_timer); + insert_into_ibf(data, kmers, number_of_bins, ibf_node.max_bin_index, ibf, data.fill_ibf_timer); if (!is_root) update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); diff --git a/src/build/insert_into_ibf.cpp b/src/build/insert_into_ibf.cpp index b61d5d8a..5c1fb03d 100644 --- a/src/build/insert_into_ibf.cpp +++ b/src/build/insert_into_ibf.cpp @@ -23,8 +23,25 @@ namespace seqan::hibf::build { +template +inline void +dispatch_emplace(seqan::hibf::interleaved_bloom_filter & ibf, auto && values, seqan::hibf::bin_index const bin_index) +{ + if constexpr (use_exists) + { + for (auto && value : values) + ibf.emplace_exists(value, bin_index); + } + else + { + for (auto && value : values) + ibf.emplace(value, bin_index); + } +} + // automatically does naive splitting if number_of_bins > 1 -void insert_into_ibf(robin_hood::unordered_flat_set const & kmers, +void insert_into_ibf(build_data const & data, + robin_hood::unordered_flat_set const & kmers, size_t const number_of_bins, size_t const bin_index, seqan::hibf::interleaved_bloom_filter & ibf, @@ -40,8 +57,10 @@ void insert_into_ibf(robin_hood::unordered_flat_set const & kmers, assert(chunk_number < number_of_bins); seqan::hibf::bin_index const bin_idx{bin_index + chunk_number}; ++chunk_number; - for (size_t const value : chunk) - ibf.emplace_exists(value, bin_idx); + if (data.config.empty_bin_fraction > 0.0) + dispatch_emplace(ibf, std::move(chunk), bin_idx); + else + dispatch_emplace(ibf, std::move(chunk), bin_idx); } local_fill_ibf_timer.stop(); fill_ibf_timer += local_fill_ibf_timer; @@ -62,8 +81,10 @@ void insert_into_ibf(build_data const & data, serial_timer local_fill_ibf_timer{}; local_fill_ibf_timer.start(); - for (auto && value : values) - ibf.emplace_exists(value, bin_index); + if (data.config.empty_bin_fraction > 0.0) + dispatch_emplace(ibf, std::move(values), bin_index); + else + dispatch_emplace(ibf, std::move(values), bin_index); local_fill_ibf_timer.stop(); data.fill_ibf_timer += local_fill_ibf_timer; } diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index 3623fb15..72710a15 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -133,7 +133,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, size_t const mutex_id{parent_bin_index / 64}; std::lock_guard guard{local_ibf_mutex[mutex_id]}; technical_bin_to_ibf_id[parent_bin_index] = new_ibf_pos; - build::insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer); + build::insert_into_ibf(data, kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer); if (!is_root) build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); } @@ -155,7 +155,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, else { compute_kmers(kmers, data, record); - build::insert_into_ibf(kmers, + build::insert_into_ibf(data, + kmers, record.number_of_technical_bins, record.storage_TB_id, ibf,