diff --git a/include/hibf/misc/divide_and_ceil.hpp b/include/hibf/misc/divide_and_ceil.hpp
new file mode 100644
index 00000000..5c7aa669
--- /dev/null
+++ b/include/hibf/misc/divide_and_ceil.hpp
@@ -0,0 +1,41 @@
+// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
+// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
+// SPDX-License-Identifier: BSD-3-Clause
+
+#pragma once
+
+#include <cassert>  // for assert
+#include <concepts> // for unsigned_integral
+#include <cstddef>  // for size_t
+#include <limits>   // for numeric_limits
+
+// NOTE(review): every angle-bracketed token of this file (include targets, template parameter list, cast
+// type) was missing from the patch text under review and is reconstructed from usage and the brief below.
+// Please confirm, in particular the project header on the next line, which no visible code depends on.
+#include <hibf/platform.hpp>
+
+namespace seqan::hibf
+{
+
+/*!\brief Returns, for unsigned integral operands, `dividend / divisor` ceiled to the next integer value.
+ * \ingroup hibf
+ * \tparam t1 Unsigned integral type of the dividend.
+ * \tparam t2 Unsigned integral type of the divisor.
+ * \param[in] dividend The value to divide.
+ * \param[in] divisor The value to divide by; must be greater than `0`.
+ * \returns The smallest integer that is not less than the exact quotient, as `size_t`.
+ * \details
+ * The result is computed as `(dividend + (divisor - 1)) / divisor`. Both preconditions, `divisor > 0` and
+ * `dividend + (divisor - 1)` not exceeding the maximum `size_t`, are checked with `assert` only, i.e. they are
+ * not checked when `NDEBUG` is defined.
+ */
+template <std::unsigned_integral t1, std::unsigned_integral t2>
+[[nodiscard]] inline constexpr size_t divide_and_ceil(t1 const dividend, t2 const divisor) noexcept
+{
+    assert(divisor > 0u);
+    // Overflow detection: the sum below must not wrap around.
+    assert(std::numeric_limits<size_t>::max() - divisor + 1u >= dividend);
+    return (static_cast<size_t>(dividend) + (divisor - 1u)) / divisor;
+}
+
+} // namespace seqan::hibf
diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index 4323c96f..2b41e2f7 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -15,8 +15,9 @@ #include // for config #include // for unordered_flat_set #include // for interleaved_bloom_filter, bin_count, bin_size, hash_fun... -#include -#include // for concurrent, timer +#include // for graph +#include // for divide_and_ceil +#include // for concurrent, timer namespace seqan::hibf::build { @@ -31,7 +32,7 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s bool const max_bin_is_merged = ibf_node.max_bin_is_merged(); assert(!max_bin_is_merged || number_of_bins == 1u); // merged max bin implies (=>) number of bins == 1 - size_t const kmers_per_bin{(kmers.size() + number_of_bins - 1u) / number_of_bins}; // Integer ceil + size_t const kmers_per_bin = divide_and_ceil(kmers.size(), number_of_bins); double const fpr = max_bin_is_merged ? 
data.config.relaxed_fpr : data.config.maximum_fpr; size_t const bin_bits{bin_size_in_bits({.fpr = fpr, // diff --git a/src/build/insert_into_ibf.cpp b/src/build/insert_into_ibf.cpp index 59c11793..b79d22f3 100644 --- a/src/build/insert_into_ibf.cpp +++ b/src/build/insert_into_ibf.cpp @@ -18,6 +18,7 @@ #include // for operator| #include // for interleaved_bloom_filter, bin_index #include // for layout +#include // for divide_and_ceil #include // for concurrent, timer namespace seqan::hibf::build @@ -30,7 +31,7 @@ void insert_into_ibf(robin_hood::unordered_flat_set const & kmers, seqan::hibf::interleaved_bloom_filter & ibf, timer & fill_ibf_timer) { - size_t const chunk_size = kmers.size() / number_of_bins + 1; + size_t const chunk_size = divide_and_ceil(kmers.size(), number_of_bins); size_t chunk_number{}; timer local_fill_ibf_timer{}; diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index 4e769ed8..53d98f20 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -26,6 +26,7 @@ #include // for compute_layout #include // for graph #include // for layout +#include // for divide_and_ceil #include // for timer #include // for compute_sketches #include // for hyperloglog @@ -82,7 +83,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, std::vector children = current_node.children; // copy for threads - size_t const number_of_mutex = (current_node.number_of_technical_bins + 63) / 64; + size_t const number_of_mutex = divide_and_ceil(current_node.number_of_technical_bins, 64u); std::vector local_ibf_mutex(number_of_mutex); size_t number_of_threads{}; diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp index 333b9efc..20d78601 100644 --- a/src/layout/hierarchical_binning.cpp +++ b/src/layout/hierarchical_binning.cpp @@ -16,6 +16,7 @@ #include // for hierarchical_binning #include // for layout #include 
// for simple_binning +#include // for divide_and_ceil #include // for next_multiple_of_64 #include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY #include // for hyperloglog @@ -95,7 +96,7 @@ void hierarchical_binning::initialization(std::vector> & mat for (size_t i = 0; i < num_technical_bins; ++i) { size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[i + 1]); - matrix[i][0] = corrected_ub_cardinality / (i + 1); + matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); trace[i][0] = {0u, 0u}; // unnecessary? } @@ -171,7 +172,8 @@ void hierarchical_binning::recursion(std::vector> & matrix, // full_score: The score to minimize -> score * #TB-high_level + low_level_memory footprint size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[(i - i_prime)]); - size_t score = std::max(corrected_ub_cardinality / (i - i_prime), matrix[i_prime][j - 1]); + size_t score = + std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]); size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime][j - 1]; // std::cout << " ++ j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl; @@ -286,7 +288,7 @@ void hierarchical_binning::backtrack_split_bin(size_t trace_j, size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]]; size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[number_of_bins]); // NOLINTNEXTLINE(clang-analyzer-core.DivideZero) - size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up + size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, number_of_bins); max_tracker.update_max(bin_id, cardinality_per_bin); max_tracker.update_split_max(bin_id, cardinality_per_bin); diff --git a/src/layout/simple_binning.cpp b/src/layout/simple_binning.cpp index accd9907..2cdae718 100644 --- a/src/layout/simple_binning.cpp 
+++ b/src/layout/simple_binning.cpp @@ -11,6 +11,7 @@ #include // for data_store #include // for layout #include // for simple_binning +#include // for divide_and_ceil namespace seqan::hibf::layout { @@ -36,7 +37,7 @@ size_t simple_binning::execute() for (size_t i = 0; i < extra_bins; ++i) { size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[i + 1]); - matrix[i][0] = corrected_ub_cardinality / (i + 1); + matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); } // we must iterate column wise @@ -52,7 +53,8 @@ size_t simple_binning::execute() { size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[(i - i_prime)]); - size_t score = std::max(corrected_ub_cardinality / (i - i_prime), matrix[i_prime][j - 1]); + size_t score = + std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]); // std::cout << "j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl; @@ -81,7 +83,7 @@ size_t simple_binning::execute() size_t const number_of_bins = (trace_i - next_i); size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]]; size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[number_of_bins]); - size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up + size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, number_of_bins); data->hibf_layout->user_bins.emplace_back(data->previous.bin_indices, bin_id, @@ -103,7 +105,7 @@ size_t simple_binning::execute() size_t const cardinality = (*data->kmer_counts)[data->positions[0]]; size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[trace_i]); // NOLINTNEXTLINE(clang-analyzer-core.DivideZero) - size_t const cardinality_per_bin = (corrected_cardinality + trace_i - 1) / trace_i; + size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, 
trace_i); data->hibf_layout->user_bins.emplace_back(data->previous.bin_indices, bin_id, trace_i, data->positions[0]); diff --git a/test/unit/hibf/layout/hierarchical_binning_test.cpp b/test/unit/hibf/layout/hierarchical_binning_test.cpp index 350ff7fc..de5a111d 100644 --- a/test/unit/hibf/layout/hierarchical_binning_test.cpp +++ b/test/unit/hibf/layout/hierarchical_binning_test.cpp @@ -61,12 +61,12 @@ TEST(hierarchical_binning_test, another_example) seqan::hibf::layout::hierarchical_binning algo{data, config}; EXPECT_EQ(algo.execute(), 1u); // #HIGH_LEVEL_IBF max_bin_id:1 - std::vector expected_max_bins{{{0, 0}, 42}, {{0}, 1}}; + std::vector expected_max_bins{{{0, 0}, 45}, {{0}, 1}}; - std::vector expected_user_bins{{{0, 0}, 0, 42, 6}, - {{0, 0}, 42, 14, 5}, - {{0, 0}, 56, 4, 7}, - {{0, 0}, 60, 4, 4}, + std::vector expected_user_bins{{{0, 0}, 0, 45, 6}, + {{0, 0}, 45, 13, 5}, + {{0, 0}, 58, 3, 7}, + {{0, 0}, 61, 3, 4}, {{0}, 1, 2, 0}, {{0}, 3, 2, 3}, {{}, 1, 2, 2}, diff --git a/test/unit/hibf/sketch/hyperloglog_test.cpp b/test/unit/hibf/sketch/hyperloglog_test.cpp index 846521f9..cbed0fab 100644 --- a/test/unit/hibf/sketch/hyperloglog_test.cpp +++ b/test/unit/hibf/sketch/hyperloglog_test.cpp @@ -18,6 +18,7 @@ #include // for unordered_flat_set #include // for chunk_view, operator==, chunk, chunk_fn #include // for operator| +#include // for divide_and_ceil #include // for hyperloglog #include // for operator/, sandboxed_path #include // for tmp_directory @@ -131,7 +132,7 @@ TEST(hyperloglog, add_and_estimate_large) TEST(hyperloglog, merge) { size_t const chunks{10u}; - size_t const chunk_size{(input_values.size() + chunks - 1u) / chunks}; + size_t const chunk_size = seqan::hibf::divide_and_ceil(input_values.size(), chunks); seqan::hibf::sketch::hyperloglog full_sketch{}; seqan::hibf::sketch::hyperloglog merge_sketch{}; diff --git a/util/fpr_correction_check.cpp b/util/fpr_correction_check.cpp index 808605fe..a0fc3187 100644 --- a/util/fpr_correction_check.cpp +++ 
b/util/fpr_correction_check.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -198,7 +199,7 @@ int main(int argc, char ** argv) cfg.elements = cfg.number_of_kmers; } - cfg.split_elements_per_bin = (cfg.elements + cfg.splits - 1) / cfg.splits; // ceil for positive integers + cfg.split_elements_per_bin = seqan::hibf::divide_and_ceil(cfg.elements, cfg.splits); std::cout << "kmer: " << cfg.kmer_size << '\n'; std::cout << "elements: " << cfg.elements << '\n';