Skip to content

Commit

Permalink
[MISC] add divide_and_ceil
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Oct 24, 2023
1 parent c725f38 commit c9f8466
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 19 deletions.
28 changes: 28 additions & 0 deletions include/hibf/misc/divide_and_ceil.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <cassert>
#include <concepts>
#include <cstddef> // for size_t
#include <limits>

#include <hibf/platform.hpp>

namespace seqan::hibf
{

/*!\brief Returns, for unsigned integral operands, `dividend / divisor` ceiled to the next integer value.
 * \ingroup hibf
 * \tparam t1 Type of the dividend; must model std::unsigned_integral.
 * \tparam t2 Type of the divisor; must model std::unsigned_integral.
 * \param[in] dividend The value to be divided.
 * \param[in] divisor The value to divide by; must be greater than zero.
 * \returns The smallest integer that is not less than the exact quotient `dividend / divisor`, as `size_t`.
 * \details
 * The result is computed as `(dividend + divisor - 1) / divisor`. Two preconditions are checked via `assert`
 * (i.e. only when `NDEBUG` is not defined):
 *   * `divisor` is not zero.
 *   * `dividend + divisor - 1` does not exceed the largest value representable by `size_t`.
 */
template <std::unsigned_integral t1, std::unsigned_integral t2>
[[nodiscard]] inline constexpr size_t divide_and_ceil(t1 const dividend, t2 const divisor) noexcept
{
    assert(divisor > 0u);
    // Overflow detection: rearranged so that the check itself cannot wrap around.
    assert(std::numeric_limits<size_t>::max() - divisor + 1u >= dividend);
    return (static_cast<size_t>(dividend) + (divisor - 1u)) / divisor;
}

} // namespace seqan::hibf
7 changes: 4 additions & 3 deletions src/build/construct_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
#include <hibf/config.hpp> // for config
#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_count, bin_size, hash_fun...
#include <hibf/layout/graph.hpp>
#include <hibf/misc/timer.hpp> // for concurrent, timer
#include <hibf/layout/graph.hpp> // for graph
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/misc/timer.hpp> // for concurrent, timer

namespace seqan::hibf::build
{
Expand All @@ -31,7 +32,7 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s
bool const max_bin_is_merged = ibf_node.max_bin_is_merged();
assert(!max_bin_is_merged || number_of_bins == 1u); // merged max bin implies (=>) number of bins == 1

size_t const kmers_per_bin{(kmers.size() + number_of_bins - 1u) / number_of_bins}; // Integer ceil
size_t const kmers_per_bin = divide_and_ceil(kmers.size(), number_of_bins);
double const fpr = max_bin_is_merged ? data.config.relaxed_fpr : data.config.maximum_fpr;

size_t const bin_bits{bin_size_in_bits({.fpr = fpr, //
Expand Down
3 changes: 2 additions & 1 deletion src/build/insert_into_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <hibf/contrib/std/detail/adaptor_base.hpp> // for operator|
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_index
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/misc/timer.hpp> // for concurrent, timer

namespace seqan::hibf::build
Expand All @@ -30,7 +31,7 @@ void insert_into_ibf(robin_hood::unordered_flat_set<uint64_t> const & kmers,
seqan::hibf::interleaved_bloom_filter & ibf,
timer<concurrent::yes> & fill_ibf_timer)
{
size_t const chunk_size = kmers.size() / number_of_bins + 1;
size_t const chunk_size = divide_and_ceil(kmers.size(), number_of_bins);
size_t chunk_number{};

timer<concurrent::no> local_fill_ibf_timer{};
Expand Down
3 changes: 2 additions & 1 deletion src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <hibf/layout/compute_layout.hpp> // for compute_layout
#include <hibf/layout/graph.hpp> // for graph
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/misc/timer.hpp> // for timer
#include <hibf/sketch/compute_sketches.hpp> // for compute_sketches
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
Expand Down Expand Up @@ -82,7 +83,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,

std::vector<layout::graph::node> children = current_node.children; // copy for threads

size_t const number_of_mutex = (current_node.number_of_technical_bins + 63) / 64;
size_t const number_of_mutex = divide_and_ceil(current_node.number_of_technical_bins, 64u);
std::vector<std::mutex> local_ibf_mutex(number_of_mutex);

size_t number_of_threads{};
Expand Down
8 changes: 5 additions & 3 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <hibf/layout/hierarchical_binning.hpp> // for hierarchical_binning
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/layout/simple_binning.hpp> // for simple_binning
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/misc/next_multiple_of_64.hpp> // for next_multiple_of_64
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
Expand Down Expand Up @@ -95,7 +96,7 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
for (size_t i = 0; i < num_technical_bins; ++i)
{
size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * data->fpr_correction[i + 1]);
matrix[i][0] = corrected_ub_cardinality / (i + 1);
matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
trace[i][0] = {0u, 0u}; // unnecessary?
}

Expand Down Expand Up @@ -171,7 +172,8 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
// full_score: The score to minimize -> score * #TB-high_level + low_level_memory footprint
size_t const corrected_ub_cardinality =
static_cast<size_t>(ub_cardinality * data->fpr_correction[(i - i_prime)]);
size_t score = std::max<size_t>(corrected_ub_cardinality / (i - i_prime), matrix[i_prime][j - 1]);
size_t score =
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]);
size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime][j - 1];

// std::cout << " ++ j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;
Expand Down Expand Up @@ -286,7 +288,7 @@ void hierarchical_binning::backtrack_split_bin(size_t trace_j,
size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]];
size_t const corrected_cardinality = static_cast<size_t>(cardinality * data->fpr_correction[number_of_bins]);
// NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up
size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, number_of_bins);

max_tracker.update_max(bin_id, cardinality_per_bin);
max_tracker.update_split_max(bin_id, cardinality_per_bin);
Expand Down
10 changes: 6 additions & 4 deletions src/layout/simple_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <hibf/layout/data_store.hpp> // for data_store
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/layout/simple_binning.hpp> // for simple_binning
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil

namespace seqan::hibf::layout
{
Expand All @@ -36,7 +37,7 @@ size_t simple_binning::execute()
for (size_t i = 0; i < extra_bins; ++i)
{
size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * data->fpr_correction[i + 1]);
matrix[i][0] = corrected_ub_cardinality / (i + 1);
matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
}

// we must iterate column wise
Expand All @@ -52,7 +53,8 @@ size_t simple_binning::execute()
{
size_t const corrected_ub_cardinality =
static_cast<size_t>(ub_cardinality * data->fpr_correction[(i - i_prime)]);
size_t score = std::max<size_t>(corrected_ub_cardinality / (i - i_prime), matrix[i_prime][j - 1]);
size_t score =
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]);

// std::cout << "j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;

Expand Down Expand Up @@ -81,7 +83,7 @@ size_t simple_binning::execute()
size_t const number_of_bins = (trace_i - next_i);
size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]];
size_t const corrected_cardinality = static_cast<size_t>(cardinality * data->fpr_correction[number_of_bins]);
size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up
size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, number_of_bins);

data->hibf_layout->user_bins.emplace_back(data->previous.bin_indices,
bin_id,
Expand All @@ -103,7 +105,7 @@ size_t simple_binning::execute()
size_t const cardinality = (*data->kmer_counts)[data->positions[0]];
size_t const corrected_cardinality = static_cast<size_t>(cardinality * data->fpr_correction[trace_i]);
// NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
size_t const cardinality_per_bin = (corrected_cardinality + trace_i - 1) / trace_i;
size_t const cardinality_per_bin = divide_and_ceil(corrected_cardinality, trace_i);

data->hibf_layout->user_bins.emplace_back(data->previous.bin_indices, bin_id, trace_i, data->positions[0]);

Expand Down
10 changes: 5 additions & 5 deletions test/unit/hibf/layout/hierarchical_binning_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ TEST(hierarchical_binning_test, another_example)
seqan::hibf::layout::hierarchical_binning algo{data, config};
EXPECT_EQ(algo.execute(), 1u); // #HIGH_LEVEL_IBF max_bin_id:1

std::vector<seqan::hibf::layout::layout::max_bin> expected_max_bins{{{0, 0}, 42}, {{0}, 1}};
std::vector<seqan::hibf::layout::layout::max_bin> expected_max_bins{{{0, 0}, 45}, {{0}, 1}};

std::vector<seqan::hibf::layout::layout::user_bin> expected_user_bins{{{0, 0}, 0, 42, 6},
{{0, 0}, 42, 14, 5},
{{0, 0}, 56, 4, 7},
{{0, 0}, 60, 4, 4},
std::vector<seqan::hibf::layout::layout::user_bin> expected_user_bins{{{0, 0}, 0, 45, 6},
{{0, 0}, 45, 13, 5},
{{0, 0}, 58, 3, 7},
{{0, 0}, 61, 3, 4},
{{0}, 1, 2, 0},
{{0}, 3, 2, 3},
{{}, 1, 2, 2},
Expand Down
3 changes: 2 additions & 1 deletion test/unit/hibf/sketch/hyperloglog_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/contrib/std/chunk_view.hpp> // for chunk_view, operator==, chunk, chunk_fn
#include <hibf/contrib/std/detail/adaptor_base.hpp> // for operator|
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
#include <hibf/test/sandboxed_path.hpp> // for operator/, sandboxed_path
#include <hibf/test/tmp_directory.hpp> // for tmp_directory
Expand Down Expand Up @@ -131,7 +132,7 @@ TEST(hyperloglog, add_and_estimate_large)
TEST(hyperloglog, merge)
{
size_t const chunks{10u};
size_t const chunk_size{(input_values.size() + chunks - 1u) / chunks};
size_t const chunk_size = seqan::hibf::divide_and_ceil(input_values.size(), chunks);

seqan::hibf::sketch::hyperloglog full_sketch{};
seqan::hibf::sketch::hyperloglog merge_sketch{};
Expand Down
3 changes: 2 additions & 1 deletion util/fpr_correction_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <hibf/contrib/robin_hood.hpp>
#include <hibf/interleaved_bloom_filter.hpp>
#include <hibf/layout/compute_fpr_correction.hpp>
#include <hibf/misc/divide_and_ceil.hpp>

#include <sharg/parser.hpp>

Expand Down Expand Up @@ -198,7 +199,7 @@ int main(int argc, char ** argv)
cfg.elements = cfg.number_of_kmers;
}

cfg.split_elements_per_bin = (cfg.elements + cfg.splits - 1) / cfg.splits; // ceil for positive integers
cfg.split_elements_per_bin = seqan::hibf::divide_and_ceil(cfg.elements, cfg.splits);

std::cout << "kmer: " << cfg.kmer_size << '\n';
std::cout << "elements: " << cfg.elements << '\n';
Expand Down

0 comments on commit c9f8466

Please sign in to comment.