From 3d0ef8e34e3d9779051bc29b12a1c2236475a3d3 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Wed, 28 Aug 2024 23:14:17 +0200 Subject: [PATCH] [MISC] C++23 md subscript --- .../hierarchical_interleaved_bloom_filter.hpp | 21 +++--- include/hibf/layout/hierarchical_binning.hpp | 16 ++--- include/hibf/misc/md_vector.hpp | 42 ++++++++++++ include/hibf/sketch/minhashes.hpp | 3 +- src/layout/hierarchical_binning.cpp | 64 +++++++++---------- src/layout/simple_binning.cpp | 17 ++--- 6 files changed, 104 insertions(+), 59 deletions(-) create mode 100644 include/hibf/misc/md_vector.hpp diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp index 37cba1bb..8ee9b601 100644 --- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp +++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp @@ -21,8 +21,9 @@ #include // for interleaved_bloom_filter #include // for layout #include // for counting_vector -#include // for concurrent_timer -#include // for HIBF_CONSTEXPR_VECTOR +#include +#include // for concurrent_timer +#include // for HIBF_CONSTEXPR_VECTOR namespace seqan::hibf { @@ -202,7 +203,7 @@ class hierarchical_interleaved_bloom_filter * If `j != i` is returned, there is a lower level IBF, bin `b` is a merged bin, and `j` is the ID of the lower * level IBF in ibf_vector. */ - std::vector> next_ibf_id; + md_vector next_ibf_id; /*!\brief Stores for each bin in each IBF of the HIBF the user bin ID. * \details @@ -211,7 +212,7 @@ class hierarchical_interleaved_bloom_filter * lower level IBF. * Otherwise, the returned value `j` is the corresponding user bin ID. */ - std::vector> ibf_bin_to_user_bin_id{}; + md_vector ibf_bin_to_user_bin_id{}; //!\brief Returns a membership_agent to be used for counting. membership_agent_type membership_agent() const; @@ -280,16 +281,16 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type { sum += result[bin]; - auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin]; + auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin]; if (current_filename_index < 0) // merged bin { if (sum >= threshold) - membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold); + membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold); sum = 0u; } else if (bin + 1u == result.size() || // last bin - current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin + current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin { if (sum >= threshold) result_buffer.emplace_back(current_filename_index); @@ -415,16 +416,16 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type for (size_t bin{}; bin < result.size(); ++bin) { sum += result[bin]; - auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin]; + auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin]; if (current_filename_index < 0) // merged bin { if (sum >= threshold) - bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold); + bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold); sum = 0u; } else if (bin + 1u == result.size() || // last bin - current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin + current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin { if (sum >= threshold) result_buffer[current_filename_index] = sum; diff --git a/include/hibf/layout/hierarchical_binning.hpp b/include/hibf/layout/hierarchical_binning.hpp index 652f7f27..24d80bdc 100644 --- a/include/hibf/layout/hierarchical_binning.hpp +++ b/include/hibf/layout/hierarchical_binning.hpp @@ -13,7 +13,8 @@ #include // for bin_size_in_bits #include // for config #include // for data_store -#include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY +#include +#include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY namespace seqan::hibf::layout { @@ -140,9 +141,9 @@ class hierarchical_binning * * \image html hierarchical_dp_init.png */ - void initialization(std::vector> & matrix, - std::vector> & ll_matrix, - std::vector>> & trace); + void initialization(md_vector & matrix, + md_vector & ll_matrix, + md_vector> & trace); /*!\brief Performs the recursion. * @@ -182,9 +183,8 @@ class hierarchical_binning * this algorithm. It would be too computational intensive to compute the splitting for every possibility. * */ - void recursion(std::vector> & matrix, - std::vector> & ll_matrix, - std::vector>> & trace); + void + recursion(md_vector & matrix, md_vector & ll_matrix, md_vector> & trace); void backtrack_merged_bin(size_t trace_j, size_t const next_j, @@ -198,7 +198,7 @@ class hierarchical_binning maximum_bin_tracker & max_tracker); //!\brief Backtracks the trace matrix and writes the resulting binning into the output file. - size_t backtracking(std::vector>> const & trace); + size_t backtracking(md_vector> const & trace); data_store initialise_libf_data(size_t const trace_j) const; diff --git a/include/hibf/misc/md_vector.hpp b/include/hibf/misc/md_vector.hpp new file mode 100644 index 00000000..d33da1a8 --- /dev/null +++ b/include/hibf/misc/md_vector.hpp @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +/*!\file + * \brief Provides seqan::hibf::md_vector. + * \author Enrico Seiler + */ + +#pragma once + +#include // for vector + +#include + +namespace seqan::hibf +{ + +template +struct md_vector : public std::vector> +{ + using base_t = std::vector>; + using base_t::base_t; + using base_t::operator[]; +#if defined(__cpp_explicit_this_parameter) && __cpp_explicit_this_parameter >= 202110L + decltype(auto) operator[](this auto & self, size_t const x, size_t const y) + { + return self[x][y]; + } +#else + value_t & operator[](size_t const x, size_t const y) + { + return (*this)[x][y]; + } + value_t const & operator[](size_t const x, size_t const y) const + { + return (*this)[x][y]; + } +#endif +}; + +} // namespace seqan::hibf diff --git a/include/hibf/sketch/minhashes.hpp b/include/hibf/sketch/minhashes.hpp index e5ef8405..4cb8135e 100644 --- a/include/hibf/sketch/minhashes.hpp +++ b/include/hibf/sketch/minhashes.hpp @@ -17,6 +17,7 @@ #include // for access #include // for make_nvp, CEREAL_NVP +#include #include namespace seqan::hibf::sketch @@ -39,7 +40,7 @@ struct minhashes static constexpr size_t sketch_size{40}; //!\brief A table of sketches. For LSH we need multiple sketches, stored in a table. - std::vector> table{}; // Each element (vector) is a minhash. + md_vector table{}; // Each element (vector) is a minhash. /*!\name Constructors, destructor and assignment * \{ diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp index 0c588c32..2f45b0ea 100644 --- a/src/layout/hierarchical_binning.cpp +++ b/src/layout/hierarchical_binning.cpp @@ -54,13 +54,13 @@ size_t hierarchical_binning::execute() } // technical bins (outer) = rows; user bins (inner) = columns - std::vector> matrix(num_technical_bins, std::vector(num_user_bins, max_size_t)); + md_vector matrix(num_technical_bins, std::vector(num_user_bins, max_size_t)); // technical bins (outer) = rows; user bins (inner) = columns - std::vector> ll_matrix(num_technical_bins, std::vector(num_user_bins, 0u)); + md_vector ll_matrix(num_technical_bins, std::vector(num_user_bins, 0u)); // technical bins (outer) = rows; user bins (inner) = columns - std::vector>> trace( + md_vector> trace( num_technical_bins, std::vector>(num_user_bins, {max_size_t, max_size_t})); @@ -87,9 +87,9 @@ size_t hierarchical_binning::execute() return static_cast(std::ceil(levels)); } -void hierarchical_binning::initialization(std::vector> & matrix, - std::vector> & ll_matrix, - std::vector>> & trace) +void hierarchical_binning::initialization(md_vector & matrix, + md_vector & ll_matrix, + md_vector> & trace) { assert(data != nullptr); @@ -99,8 +99,8 @@ void hierarchical_binning::initialization(std::vector> & mat for (size_t i = 0; i < num_technical_bins; ++i) { size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[i + 1]); - matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); - trace[i][0] = {0u, 0u}; // unnecessary? + matrix[i, 0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); + trace[i, 0] = {0u, 0u}; // unnecessary? } // initialize first row @@ -118,9 +118,9 @@ void hierarchical_binning::initialization(std::vector> & mat for (size_t j = 1; j < num_user_bins; ++j) { sum += (*data->kmer_counts)[data->positions[j]]; - matrix[0][j] = data->union_estimates[j]; - ll_matrix[0][j] = max_merge_levels(j + 1) * sum; - trace[0][j] = {0u, j - 1}; // unnecessary? + matrix[0, j] = data->union_estimates[j]; + ll_matrix[0, j] = max_merge_levels(j + 1) * sum; + trace[0, j] = {0u, j - 1}; // unnecessary? } } else @@ -130,16 +130,16 @@ void hierarchical_binning::initialization(std::vector> & mat assert(j < data->positions.size()); assert(data->positions[j] < data->kmer_counts->size()); sum += (*data->kmer_counts)[data->positions[j]]; - matrix[0][j] = sum; - ll_matrix[0][j] = max_merge_levels(j + 1) * sum; - trace[0][j] = {0u, j - 1}; // unnecessary? + matrix[0, j] = sum; + ll_matrix[0, j] = max_merge_levels(j + 1) * sum; + trace[0, j] = {0u, j - 1}; // unnecessary? } } } -void hierarchical_binning::recursion(std::vector> & matrix, - std::vector> & ll_matrix, - std::vector>> & trace) +void hierarchical_binning::recursion(md_vector & matrix, + md_vector & ll_matrix, + md_vector> & trace) { assert(data != nullptr); @@ -182,8 +182,8 @@ void hierarchical_binning::recursion(std::vector> & matrix, size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[(i - i_prime)]); size_t score = - std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]); - size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime][j - 1]; + std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime, j - 1]); + size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime, j - 1]; // std::cout << " ++ j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl; @@ -191,14 +191,14 @@ void hierarchical_binning::recursion(std::vector> & matrix, { minimum = score; full_minimum = full_score; - trace[i][j] = {i_prime, j - 1}; - ll_matrix[i][j] = ll_matrix[i_prime][j - 1]; + trace[i, j] = {i_prime, j - 1}; + ll_matrix[i, j] = ll_matrix[i_prime, j - 1]; } } // seqan3::debug_stream << "current vertical minimum of " << "j:" << j << " i:" << i // << " -> score:" << full_minimum << " (M_ij=" << minimum << ")" - // << " trace:" << trace[i][j] + // << " trace:" << trace[i, j] // << std::endl; // check horizontal cells @@ -216,7 +216,7 @@ void hierarchical_binning::recursion(std::vector> & matrix, // if the user bin j-1 was not split into multiple technical bins! // I may merge the current user bin j into the former - while (j_prime != 0 && ((i - trace[i][j_prime].first) < 2) && get_weight() < minimum) + while (j_prime != 0 && ((i - trace[i, j_prime].first) < 2) && get_weight() < minimum) { weight += (*data->kmer_counts)[data->positions[j_prime]]; --j_prime; @@ -224,8 +224,8 @@ void hierarchical_binning::recursion(std::vector> & matrix, // score: The current maximum technical bin size for the high-level IBF (score for the matrix M) // ll_kmers: estimate for the number of k-mers that have to be resolved on lower levels // full_score: The score to minimize -> score * #TB-high_level + low_level_memory footprint - size_t const score = std::max(matrix[i - 1][j_prime], get_weight()); - size_t const ll_kmers = ll_matrix[i - 1][j_prime] + max_merge_levels(j - j_prime) * weight; + size_t const score = std::max(matrix[i - 1, j_prime], get_weight()); + size_t const ll_kmers = ll_matrix[i - 1, j_prime] + max_merge_levels(j - j_prime) * weight; size_t const full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_kmers; // seqan3::debug_stream << " -- " << "j_prime:" << j_prime @@ -236,12 +236,12 @@ void hierarchical_binning::recursion(std::vector> & matrix, { minimum = score; full_minimum = full_score; - trace[i][j] = {i - 1, j_prime}; - ll_matrix[i][j] = ll_kmers; + trace[i, j] = {i - 1, j_prime}; + ll_matrix[i, j] = ll_kmers; } } - matrix[i][j] = minimum; + matrix[i, j] = minimum; } } } @@ -307,7 +307,7 @@ void hierarchical_binning::backtrack_split_bin(size_t trace_j, // std::cout << "split " << trace_j << " into " << number_of_bins << ": " << cardinality_per_bin << std::endl; } -size_t hierarchical_binning::backtracking(std::vector>> const & trace) +size_t hierarchical_binning::backtracking(md_vector> const & trace) { assert(data != nullptr); @@ -323,8 +323,8 @@ size_t hierarchical_binning::backtracking(std::vector 0u && trace_i > 0u) { // std::cout << "\t I am now at " << trace_i << "," << trace_j << std::endl; - size_t next_i = trace[trace_i][trace_j].first; - size_t next_j = trace[trace_i][trace_j].second; + size_t next_i = trace[trace_i, trace_j].first; + size_t next_j = trace[trace_i, trace_j].second; size_t number_of_bins = (trace_i - next_i); @@ -339,7 +339,7 @@ size_t hierarchical_binning::backtracking(std::vector // for layout #include // for simple_binning #include // for divide_and_ceil +#include namespace seqan::hibf::layout { @@ -22,11 +23,11 @@ size_t simple_binning::execute() assert(num_technical_bins > 0u); assert(num_user_bins > 0u); - std::vector> matrix(num_technical_bins); // rows + md_vector matrix(num_technical_bins); // rows for (auto & v : matrix) v.resize(num_user_bins, std::numeric_limits::max()); // columns - std::vector> trace(num_technical_bins); // rows + md_vector trace(num_technical_bins); // rows for (auto & v : trace) v.resize(num_user_bins, std::numeric_limits::max()); // columns @@ -37,7 +38,7 @@ size_t simple_binning::execute() for (size_t i = 0; i < extra_bins; ++i) { size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[i + 1]); - matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); + matrix[i, 0] = divide_and_ceil(corrected_ub_cardinality, i + 1u); } // we must iterate column wise @@ -54,14 +55,14 @@ size_t simple_binning::execute() size_t const corrected_ub_cardinality = static_cast(ub_cardinality * data->fpr_correction[(i - i_prime)]); size_t score = - std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]); + std::max(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime, j - 1]); // std::cout << "j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl; - minimum = (score < minimum) ? (trace[i][j] = i_prime, score) : minimum; + minimum = (score < minimum) ? (trace[i, j] = i_prime, score) : minimum; } - matrix[i][j] = minimum; + matrix[i, j] = minimum; } } @@ -79,7 +80,7 @@ size_t simple_binning::execute() while (trace_j > 0) { - size_t next_i = trace[trace_i][trace_j]; + size_t next_i = trace[trace_i, trace_j]; size_t const number_of_bins = (trace_i - next_i); size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]]; size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[number_of_bins]); @@ -98,7 +99,7 @@ size_t simple_binning::execute() bin_id += number_of_bins; - trace_i = trace[trace_i][trace_j]; + trace_i = trace[trace_i, trace_j]; --trace_j; } ++trace_i; // because we want the length not the index. Now trace_i == number_of_bins