Skip to content

Commit

Permalink
[MISC] C++23 md subscript
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Aug 28, 2024
1 parent f86f167 commit c502bc0
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 59 deletions.
21 changes: 11 additions & 10 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/counting_vector.hpp> // for counting_vector
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR
#include <hibf/misc/md_vector.hpp>
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR

namespace seqan::hibf
{
Expand Down Expand Up @@ -202,7 +203,7 @@ class hierarchical_interleaved_bloom_filter
* If `j != i` is returned, there is a lower level IBF, bin `b` is a merged bin, and `j` is the ID of the lower
* level IBF in ibf_vector.
*/
std::vector<std::vector<int64_t>> next_ibf_id;
md_vector<int64_t> next_ibf_id;

/*!\brief Stores for each bin in each IBF of the HIBF the user bin ID.
* \details
Expand All @@ -211,7 +212,7 @@ class hierarchical_interleaved_bloom_filter
* lower level IBF.
* Otherwise, the returned value `j` is the corresponding user bin ID.
*/
std::vector<std::vector<int64_t>> ibf_bin_to_user_bin_id{};
md_vector<int64_t> ibf_bin_to_user_bin_id{};

//!\brief Returns a membership_agent to be used for counting.
membership_agent_type membership_agent() const;
Expand Down Expand Up @@ -280,16 +281,16 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
{
sum += result[bin];

auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];

if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer.emplace_back(current_filename_index);
Expand Down Expand Up @@ -415,16 +416,16 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type
for (size_t bin{}; bin < result.size(); ++bin)
{
sum += result[bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];

if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer[current_filename_index] = sum;
Expand Down
16 changes: 8 additions & 8 deletions include/hibf/layout/hierarchical_binning.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
#include <hibf/build/bin_size_in_bits.hpp> // for bin_size_in_bits
#include <hibf/config.hpp> // for config
#include <hibf/layout/data_store.hpp> // for data_store
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
#include <hibf/misc/md_vector.hpp>
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY

namespace seqan::hibf::layout
{
Expand Down Expand Up @@ -140,9 +141,9 @@ class hierarchical_binning
*
* \image html hierarchical_dp_init.png
*/
void initialization(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
void initialization(md_vector<size_t> & matrix,
md_vector<size_t> & ll_matrix,
md_vector<std::pair<size_t, size_t>> & trace);

/*!\brief Performs the recursion.
*
Expand Down Expand Up @@ -182,9 +183,8 @@ class hierarchical_binning
* this algorithm. It would be too computationally intensive to compute the splitting for every possibility.
*
*/
void recursion(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
void
recursion(md_vector<size_t> & matrix, md_vector<size_t> & ll_matrix, md_vector<std::pair<size_t, size_t>> & trace);

void backtrack_merged_bin(size_t trace_j,
size_t const next_j,
Expand All @@ -198,7 +198,7 @@ class hierarchical_binning
maximum_bin_tracker & max_tracker);

//!\brief Backtracks the trace matrix and writes the resulting binning into the output file.
size_t backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace);
size_t backtracking(md_vector<std::pair<size_t, size_t>> const & trace);

data_store initialise_libf_data(size_t const trace_j) const;

Expand Down
42 changes: 42 additions & 0 deletions include/hibf/misc/md_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides seqan::hibf::md_vector.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <vector> // for vector

#include <hibf/platform.hpp>

namespace seqan::hibf
{

template <typename value_t>
struct md_vector : public std::vector<std::vector<value_t>>
{
using base_t = std::vector<std::vector<value_t>>;
using base_t::base_t;
using base_t::operator[];
#if defined(__cpp_explicit_this_parameter) && __cpp_explicit_this_parameter >= 202110L
decltype(auto) operator[](this auto & self, size_t const x, size_t const y)
{
return self[x][y];
}
#else
value_t & operator[](size_t const x, size_t const y)
{
return (*this)[x][y];
}
value_t const & operator[](size_t const x, size_t const y) const
{
return (*this)[x][y];
}
#endif
};

} // namespace seqan::hibf
3 changes: 2 additions & 1 deletion include/hibf/sketch/minhashes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cereal/access.hpp> // for access
#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP

#include <hibf/misc/md_vector.hpp>
#include <hibf/platform.hpp>

namespace seqan::hibf::sketch
Expand All @@ -39,7 +40,7 @@ struct minhashes
static constexpr size_t sketch_size{40};

//!\brief A table of sketches. For LSH we need multiple sketches, stored in a table.
std::vector<std::vector<uint64_t>> table{}; // Each element (vector<uint64_t>) is a minhash.
md_vector<uint64_t> table{}; // Each element (vector<uint64_t>) is a minhash.

/*!\name Constructors, destructor and assignment
* \{
Expand Down
64 changes: 32 additions & 32 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ size_t hierarchical_binning::execute()
}

// technical bins (outer) = rows; user bins (inner) = columns
std::vector<std::vector<size_t>> matrix(num_technical_bins, std::vector<size_t>(num_user_bins, max_size_t));
md_vector<size_t> matrix(num_technical_bins, std::vector<size_t>(num_user_bins, max_size_t));

// technical bins (outer) = rows; user bins (inner) = columns
std::vector<std::vector<size_t>> ll_matrix(num_technical_bins, std::vector<size_t>(num_user_bins, 0u));
md_vector<size_t> ll_matrix(num_technical_bins, std::vector<size_t>(num_user_bins, 0u));

// technical bins (outer) = rows; user bins (inner) = columns
std::vector<std::vector<std::pair<size_t, size_t>>> trace(
md_vector<std::pair<size_t, size_t>> trace(
num_technical_bins,
std::vector<std::pair<size_t, size_t>>(num_user_bins, {max_size_t, max_size_t}));

Expand All @@ -87,9 +87,9 @@ size_t hierarchical_binning::execute()
return static_cast<size_t>(std::ceil(levels));
}

void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace)
void hierarchical_binning::initialization(md_vector<size_t> & matrix,
md_vector<size_t> & ll_matrix,
md_vector<std::pair<size_t, size_t>> & trace)
{
assert(data != nullptr);

Expand All @@ -99,8 +99,8 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
for (size_t i = 0; i < num_technical_bins; ++i)
{
size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * data->fpr_correction[i + 1]);
matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
trace[i][0] = {0u, 0u}; // unnecessary?
matrix[i, 0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
trace[i, 0] = {0u, 0u}; // unnecessary?
}

// initialize first row
Expand All @@ -118,9 +118,9 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
for (size_t j = 1; j < num_user_bins; ++j)
{
sum += (*data->kmer_counts)[data->positions[j]];
matrix[0][j] = data->union_estimates[j];
ll_matrix[0][j] = max_merge_levels(j + 1) * sum;
trace[0][j] = {0u, j - 1}; // unnecessary?
matrix[0, j] = data->union_estimates[j];
ll_matrix[0, j] = max_merge_levels(j + 1) * sum;
trace[0, j] = {0u, j - 1}; // unnecessary?
}
}
else
Expand All @@ -130,16 +130,16 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
assert(j < data->positions.size());
assert(data->positions[j] < data->kmer_counts->size());
sum += (*data->kmer_counts)[data->positions[j]];
matrix[0][j] = sum;
ll_matrix[0][j] = max_merge_levels(j + 1) * sum;
trace[0][j] = {0u, j - 1}; // unnecessary?
matrix[0, j] = sum;
ll_matrix[0, j] = max_merge_levels(j + 1) * sum;
trace[0, j] = {0u, j - 1}; // unnecessary?
}
}
}

void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace)
void hierarchical_binning::recursion(md_vector<size_t> & matrix,
md_vector<size_t> & ll_matrix,
md_vector<std::pair<size_t, size_t>> & trace)
{
assert(data != nullptr);

Expand Down Expand Up @@ -182,23 +182,23 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
size_t const corrected_ub_cardinality =
static_cast<size_t>(ub_cardinality * data->fpr_correction[(i - i_prime)]);
size_t score =
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]);
size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime][j - 1];
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime, j - 1]);
size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime, j - 1];

// std::cout << " ++ j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;

if (full_score < full_minimum)
{
minimum = score;
full_minimum = full_score;
trace[i][j] = {i_prime, j - 1};
ll_matrix[i][j] = ll_matrix[i_prime][j - 1];
trace[i, j] = {i_prime, j - 1};
ll_matrix[i, j] = ll_matrix[i_prime, j - 1];
}
}

// seqan3::debug_stream << "current vertical minimum of " << "j:" << j << " i:" << i
// << " -> score:" << full_minimum << " (M_ij=" << minimum << ")"
// << " trace:" << trace[i][j]
// << " trace:" << trace[i, j]
// << std::endl;

// check horizontal cells
Expand All @@ -216,16 +216,16 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,

// if the user bin j-1 was not split into multiple technical bins!
// I may merge the current user bin j into the former
while (j_prime != 0 && ((i - trace[i][j_prime].first) < 2) && get_weight() < minimum)
while (j_prime != 0 && ((i - trace[i, j_prime].first) < 2) && get_weight() < minimum)
{
weight += (*data->kmer_counts)[data->positions[j_prime]];
--j_prime;

// score: The current maximum technical bin size for the high-level IBF (score for the matrix M)
// ll_kmers: estimate for the number of k-mers that have to be resolved on lower levels
// full_score: The score to minimize -> score * #TB-high_level + low_level_memory footprint
size_t const score = std::max<size_t>(matrix[i - 1][j_prime], get_weight());
size_t const ll_kmers = ll_matrix[i - 1][j_prime] + max_merge_levels(j - j_prime) * weight;
size_t const score = std::max<size_t>(matrix[i - 1, j_prime], get_weight());
size_t const ll_kmers = ll_matrix[i - 1, j_prime] + max_merge_levels(j - j_prime) * weight;
size_t const full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_kmers;

// seqan3::debug_stream << " -- " << "j_prime:" << j_prime
Expand All @@ -236,12 +236,12 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
{
minimum = score;
full_minimum = full_score;
trace[i][j] = {i - 1, j_prime};
ll_matrix[i][j] = ll_kmers;
trace[i, j] = {i - 1, j_prime};
ll_matrix[i, j] = ll_kmers;
}
}

matrix[i][j] = minimum;
matrix[i, j] = minimum;
}
}
}
Expand Down Expand Up @@ -307,7 +307,7 @@ void hierarchical_binning::backtrack_split_bin(size_t trace_j,
// std::cout << "split " << trace_j << " into " << number_of_bins << ": " << cardinality_per_bin << std::endl;
}

size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace)
size_t hierarchical_binning::backtracking(md_vector<std::pair<size_t, size_t>> const & trace)
{
assert(data != nullptr);

Expand All @@ -323,8 +323,8 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
while (trace_j > 0u && trace_i > 0u)
{
// std::cout << "\t I am now at " << trace_i << "," << trace_j << std::endl;
size_t next_i = trace[trace_i][trace_j].first;
size_t next_j = trace[trace_i][trace_j].second;
size_t next_i = trace[trace_i, trace_j].first;
size_t next_j = trace[trace_i, trace_j].second;

size_t number_of_bins = (trace_i - next_i);

Expand All @@ -339,7 +339,7 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
{
backtrack_split_bin(trace_j, number_of_bins, bin_id, max_tracker);

trace_i = trace[trace_i][trace_j].first;
trace_i = trace[trace_i, trace_j].first;
--trace_j;
}

Expand Down
Loading

0 comments on commit c502bc0

Please sign in to comment.