Skip to content

Commit

Permalink
Merge pull request #126 from smehringer/layout_fix
Browse files Browse the repository at this point in the history
[FIX] Layout backtracking needs union estimation to find correct max bin
  • Loading branch information
smehringer authored Oct 12, 2023
2 parents 4732ff1 + 1b4e2f5 commit e5c95f6
Showing 1 changed file with 26 additions and 2 deletions.
28 changes: 26 additions & 2 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
assert(data != nullptr);

// we must iterate column-wise
// j iterates over the user bins
// i iterates over the technical bins
// matrix:
// i\j UB0 UB1 UB2 UB3
// ---------------------------
// TB0 |
// TB1 |
// TB2 |
for (size_t j = 1; j < num_user_bins; ++j)
{
size_t const current_weight = (*data->kmer_counts)[data->positions[j]];
Expand Down Expand Up @@ -238,6 +246,8 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
size_t next_i = trace[trace_i][trace_j].first;
size_t next_j = trace[trace_i][trace_j].second;

sketch::hyperloglog sketch =
config.disable_estimate_union ? sketch::hyperloglog{} : (*data->sketches)[data->positions[trace_j]];
size_t kmer_count = (*data->kmer_counts)[data->positions[trace_j]];
size_t number_of_bins = (trace_i - next_i);

Expand All @@ -249,7 +259,10 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
--trace_j;
while (trace_j != next_j)
{
kmer_count += (*data->kmer_counts)[data->positions[trace_j]];
if (!config.disable_estimate_union)
sketch.merge((*data->sketches)[data->positions[trace_j]]);
else
kmer_count += (*data->kmer_counts)[data->positions[trace_j]];
libf_data.positions.push_back(data->positions[trace_j]);
// std::cout << "," << trace_j;
--trace_j;
Expand All @@ -259,6 +272,9 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size

process_merged_bin(libf_data, bin_id);

if (!config.disable_estimate_union)
kmer_count = sketch.estimate(); // overwrite kmer_count high_level_max_id/size bin

update_max_id(high_level_max_id, high_level_max_size, bin_id, kmer_count);
// std::cout << "]: " << kmer_count << std::endl;
}
Expand Down Expand Up @@ -286,21 +302,29 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
assert(trace_i == 0 || trace_j == 0);
if (trace_i == 0u && trace_j > 0u) // the last UBs get merged into the remaining TB
{
sketch::hyperloglog sketch =
config.disable_estimate_union ? sketch::hyperloglog{} : (*data->sketches)[data->positions[trace_j]];
size_t kmer_count = (*data->kmer_counts)[data->positions[trace_j]];
auto libf_data = initialise_libf_data(trace_j);

// std::cout << "merged [" << trace_j;
while (trace_j > 0)
{
--trace_j;
kmer_count += (*data->kmer_counts)[data->positions[trace_j]];
if (!config.disable_estimate_union)
sketch.merge((*data->sketches)[data->positions[trace_j]]);
else
kmer_count += (*data->kmer_counts)[data->positions[trace_j]];
libf_data.positions.push_back(data->positions[trace_j]);
// std::cout << "," << trace_j;
}
assert(trace_j == 0);

process_merged_bin(libf_data, bin_id);

if (!config.disable_estimate_union)
kmer_count = sketch.estimate(); // overwrite kmer_count high_level_max_id/size bin

update_max_id(high_level_max_id, high_level_max_size, bin_id, kmer_count);

// std::cout << "]: " << kmer_count << std::endl;
Expand Down

1 comment on commit e5c95f6

@vercel
Copy link

@vercel vercel bot commented on e5c95f6 Oct 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

hibf – ./

hibf.vercel.app
hibf-seqan.vercel.app
hibf-git-main-seqan.vercel.app

Please sign in to comment.