Skip to content

Commit

Permalink
Detect if there are multiple top-level chains in a component
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Aug 24, 2023
1 parent 8cc44c3 commit c5d5b4a
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
2 changes: 1 addition & 1 deletion deps/gbwtgraph
Submodule gbwtgraph updated 1 files
+4 −0 src/algorithms.cpp
13 changes: 13 additions & 0 deletions src/recombinator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "kff.hpp"
#include "statistics.hpp"
#include "algorithms/component.hpp"

#include <algorithm>
#include <cmath>
Expand Down Expand Up @@ -313,8 +314,20 @@ Haplotypes HaplotypePartitioner::partition_haplotypes(const Parameters& paramete
if (this->verbosity >= Haplotypes::verbosity_basic) {
std::cerr << "Determining construction jobs" << std::endl;
}
size_t total_chains = 0;
this->distance_index.for_each_child(this->distance_index.get_root(), [&](const handlegraph::net_handle_t&) {
total_chains++;
});
size_t size_bound = this->gbz.graph.get_node_count() / parameters.approximate_jobs;
gbwtgraph::ConstructionJobs jobs = gbwtgraph::gbwt_construction_jobs(this->gbz.graph, size_bound);
if (jobs.components != total_chains) {
// TODO: Could we instead identify the components with multiple top-level chains
// and skip them?
std::string msg = "HaplotypePartitioner::partition_haplotypes(): there are "
+ std::to_string(total_chains) + " top-level chains and " + std::to_string(jobs.components)
+ " weakly connected components; haplotype sampling cannot be used with this graph";
throw std::runtime_error(msg);
}
result.header.top_level_chains = jobs.components;
result.header.construction_jobs = jobs.size();
result.chains.resize(result.components());
Expand Down
8 changes: 7 additions & 1 deletion src/subcommand/haplotypes_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,13 @@ void preprocess_graph(const gbwtgraph::GBZ& gbz, Haplotypes& haplotypes, Haploty

// Partition the haplotypes.
HaplotypePartitioner partitioner(gbz, r_index, distance_index, minimizer_index, config.verbosity);
haplotypes = partitioner.partition_haplotypes(config.partitioner_parameters);
try {
haplotypes = partitioner.partition_haplotypes(config.partitioner_parameters);
}
catch (const std::runtime_error& e) {
std::cerr << e.what() << std::endl;
std::exit(EXIT_FAILURE);
}
if (config.verbosity >= Haplotypes::verbosity_basic) {
double seconds = gbwt::readTimer() - start;
std::cerr << "Generated haplotype information in " << seconds << " seconds" << std::endl;
Expand Down

1 comment on commit c5d5b4a

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch diploid-sampling. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 19807 seconds

Please sign in to comment.