From aadc3e5494774934ec22082e58710d631c1be000 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Mon, 26 Aug 2024 12:29:58 -0400 Subject: [PATCH 1/4] stats --snarl-contents to dump nodes in snarls --- src/subcommand/stats_main.cpp | 42 +++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/subcommand/stats_main.cpp b/src/subcommand/stats_main.cpp index 5cf03df08df..e17f4be1ec9 100644 --- a/src/subcommand/stats_main.cpp +++ b/src/subcommand/stats_main.cpp @@ -64,6 +64,7 @@ void help_stats(char** argv) { << " multiple allowed; limit comparison to those provided" << endl << " -O, --overlap-all print overlap table for the cartesian product of paths" << endl << " -R, --snarls print statistics for each snarl" << endl + << " --snarl-contents print out a table of " << endl << " -C, --chains print statistics for each chain" << endl << " -F, --format graph format from {VG-Protobuf, PackedGraph, HashGraph, XG}. " << "Can't detect Protobuf if graph read from stdin" << endl @@ -104,10 +105,14 @@ int main_stats(int argc, char** argv) { bool overlap_all_paths = false; bool snarl_stats = false; bool chain_stats = false; + bool snarl_contents = false; bool format = false; bool degree_dist = false; string distance_index_filename; + // Long options with no corresponding short options. + constexpr int OPT_SNARL_CONTENTS = 1000; + int c; optind = 2; // force optind past command positional argument while (true) { @@ -134,6 +139,7 @@ int main_stats(int argc, char** argv) { {"overlap", no_argument, 0, 'o'}, {"overlap-all", no_argument, 0, 'O'}, {"snarls", no_argument, 0, 'R'}, + {"snarl-contents", no_argument, 0, OPT_SNARL_CONTENTS}, {"chains", no_argument, 0, 'C'}, {"format", no_argument, 0, 'F'}, {"degree-dist", no_argument, 0, 'D'}, @@ -232,6 +238,10 @@ int main_stats(int argc, char** argv) { snarl_stats = true; break; + case OPT_SNARL_CONTENTS: + snarl_contents = true; + break; + case 'C': chain_stats = true; break; @@ -297,7 +307,6 @@ int main_stats(int argc, char** argv) { exit(1); } }; - if (stats_size) { require_graph(); @@ -1100,7 +1109,7 @@ int main_stats(int argc, char** argv) { // We will track depth for each snarl (used for both snarl and chains stats) unordered_map depth; - if (snarl_stats || chain_stats) { + if (snarl_stats || chain_stats || snarl_contents) { // We will go through all the snarls and compute stats. require_graph(); @@ -1182,6 +1191,35 @@ int main_stats(int argc, char** argv) { auto netGraph = manager.net_graph_of(snarl, graph, false); cout << netGraph.get_node_count() << endl; } + + if (snarl_contents) { + pair, unordered_set > contents = manager.deep_contents(snarl, *graph, false); + contents.second.clear(); + set sorted_nodes(contents.first.begin(), contents.first.end()); + // don't bother with trivial snarls + if (!sorted_nodes.empty()) { + cout << (snarl->start().backward() ? "<" : ">") << snarl->start().node_id() + << (snarl->end().backward() ? "<" : ">") << snarl->end().node_id() << "\t" + << depth[snarl] << "\t"; + if (parent) { + cout << (parent->start().backward() ? "<" : ">") << parent->start().node_id() + << (parent->end().backward() ? "<" : ">") << parent->end().node_id() << "\t"; + } else { + cout << ".\t"; + } + + bool first = true; + for (vg::id_t node_id : sorted_nodes) { + if (first) { + first = false; + } else { + cout << ","; + } + cout << node_id; + } + cout << "\n"; + } + } }); } From 74ffdeba00f809a0fd3b518949f4b2a6d174a875 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Aug 2024 15:59:23 -0400 Subject: [PATCH 2/4] Add handling for softclips off the edge of contigs in inject --- src/alignment.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/alignment.cpp b/src/alignment.cpp index a75d33092d1..b3e408901f3 100644 --- a/src/alignment.cpp +++ b/src/alignment.cpp @@ -2689,9 +2689,20 @@ Alignment target_alignment(const PathPositionHandleGraph* graph, const path_hand size_t node_pos = pos1 - graph->get_position_of_step(step); while (edit_idx < cigar_mapping.edit_size()) { if (step == graph->path_end(path)) { - cerr << "error: walked to end of path before exhausting CIGAR on read:" << endl; - cerr << pb2json(cigar_mapping) << endl; - exit(1); + if (edit.from_length() == 0 && aln.path().mapping_size() != 0) { + // This is a softclip off the end of the contig. + // We can add it to the last mapping as an edit + assert(offset_in_edit == 0); + Mapping* last_mapping = aln.mutable_path()->mutable_mapping(aln.path().mapping_size() - 1); + *last_mapping->add_edit() = edit; + ++edit_idx; + } else { + // We've gone off the end of the contig with something other than a softclip + throw std::runtime_error("Reached unexpected end of path " + graph->get_path_name(path) + + " at edit " + std::to_string(edit_idx) + + "/" + std::to_string(cigar_mapping.edit_size()) + + " for alignment of feature " + feature); + } } handle_t h = graph->get_handle_of_step(step); string seq = graph->get_sequence(h); From 127658b49c9464a939d933029b18331383e83e71 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Aug 2024 13:03:46 -0700 Subject: [PATCH 3/4] Fetch out edit --- src/alignment.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alignment.cpp b/src/alignment.cpp index b3e408901f3..4154ae064ee 100644 --- a/src/alignment.cpp +++ b/src/alignment.cpp @@ -2689,6 +2689,7 @@ Alignment target_alignment(const PathPositionHandleGraph* graph, const path_hand size_t node_pos = pos1 - graph->get_position_of_step(step); while (edit_idx < cigar_mapping.edit_size()) { if (step == graph->path_end(path)) { + const auto& edit = cigar_mapping.edit(edit_idx); if (edit.from_length() == 0 && aln.path().mapping_size() != 0) { // This is a softclip off the end of the contig. // We can add it to the last mapping as an edit From 0f6364b5a53cbdf0aeeaf0c6bdf5215a7be808f9 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Aug 2024 13:15:10 -0700 Subject: [PATCH 4/4] Add missing continue --- src/alignment.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alignment.cpp b/src/alignment.cpp index 4154ae064ee..cbafca68e17 100644 --- a/src/alignment.cpp +++ b/src/alignment.cpp @@ -2697,6 +2697,7 @@ Alignment target_alignment(const PathPositionHandleGraph* graph, const path_hand Mapping* last_mapping = aln.mutable_path()->mutable_mapping(aln.path().mapping_size() - 1); *last_mapping->add_edit() = edit; ++edit_idx; + continue; } else { // We've gone off the end of the contig with something other than a softclip throw std::runtime_error("Reached unexpected end of path " + graph->get_path_name(path) +