diff --git a/reneo/workflow/scripts/reneo_utils/edge_graph_utils.py b/reneo/workflow/scripts/reneo_utils/edge_graph_utils.py
index ab3bf61..e1c2f78 100644
--- a/reneo/workflow/scripts/reneo_utils/edge_graph_utils.py
+++ b/reneo/workflow/scripts/reneo_utils/edge_graph_utils.py
@@ -67,6 +67,7 @@ def get_links(assembly_graph_file):
     graph_contigs = {}
     edges_lengths = {}
     oriented_links = defaultdict(lambda: defaultdict(list))
+    link_overlap = defaultdict(int)
     links = []
 
     my_map = BidirectionalMap()
@@ -83,6 +84,7 @@ def get_links(assembly_graph_file):
 
                 link1_orientation = strings[2]
                 link2_orientation = strings[4]
+                overlap = int(strings[5].strip()[:-1] or 0)  # "*" (unspecified) -> 0
 
                 link = []
                 link.append(link1)
@@ -92,16 +94,24 @@ def get_links(assembly_graph_file):
                 if link1 != link2:
                     if link1_orientation == "+" and link2_orientation == "+":
                         oriented_links[link1][link2].append(("+", "+"))
+                        link_overlap[(f"{link1}+", f"{link2}+")] = overlap
                         oriented_links[link2][link1].append(("-", "-"))
+                        link_overlap[(f"{link2}-", f"{link1}-")] = overlap
                     elif link1_orientation == "-" and link2_orientation == "-":
                         oriented_links[link1][link2].append(("-", "-"))
+                        link_overlap[(f"{link1}-", f"{link2}-")] = overlap
                         oriented_links[link2][link1].append(("+", "+"))
+                        link_overlap[(f"{link2}+", f"{link1}+")] = overlap
                     elif link1_orientation == "+" and link2_orientation == "-":
                         oriented_links[link1][link2].append(("+", "-"))
+                        link_overlap[(f"{link1}+", f"{link2}-")] = overlap
                         oriented_links[link2][link1].append(("+", "-"))
+                        link_overlap[(f"{link2}+", f"{link1}-")] = overlap
                     elif link1_orientation == "-" and link2_orientation == "+":
                         oriented_links[link1][link2].append(("-", "+"))
+                        link_overlap[(f"{link1}-", f"{link2}+")] = overlap
                         oriented_links[link2][link1].append(("-", "+"))
+                        link_overlap[(f"{link2}-", f"{link1}+")] = overlap
 
             elif line.startswith("S"):
                 strings = line.strip().split()
@@ -112,7 +122,15 @@ def get_links(assembly_graph_file):
 
             line = file.readline()
 
-    return node_count, graph_contigs, links, oriented_links, my_map, edges_lengths
+    return (
+        node_count,
+        graph_contigs,
+        links,
+        oriented_links,
+        link_overlap,
+        my_map,
+        edges_lengths,
+    )
 
 
 def get_graph_edges(links, contig_names_rev):
@@ -146,6 +164,7 @@ def build_assembly_graph(assembly_graph_file):
         graph_contigs,
         links,
         oriented_links,
+        link_overlap,
         contig_names,
         edges_lengths,
     ) = get_links(assembly_graph_file)
@@ -178,6 +197,7 @@ def build_assembly_graph(assembly_graph_file):
     return (
         assembly_graph,
         oriented_links,
+        link_overlap,
         contig_names,
         contig_names_rev,
         graph_contigs,
diff --git a/reneo/workflow/scripts/reneo_utils/flow_utils.py b/reneo/workflow/scripts/reneo_utils/flow_utils.py
index a5e3e4f..241a5f9 100644
--- a/reneo/workflow/scripts/reneo_utils/flow_utils.py
+++ b/reneo/workflow/scripts/reneo_utils/flow_utils.py
@@ -3,9 +3,9 @@
 from .FD_Inexact import SolveInstances
 
 
-def get_source_sink(G_edge, graph_unitigs, minlength, self_looped_nodes):
+def get_source_sink_circular(G_edge, graph_unitigs, minlength, self_looped_nodes):
     """
-    Identify source/sink vertex
+    Identify source/sink vertex for circular components
     """
 
     source_sink_candidates = []
@@ -48,6 +48,38 @@ def get_source_sink(G_edge, graph_unitigs, minlength, self_looped_nodes):
     return source_sink_candidates
 
 
+def get_source_sink_linear(G_edge, self_looped_nodes):
+    """
+    Identify source/sink vertices for linear components.
+
+    A node with successors but no predecessors is a source candidate; a node
+    with predecessors but no successors is a sink candidate. Nodes whose
+    unitig name (the node name minus its last character) is in
+    self_looped_nodes are skipped.
+
+    Returns a (source_candidates, sink_candidates) tuple of lists.
+    """
+
+    source_candidates = []
+    sink_candidates = []
+
+    for node in G_edge.nodes:
+        # Drop the last character of the node name to get the unitig name
+        if node[:-1] in self_looped_nodes:
+            continue
+
+        # Only emptiness matters, so stop at the first neighbour found
+        has_pred = any(True for _ in G_edge.predecessors(node))
+        has_succ = any(True for _ in G_edge.successors(node))
+
+        if has_pred and not has_succ:
+            sink_candidates.append(node)
+        elif has_succ and not has_pred:
+            source_candidates.append(node)
+
+    return source_candidates, sink_candidates
+
+
 def solve_mfd(G, max_paths, output, nthreads):
     """
     Get paths by solving MFD