Skip to content

Commit

Permalink
ref
Browse files Browse the repository at this point in the history
  • Loading branch information
katerinakazantseva committed Aug 8, 2024
1 parent dda681b commit 2fa6a7a
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 25 deletions.
15 changes: 14 additions & 1 deletion strainy/gfa_operations/asm_graph_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,27 @@
from strainy.clustering import build_data
from strainy.params import *

"""
This module contains functions for operating on the assembly graph:
1. add_child_edge: adds a child unitig with the same sequence as the parent unitig or with the given sequence
(for full path and other clusters)
2. add_path_links: adds links between the unitigs forming a "full path"
3. change_cov: recalculates coverage
4. change_sec: recalculates sequence
"""



logger = logging.getLogger()



def add_child_edge(edge, clN, g, cl, left, right, cons, flye_consensus, change_seq=True, insertmain=True):
"""
The function creates unitigs in the gfa graph
Adds a child unitig with the same sequence as the parental unitig or with the given sequence
"""
##TODO if cons provided change_seq=True (provide seq not consensus)
##TODO make a separate function to add a gfa edge and move it to gfa_ops
consensus = flye_consensus.flye_consensus(clN, edge, cl)
consensus_start = consensus["start"]
Expand Down Expand Up @@ -46,6 +58,7 @@ def add_child_edge(edge, clN, g, cl, left, right, cons, flye_consensus, change_s


def add_path_links(graph, edge, paths,G):
#TODO remove G
"""
Add gfa links between newly created unitigs forming "full path"
"""
Expand Down
2 changes: 1 addition & 1 deletion strainy/gfa_operations/gfa_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def add_link(graph, fr, fr_or, to, to_or, w):

def add_edge(graph,edge, clN, cov):
#TODO remove edge,clN from parameters, use name instead
#TODO add sequ
#TODO add seq
"""
Adds an empty (no sequence) segment with the specified name and coverage to the graph
Parameters:
Expand Down
74 changes: 51 additions & 23 deletions strainy/gfa_operations/overlap_graph_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@

logger = logging.getLogger()

"""
This module contains functions for operating on the overlap graph:
1. build_paths_graph: creates the overlap graph #TODO rename to overlap
2. find_full_paths: finds full paths in the overlap graph
3. remove_nested(G, cons): removes nested clusters
4. add_path_edges: calculates cluster boundaries and creates unitigs using asm.add_child_edge
"""

def build_paths_graph(cons, full_paths_roots, full_paths_leafs, cluster_distances):
Expand All @@ -22,22 +30,41 @@ def build_paths_graph(cons, full_paths_roots, full_paths_leafs, cluster_distance
G.remove_node(0)
except:
pass
#TODO move it to the parent function
G, full_paths_roots, full_paths_leafs = \
remove_leaf_root_subnodes(G,full_paths_roots,full_paths_leafs)
G = remove_nested(G, cons)
G = remove_transitive(G)
return G


def remove_transitive(G):
path_remove = []
node_remove = []
for node in full_paths_leafs:
neighbors = list(full_paths_leafs)
for node in G.nodes():
neighbors = nx.all_neighbors(G, node)
for neighbor in list(neighbors):
for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff = 2):
if len(n_path) == 2:
node_remove.append(neighbor)
for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff = 3):
if len(n_path) == 3:
path_remove.append(n_path)
for n_path in path_remove:
try:
G.remove_edge(n_path[0], n_path[1])
except:
continue
return G

for node in full_paths_roots:
neighbors = list(full_paths_roots)

def remove_leaf_root_subnodes(G,full_paths_roots,full_paths_leafs):
node_remove = []
for node in full_paths_leafs+full_paths_roots:
if node in full_paths_leafs:
neighbors = list(full_paths_leafs)
else:
neighbors = list(full_paths_roots)
for neighbor in list(neighbors):
for n_path in nx.algorithms.all_simple_paths(G, neighbor,node, cutoff = 2):
for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff = 2):
if len(n_path) == 2:
node_remove.append(neighbor)
G = remove_nested(G, cons)
for node in node_remove:
try:
G.remove_node(node)
Expand All @@ -46,20 +73,19 @@ def build_paths_graph(cons, full_paths_roots, full_paths_leafs, cluster_distance
full_paths_leafs.remove(node)
except:
continue
return (G,full_paths_roots,full_paths_leafs)

for node in G.nodes():
neighbors = nx.all_neighbors(G, node)

def remove_bubbles(graph, source_nodes):
for node in source_nodes:
neighbors = list(source_nodes)
for neighbor in list(neighbors):
for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff = 3):
if len(n_path) == 3:
path_remove.append(n_path)
for n_path in nx.algorithms.all_simple_paths(graph, node, neighbor, cutoff = 2):
if len(n_path) == 2:
node_remove.append(neighbor)



for n_path in path_remove:
try:
G.remove_edge(n_path[0], n_path[1])
except:
continue
return (G)

def find_full_paths(G, paths_roots, paths_leafs):
paths = []
Expand All @@ -74,7 +100,7 @@ def find_full_paths(G, paths_roots, paths_leafs):
for path in list(paths_nx):
paths.append(path)

return (paths)
return paths



Expand All @@ -97,7 +123,7 @@ def remove_nested(G, cons):
continue
except:
continue
return (G)
return G

def add_path_edges(edge, g, cl, ln, full_paths, G, paths_roots, paths_leafs, full_clusters, cons, flye_consensus):
"""
Expand Down Expand Up @@ -280,3 +306,5 @@ def paths_graph_add_vis(edge, cons, cl, full_paths_roots,
graph_vis = gv.AGraph(graph_str)
graph_vis.layout(prog = "dot") # TODO: this line may cause an error
graph_vis.draw("%s/graphs/connection_graph_%s.png" % (StRainyArgs().output_intermediate, edge))


0 comments on commit 2fa6a7a

Please sign in to comment.