-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubgraph.py
111 lines (93 loc) · 3.61 KB
/
subgraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import random
import networkx as nx
import matplotlib.pyplot as plt
from tools.helper import (
import_graph_from_pickle,
get_neighbors,
export_graph_to_pickle,
read_specific_columns,
)
'''
Creates a subgraph of the top PageRank-ranked nodes for a given GO term so that the paths between those nodes can be visualized. This can be done with four different graphs: one with inferred edges (G), one without inferred edges (I), one with only protein-protein edges (P), and one with only protein-GO term edges (D).
'''
# GO term whose top-ranked neighborhood is visualized.
go_term = "GO:0015140"

# Pre-generated graphs written by main (I is the only one stored under a
# different name). They are loaded in the order G, I, P, D.
G, I, P, D = (
    import_graph_from_pickle(pickle_path)
    for pickle_path in (
        "./output/dataset/graph.pickle",
        "./output/dataset/no_inferred.pickle",
        "./output/dataset/protein.pickle",
        "./output/dataset/go_protein.pickle",
    )
)

# Graphs to process for the GO term; each graph has its own output.
graphs = [G, I, P, D]

# Number of top ranked nodes to visualize.
num = 30
# Plot title for each graph variant, keyed by the graph's one-letter label.
titles = {
    "G": "With Inferred Edges",
    "I": "No Inferred Edges",
    "P": "Protein-protein Edges Only",
    "D": "ProGo Edges Only",
}

# (label, graph) pairs used to recognise which pre-loaded graph is being
# processed, so `graphs` may list any subset of them in any order.
labeled_graphs = (("G", G), ("I", I), ("P", P), ("D", D))


def _node_color(node, positive, annotated):
    """Return the fill color for *node*.

    Red for the GO term, dark green for the positive protein, pink for the
    GO term's other protein neighbors (*annotated*) and teal for the rest.
    """
    if node == go_term:
        return "firebrick"
    if node == positive:
        return "darkolivegreen"
    if node in annotated:
        return "mediumvioletred"
    return "mediumturquoise"


def _edge_color(edge, positive, annotated):
    """Return the color for *edge*, a pair of endpoint nodes.

    Dark yellow for edges touching the GO term, pale orange for edges
    touching the positive protein, purple for edges touching one of the GO
    term's other protein neighbors (*annotated*), black for all else.
    """
    u, v = edge[0], edge[1]
    if go_term in (u, v):
        return "darkgoldenrod"
    if positive in (u, v):
        return "peachpuff"
    if u in annotated or v in annotated:
        return "rebeccapurple"
    return "black"


# Look the GO term's protein neighbors up once, before any graph is modified:
# the loop below removes an edge from every non-P graph it processes (G
# included), so querying G inside the loop would return a different neighbor
# list after G has been processed.
# NOTE(review): assumes each item returned by get_neighbors carries the
# neighbor's node id at index 0 - confirm against tools.helper.
go_proteins = [entry[0] for entry in get_neighbors(G, go_term, "protein_go_term")]
if not go_proteins:
    raise ValueError(go_term + " has no protein neighbors in G")

for graph in graphs:
    # Randomly picks one of the GO term's protein neighbors as the positive protein
    neighbor = random.choice(go_proteins)
    # The GO term's remaining protein neighbors (first occurrence of the
    # positive protein dropped), plus a set of them for fast membership tests.
    n = list(go_proteins)
    n.remove(neighbor)
    annotated = set(n)

    # Identify the graph by object identity; stays "" for an unknown graph.
    t = next((label for label, candidate in labeled_graphs if candidate is graph), "")

    if t == "P":
        # No edge is removed for P; the random walk restarts from the GO
        # term's remaining protein neighbors instead of from the GO term.
        # NOTE(review): presumably because P has no GO term node - confirm.
        p = nx.pagerank(graph, alpha=0.7, personalization=dict.fromkeys(n, 1))
    else:
        # Removes the edge to the positive protein and runs pagerank
        # personalized on the GO term. This modifies the loaded graph in place.
        graph.remove_edge(neighbor, go_term)
        p = nx.pagerank(graph, alpha=0.7, personalization={go_term: 1})
    sort_all = sorted(p.items(), key=lambda item: item[1], reverse=True)

    # Pulls only the top num ranked nodes (protein and go term); the slice is
    # clamped so that a negative num selects nothing.
    node_lst = [node for node, _ in sort_all[:max(num, 0)]]

    # Reports where the positive protein landed in the full ranking.
    # NOTE(review): the source's indentation was lost; the report is assumed
    # to apply to the whole ranking, not only to the top num nodes - confirm.
    for rank, (node, _) in enumerate(sort_all, start=1):
        if node == neighbor:
            print(node + " is ranked " + str(rank))

    # Creates a subgraph using the top num ranked nodes
    S = graph.subgraph(node_lst)

    # Color lists follow the iteration order of S.nodes() and S.edges().
    color = [_node_color(node, neighbor, annotated) for node in S.nodes()]
    Ecolor = [_edge_color(edge, neighbor, annotated) for edge in S.edges()]

    # Fixed seed keeps the layout reproducible between runs.
    pos = nx.spring_layout(S, seed=63)
    plt.subplots()  # start a fresh figure for this graph
    plt.title(titles[t])
    nx.draw_networkx(S, pos, with_labels=True, node_color=color, edge_color=Ecolor)
    plt.show()