Skip to content

Commit

Permalink
almost done with adding header files
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Dec 22, 2023
1 parent acea45e commit 2964db1
Show file tree
Hide file tree
Showing 49 changed files with 79 additions and 27 deletions.
4 changes: 3 additions & 1 deletion docker-wrappers/Cytoscape/cytoscape_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ def load_pathways(pathways: List[str], output: str) -> None:
suid = p4c.networks.import_network_from_tabular_file(
file=path,
column_type_list='s,t,x,ea',
delimiters='\t'
delimiters='\t',
first_row_as_column_names = True,
start_load_row = 2,
)
p4c.networks.rename_network(name, network=suid)

Expand Down
4 changes: 3 additions & 1 deletion spras/allpairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
@param standardized_pathway_file: the same pathway written in the universal format
"""
df = pd.read_csv(raw_pathway_file, sep='\t', header=None)

df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked.
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
8 changes: 4 additions & 4 deletions spras/analysis/graphspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,21 @@ def load_graph(path: str) -> Tuple[Union[nx.Graph, nx.DiGraph], bool]:
directed = False

try:
pathways = pd.read_csv(path, sep="\t", header=None)
pathways = pd.read_csv(path, sep="\t", header=0)
except pd.errors.EmptyDataError:
print(f"The file {path} is empty.")
return G, directed
pathways.columns = ["Interactor1", "Interactor2", "Rank", "Direction"]

mask_u = pathways['Direction'] == 'U'
mask_d = pathways['Direction'] == 'D'
pathways.drop(columns=["Direction"])

if mask_u.all():
G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"])
G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"])
directed = False

elif mask_d.all():
G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"], create_using=nx.DiGraph())
G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"], create_using=nx.DiGraph())
directed = True
else:
print(f"{path} could not be visualized. GraphSpace does not support mixed direction type graphs currently")
Expand Down
7 changes: 5 additions & 2 deletions spras/analysis/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
edges = []
for line in lines:
parts = line.split('\t')
if len(parts) > 0: # in case of empty line in file
if len(parts) >= 4: # in case of empty line in file or line doesn't include all values
node1 = parts[0]
node2 = parts[1]
direction = str(parts[3]).strip()
Expand All @@ -55,7 +55,10 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
# node order does matter for directed edges
edges.append(DIR_CONST.join([node1, node2]))
else:
ValueError(f"direction is {direction}, rather than U or D")
if direction == 'Direction': # if reading the header
continue
else:
raise ValueError(f"direction is {direction}, rather than U or D")

# getting the algorithm name
p = PurePath(file)
Expand Down
13 changes: 11 additions & 2 deletions spras/analysis/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,17 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->

# Iterate through each network file path
for file_path in sorted(file_paths):
# Load in the network
nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
nw = None
# nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
if os.path.getsize(file_path) == 0:
continue
else:
with open(file_path, 'r') as f:
# skip the header line
next(f)
# Load in the network
nw = nx.read_edgelist(f, data=(('weight', float), ('Direction', str)))

# Save the network name, number of nodes, number edges, and number of connected components
nw_name = str(file_path)
number_nodes = nw.number_of_nodes()
Expand Down
6 changes: 4 additions & 2 deletions spras/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
edges_df = reinsert_direction_col_undirected(edges_df)

edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)
edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
else:
edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)


def pre_domino_id_transform(node_id):
Expand Down
6 changes: 3 additions & 3 deletions spras/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Would need to load the paths output file to rank edges correctly
df = add_rank_column(df)
df = reinsert_direction_col_directed(df)

df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False,
index=False, sep='\t')
df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/mincostflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# TODO update MinCostFlow version to support mixed graphs
# Currently directed edges in the input will be converted to undirected edges in the output
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')

6 changes: 3 additions & 3 deletions spras/omicsintegrator1.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df.columns = ["Edge1", "InteractionType", "Edge2"]
df = add_rank_column(df)
df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")

df.to_csv(standardized_pathway_file, columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False,
sep='\t')
df.drop(columns=['InteractionType'], inplace = True)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df = df.take([0, 1], axis=1)
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/pathlinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# What about multiple raw_pathway_files
df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
df = reinsert_direction_col_directed(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBLB_HUMAN EGFR_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBLB_HUMAN EGFR_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
CBL_HUMAN CRKL_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
EGF_HUMAN EGFR_HUMAN 1 U
EGF_HUMAN S10A4_HUMAN 2 U
S10A4_HUMAN MYH9_HUMAN 2 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
EGF_HUMAN EGFR_HUMAN 1 U
EGF_HUMAN S10A4_HUMAN 2 U
S10A4_HUMAN MYH9_HUMAN 2 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 1 D
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A D 1 U
G H 1 U
G I 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A D 1 U
G H 1 U
G I 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
C D 1 U
C F 1 U
A D 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 2 D
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 2 D
1 change: 1 addition & 0 deletions test/analysis/input/standardized-ranked.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
A C 3 U
C D 5 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network1.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
C D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network3.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
A C 1 U
A D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network4.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
D E 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network5.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
C D 1 U
Expand Down
6 changes: 0 additions & 6 deletions test/analysis/output/example_summary.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
Name Number of nodes Number of undirected edges Number of connected components Nodes in sources Nodes in targets
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 1 1
test/analysis/input/example/data0-omicsintegrator1-params-JAZWLAK_pathway.txt 0 0 0 0 0
test/analysis/input/example/data0-omicsintegrator1-params-PU62FNV_pathway.txt 0 0 0 0 0
test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt 3 2 1 1 1
test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt 3 2 1 1 1
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0 0
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 1 1
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 1 1
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 1 1
test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt 4 4 1 1 2
test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt 5 3 2 1 3
test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt 5 3 2 1 3
test/analysis/input/example/data1-omicsintegrator1-params-RQCQ4YN_pathway.txt 0 0 0 0 0
test/analysis/input/example/data1-omicsintegrator1-params-WY4V42C_pathway.txt 0 0 0 0 0
test/analysis/input/example/data1-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0 0
test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt 7 6 1 1 4
test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt 4 3 1 1 2
test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt 4 3 1 1 2
1 change: 1 addition & 0 deletions test/ml/input/test-data-longName/longName.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
node1 node2 1 U
node1 node3 1 U
node4 node5 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-longName2/longName2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
node3 node2 1 U
node1 node3 1 U
node5 node4 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-s1/s1.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
1 change: 1 addition & 0 deletions test/ml/input/test-data-s2/s2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-s3/s3.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
L M 1 U
M N 1 U
O P 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-spaces/spaces.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
L M 1 U
O P 1 U
nodes with spaces in name 1 U
1 change: 1 addition & 0 deletions test/ml/input/test-mixed-direction/mixed-direction.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B A 1 D
C D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/allpairs-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
S1 A 1 U
S1 B 1 U
A E 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/domino-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ENSG00000122691 ENSG00000138757 1 U
ENSG00000122691 ENSG00000109320 1 U
ENSG00000134954 ENSG00000077150 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/meo-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
GENEA GENEC 1 D
GENEC GENEB 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
D B 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A C 1 D
C D 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
S2 T3 1 D
A E 2 D
S1 A 2 D
Expand Down

0 comments on commit 2964db1

Please sign in to comment.