From 2964db171a72eae88445202531dbd6b015dee0da Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 22 Dec 2023 12:22:20 -0800
Subject: [PATCH 01/26] Add header rows to standardized pathway files (work in progress)

---
 docker-wrappers/Cytoscape/cytoscape_util.py         |  4 +++-
 spras/allpairs.py                                   |  4 +++-
 spras/analysis/graphspace.py                        |  8 ++++----
 spras/analysis/ml.py                                |  7 +++++--
 spras/analysis/summary.py                           | 13 +++++++++++--
 spras/domino.py                                     |  6 ++++--
 spras/meo.py                                        |  6 +++---
 spras/mincostflow.py                                |  3 ++-
 spras/omicsintegrator1.py                           |  6 +++---
 spras/omicsintegrator2.py                           |  3 ++-
 spras/pathlinker.py                                 |  3 ++-
 ...egfr-omicsintegrator1-params-3THRXWW_pathway.txt |  1 +
 ...egfr-omicsintegrator1-params-5QH767V_pathway.txt |  1 +
 ...egfr-omicsintegrator1-params-ITO5EQS_pathway.txt |  1 +
 .../tps-egfr-pathlinker-params-7S4SLU6_pathway.txt  |  1 +
 .../tps-egfr-pathlinker-params-TCEMRS7_pathway.txt  |  1 +
 .../example/data0-meo-params-GKEDDFZ_pathway.txt    |  1 +
 ...ata0-omicsintegrator1-params-RQCQ4YN_pathway.txt |  1 +
 ...ata0-omicsintegrator1-params-WY4V42C_pathway.txt |  1 +
 ...ata0-omicsintegrator2-params-IV3IPCJ_pathway.txt |  1 +
 .../data0-pathlinker-params-6SWY7JS_pathway.txt     |  1 +
 .../data0-pathlinker-params-VQL7BDZ_pathway.txt     |  1 +
 .../example/data1-meo-params-GKEDDFZ_pathway.txt    |  1 +
 ...ata1-omicsintegrator1-params-JAZWLAK_pathway.txt |  1 +
 ...ata1-omicsintegrator1-params-PU62FNV_pathway.txt |  1 +
 ...ata1-omicsintegrator2-params-IV3IPCJ_pathway.txt |  1 +
 .../data1-pathlinker-params-6SWY7JS_pathway.txt     |  1 +
 .../data1-pathlinker-params-VQL7BDZ_pathway.txt     |  1 +
 test/analysis/input/standardized-ranked.txt         |  1 +
 test/analysis/input/toy/network1.txt                |  1 +
 test/analysis/input/toy/network2.txt                |  1 +
 test/analysis/input/toy/network3.txt                |  1 +
 test/analysis/input/toy/network4.txt                |  1 +
 test/analysis/input/toy/network5.txt                |  1 +
 test/analysis/output/example_summary.txt            |  6 ------
 test/ml/input/test-data-longName/longName.txt       |  1 +
 test/ml/input/test-data-longName2/longName2.txt     |  1 +
 test/ml/input/test-data-s1/s1.txt                   |  1 +
 test/ml/input/test-data-s2/s2.txt                   |  1 +
 test/ml/input/test-data-s3/s3.txt                   |  1 +
 test/ml/input/test-data-spaces/spaces.txt           |  1 +
 .../input/test-mixed-direction/mixed-direction.txt  |  1 +
 .../expected/allpairs-pathway-expected.txt          |  1 +
 .../expected/domino-pathway-expected.txt            |  1 +
 .../parse-outputs/expected/meo-pathway-expected.txt |  1 +
 .../expected/mincostflow-pathway-expected.txt       |  1 +
 .../expected/omicsintegrator1-pathway-expected.txt  |  1 +
 .../expected/omicsintegrator2-pathway-expected.txt  |  1 +
 .../expected/pathlinker-pathway-expected.txt        |  1 +
 49 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/docker-wrappers/Cytoscape/cytoscape_util.py b/docker-wrappers/Cytoscape/cytoscape_util.py
index dcf110f1..c8f47922 100644
--- a/docker-wrappers/Cytoscape/cytoscape_util.py
+++ b/docker-wrappers/Cytoscape/cytoscape_util.py
@@ -116,7 +116,9 @@ def load_pathways(pathways: List[str], output: str) -> None:
         suid = p4c.networks.import_network_from_tabular_file(
             file=path,
             column_type_list='s,t,x,ea',
-            delimiters='\t'
+            delimiters='\t',
+            first_row_as_column_names = True,
+            start_load_row = 2,
         )
         p4c.networks.rename_network(name, network=suid)
 
diff --git a/spras/allpairs.py b/spras/allpairs.py
index 3de9029d..38f7f000 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -110,6 +110,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
         df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        
         df['Rank'] = 1  # add a rank column of 1s since the edges are not ranked.
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/analysis/graphspace.py b/spras/analysis/graphspace.py
index ba87de6e..bee103f6 100644
--- a/spras/analysis/graphspace.py
+++ b/spras/analysis/graphspace.py
@@ -77,21 +77,21 @@ def load_graph(path: str) -> Tuple[Union[nx.Graph, nx.DiGraph], bool]:
     directed = False
 
     try:
-        pathways = pd.read_csv(path, sep="\t", header=None)
+        pathways = pd.read_csv(path, sep="\t", header=0)
     except pd.errors.EmptyDataError:
         print(f"The file {path} is empty.")
         return G, directed
-    pathways.columns = ["Interactor1", "Interactor2", "Rank", "Direction"]
+
     mask_u = pathways['Direction'] == 'U'
     mask_d = pathways['Direction'] == 'D'
     pathways.drop(columns=["Direction"])
 
     if mask_u.all():
-        G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"])
+        G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"])
         directed = False
 
     elif mask_d.all():
-        G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"], create_using=nx.DiGraph())
+        G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"], create_using=nx.DiGraph())
         directed = True
     else:
         print(f"{path} could not be visualized. GraphSpace does not support mixed direction type graphs currently")
diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index 64260ad6..2c129873 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -44,7 +44,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
             edges = []
             for line in lines:
                 parts = line.split('\t')
-                if len(parts) > 0:  # in case of empty line in file
+                if len(parts) >= 4:  # in case of empty line in file or line doesn't include all values
                     node1 = parts[0]
                     node2 = parts[1]
                     direction = str(parts[3]).strip()
@@ -55,7 +55,10 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                         # node order does matter for directed edges
                         edges.append(DIR_CONST.join([node1, node2]))
                     else:
-                        ValueError(f"direction is {direction}, rather than U or D")
+                        if direction == 'Direction': # if reading the header
+                            continue
+                        else: 
+                            raise ValueError(f"direction is {direction}, rather than U or D")
 
             # getting the algorithm name
             p = PurePath(file)
diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index 9b0d797d..d0bc9582 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -33,8 +33,17 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
 
     # Iterate through each network file path
     for file_path in sorted(file_paths):
-        # Load in the network
-        nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
+        nw = None
+        # nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
+        if os.path.getsize(file_path) == 0:
+            continue
+        else: 
+            with open(file_path, 'r') as f:
+                # skip the header line
+                next(f)
+                # Load in the network
+                nw = nx.read_edgelist(f, data=(('weight', float), ('Direction', str)))
+
         # Save the network name, number of nodes, number edges, and number of connected components
         nw_name = str(file_path)
         number_nodes = nw.number_of_nodes()
diff --git a/spras/domino.py b/spras/domino.py
index 53434e3f..4666bf83 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -205,8 +205,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
             edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
             edges_df = reinsert_direction_col_undirected(edges_df)
-
-        edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)
+            edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
+            edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
+        else: 
+            edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)
 
 
 def pre_domino_id_transform(node_id):
diff --git a/spras/meo.py b/spras/meo.py
index d9cf2f24..2d5d8741 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -188,6 +188,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Would need to load the paths output file to rank edges correctly
         df = add_rank_column(df)
         df = reinsert_direction_col_directed(df)
-
-        df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False,
-                  index=False, sep='\t')
+        df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index 24ee9677..a1e18942 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -154,5 +154,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # TODO update MinCostFlow version to support mixed graphs
         # Currently directed edges in the input will be converted to undirected edges in the output
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
 
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 6e2c6807..bf619093 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -201,6 +201,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         df.columns = ["Edge1", "InteractionType", "Edge2"]
         df = add_rank_column(df)
         df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-
-        df.to_csv(standardized_pathway_file, columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False,
-                  sep='\t')
+        df.drop(columns=['InteractionType'], inplace = True)
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 5099b8d9..1b953600 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -155,4 +155,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         df = df.take([0, 1], axis=1)
         df = add_rank_column(df)
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index 85b38fe9..0147dc40 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -140,4 +140,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # What about multiple raw_pathway_files
         df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
         df = reinsert_direction_col_directed(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt
index 03571eae..44944b37 100644
--- a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt
+++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U 
 CBLB_HUMAN EGFR_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U
diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt
index 30d107b0..b2033b57 100644
--- a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt
+++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U
 CBLB_HUMAN EGFR_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U
diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt
index 065ef6f9..e0adf2fc 100644
--- a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt
+++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U
 CBL_HUMAN CRKL_HUMAN 1 U
diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt
index 899147f8..bc9dfc85 100644
--- a/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt
+++ b/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 EGF_HUMAN EGFR_HUMAN 1 U
 EGF_HUMAN S10A4_HUMAN 2 U
 S10A4_HUMAN MYH9_HUMAN 2 U
diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt
index 3b1ddef5..a1738b00 100644
--- a/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt
+++ b/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 EGF_HUMAN EGFR_HUMAN 1 U
 EGF_HUMAN S10A4_HUMAN 2 U
 S10A4_HUMAN MYH9_HUMAN 2 U
diff --git a/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt
index 9d65620f..5547a49c 100644
--- a/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt
+++ b/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt
index e2fd8d57..21768464 100644
--- a/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt
+++ b/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt
index e2fd8d57..21768464 100644
--- a/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt
+++ b/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt
index 65f6f221..e34eeaff 100644
--- a/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt
+++ b/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt
index 9d65620f..5547a49c 100644
--- a/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt
+++ b/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt
index 9d65620f..5547a49c 100644
--- a/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt
+++ b/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt
index 71ed6ccf..a87a0437 100644
--- a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt
+++ b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	1	D
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt
index afbe030d..885a8574 100644
--- a/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt
+++ b/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	D	1	U
 G	H	1	U
 G	I	1	U
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt
index afbe030d..885a8574 100644
--- a/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt
+++ b/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	D	1	U
 G	H	1	U
 G	I	1	U
diff --git a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt
index eddad79c..069481df 100644
--- a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt
+++ b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 C	D	1	U
 C	F	1	U
 A	D	1	U
diff --git a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt
index 92b60b6e..ec070652 100644
--- a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt
+++ b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	2	D
diff --git a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt
index 92b60b6e..ec070652 100644
--- a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt
+++ b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	2	D
diff --git a/test/analysis/input/standardized-ranked.txt b/test/analysis/input/standardized-ranked.txt
index c432c386..27f8222f 100644
--- a/test/analysis/input/standardized-ranked.txt
+++ b/test/analysis/input/standardized-ranked.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 A	C  	3  	U
 C	D 	5	U
diff --git a/test/analysis/input/toy/network1.txt b/test/analysis/input/toy/network1.txt
index 21847821..bd5bd343 100644
--- a/test/analysis/input/toy/network1.txt
+++ b/test/analysis/input/toy/network1.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 C D 1 U
 E F 1 U
diff --git a/test/analysis/input/toy/network2.txt b/test/analysis/input/toy/network2.txt
index f7811bc4..7506195d 100644
--- a/test/analysis/input/toy/network2.txt
+++ b/test/analysis/input/toy/network2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 C D 1 U
diff --git a/test/analysis/input/toy/network3.txt b/test/analysis/input/toy/network3.txt
index cbf42fb5..eaf05c07 100644
--- a/test/analysis/input/toy/network3.txt
+++ b/test/analysis/input/toy/network3.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 A C 1 U
 A D 1 U
diff --git a/test/analysis/input/toy/network4.txt b/test/analysis/input/toy/network4.txt
index d711ec1a..61ed9a4b 100644
--- a/test/analysis/input/toy/network4.txt
+++ b/test/analysis/input/toy/network4.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 D E 1 U
diff --git a/test/analysis/input/toy/network5.txt b/test/analysis/input/toy/network5.txt
index 5aaf5c0b..3d0eaf8c 100644
--- a/test/analysis/input/toy/network5.txt
+++ b/test/analysis/input/toy/network5.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 C D 1 U
diff --git a/test/analysis/output/example_summary.txt b/test/analysis/output/example_summary.txt
index f34497a4..fde30449 100644
--- a/test/analysis/output/example_summary.txt
+++ b/test/analysis/output/example_summary.txt
@@ -1,19 +1,13 @@
 Name	Number of nodes	Number of undirected edges	Number of connected components	Nodes in sources	Nodes in targets
 test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt	3	2	1	1	1
-test/analysis/input/example/data0-omicsintegrator1-params-JAZWLAK_pathway.txt	0	0	0	0	0
-test/analysis/input/example/data0-omicsintegrator1-params-PU62FNV_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt	3	2	1	1	1
-test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt	4	4	1	1	2
 test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt	5	3	2	1	3
 test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt	5	3	2	1	3
-test/analysis/input/example/data1-omicsintegrator1-params-RQCQ4YN_pathway.txt	0	0	0	0	0
-test/analysis/input/example/data1-omicsintegrator1-params-WY4V42C_pathway.txt	0	0	0	0	0
-test/analysis/input/example/data1-omicsintegrator2-params-EHHWPMD_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt	7	6	1	1	4
 test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt	4	3	1	1	2
 test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt	4	3	1	1	2
diff --git a/test/ml/input/test-data-longName/longName.txt b/test/ml/input/test-data-longName/longName.txt
index aabf41b2..7e120dff 100644
--- a/test/ml/input/test-data-longName/longName.txt
+++ b/test/ml/input/test-data-longName/longName.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 node1	node2	1	U
 node1	node3	1	U
 node4	node5	1	U
diff --git a/test/ml/input/test-data-longName2/longName2.txt b/test/ml/input/test-data-longName2/longName2.txt
index 8765175f..35bf0c2e 100644
--- a/test/ml/input/test-data-longName2/longName2.txt
+++ b/test/ml/input/test-data-longName2/longName2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 node3	node2	1	U
 node1	node3	1	U
 node5	node4	1	U
diff --git a/test/ml/input/test-data-s1/s1.txt b/test/ml/input/test-data-s1/s1.txt
index 031f4142..a8a52914 100644
--- a/test/ml/input/test-data-s1/s1.txt
+++ b/test/ml/input/test-data-s1/s1.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 C	D	1	U
 E	F	1	U
\ No newline at end of file
diff --git a/test/ml/input/test-data-s2/s2.txt b/test/ml/input/test-data-s2/s2.txt
index 680bf369..d4e9860b 100644
--- a/test/ml/input/test-data-s2/s2.txt
+++ b/test/ml/input/test-data-s2/s2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 C	D	1	U
 E	F	1	U
diff --git a/test/ml/input/test-data-s3/s3.txt b/test/ml/input/test-data-s3/s3.txt
index d06960f9..6884cfe8 100644
--- a/test/ml/input/test-data-s3/s3.txt
+++ b/test/ml/input/test-data-s3/s3.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 L	M	1	U
 M	N	1	U
 O	P	1	U
diff --git a/test/ml/input/test-data-spaces/spaces.txt b/test/ml/input/test-data-spaces/spaces.txt
index 0860d779..3565af81 100644
--- a/test/ml/input/test-data-spaces/spaces.txt
+++ b/test/ml/input/test-data-spaces/spaces.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 L	M	1	U
 O	P	1	U
 nodes with	spaces in name	1	U
\ No newline at end of file
diff --git a/test/ml/input/test-mixed-direction/mixed-direction.txt b/test/ml/input/test-mixed-direction/mixed-direction.txt
index 6463ab3b..f77061a1 100644
--- a/test/ml/input/test-mixed-direction/mixed-direction.txt
+++ b/test/ml/input/test-mixed-direction/mixed-direction.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	A	1	D
 C	D	1	U
diff --git a/test/parse-outputs/expected/allpairs-pathway-expected.txt b/test/parse-outputs/expected/allpairs-pathway-expected.txt
index ee3c198b..3af52bc6 100644
--- a/test/parse-outputs/expected/allpairs-pathway-expected.txt
+++ b/test/parse-outputs/expected/allpairs-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 S1	A	1	U
 S1	B	1	U
 A	E	1	U
diff --git a/test/parse-outputs/expected/domino-pathway-expected.txt b/test/parse-outputs/expected/domino-pathway-expected.txt
index 3fb1c13a..074f1b20 100644
--- a/test/parse-outputs/expected/domino-pathway-expected.txt
+++ b/test/parse-outputs/expected/domino-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ENSG00000122691	ENSG00000138757	1	U
 ENSG00000122691	ENSG00000109320	1	U
 ENSG00000134954	ENSG00000077150	1	U
diff --git a/test/parse-outputs/expected/meo-pathway-expected.txt b/test/parse-outputs/expected/meo-pathway-expected.txt
index 1971d419..6515013f 100644
--- a/test/parse-outputs/expected/meo-pathway-expected.txt
+++ b/test/parse-outputs/expected/meo-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 GENEA	GENEC	1	D
 GENEC	GENEB	1	D
diff --git a/test/parse-outputs/expected/mincostflow-pathway-expected.txt b/test/parse-outputs/expected/mincostflow-pathway-expected.txt
index cd60214e..b25d172b 100644
--- a/test/parse-outputs/expected/mincostflow-pathway-expected.txt
+++ b/test/parse-outputs/expected/mincostflow-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 D	B	1	U
diff --git a/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt
index 16f30549..f808bc3a 100644
--- a/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt
+++ b/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	C	1	D
 C	D	1	U
diff --git a/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt
index 65f6f221..e34eeaff 100644
--- a/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt
+++ b/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 B	C	1	U
diff --git a/test/parse-outputs/expected/pathlinker-pathway-expected.txt b/test/parse-outputs/expected/pathlinker-pathway-expected.txt
index 9edabc0c..e490cd91 100644
--- a/test/parse-outputs/expected/pathlinker-pathway-expected.txt
+++ b/test/parse-outputs/expected/pathlinker-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 S2	T3	1	D
 A	E	2	D
 S1	A	2	D

From 1b340bb7b0f22b55d093571cc185c696e2da8e05 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 22 Dec 2023 12:25:25 -0800
Subject: [PATCH 02/26] Apply pre-commit trailing-whitespace fixes

---
 spras/allpairs.py         | 2 +-
 spras/analysis/ml.py      | 2 +-
 spras/analysis/summary.py | 2 +-
 spras/domino.py           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/spras/allpairs.py b/spras/allpairs.py
index 38f7f000..487cd0d8 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -110,7 +110,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
         df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-        
+
         df['Rank'] = 1  # add a rank column of 1s since the edges are not ranked.
         df = reinsert_direction_col_undirected(df)
         df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index 2c129873..98526ef1 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -57,7 +57,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                     else:
                         if direction == 'Direction': # if reading the header
                             continue
-                        else: 
+                        else:
                             raise ValueError(f"direction is {direction}, rather than U or D")
 
             # getting the algorithm name
diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index d0bc9582..4ea6595c 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -37,7 +37,7 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
         # nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
         if os.path.getsize(file_path) == 0:
             continue
-        else: 
+        else:
             with open(file_path, 'r') as f:
                 # skip the header line
                 next(f)
diff --git a/spras/domino.py b/spras/domino.py
index 4666bf83..32136e97 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -207,7 +207,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             edges_df = reinsert_direction_col_undirected(edges_df)
             edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
             edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
-        else: 
+        else:
             edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)
 
 

From 5edf8a79fceabcb542d2c5eb727ef6ed32ee2627 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Sat, 23 Dec 2023 14:57:54 -0800
Subject: [PATCH 03/26] Update summary.py header skipping so zero-size pathway files are no longer skipped

---
 spras/analysis/summary.py                | 16 ++++++----------
 test/analysis/output/example_summary.txt |  6 ++++++
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index 4ea6595c..aca03e72 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -33,16 +33,12 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
 
     # Iterate through each network file path
     for file_path in sorted(file_paths):
-        nw = None
-        # nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
-        if os.path.getsize(file_path) == 0:
-            continue
-        else:
-            with open(file_path, 'r') as f:
-                # skip the header line
-                next(f)
-                # Load in the network
-                nw = nx.read_edgelist(f, data=(('weight', float), ('Direction', str)))
+        
+        lines = None
+        with open(file_path, 'r') as f:
+            lines = f.readlines()[1:]  # skip the first line
+       
+        nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str)))
 
         # Save the network name, number of nodes, number edges, and number of connected components
         nw_name = str(file_path)
diff --git a/test/analysis/output/example_summary.txt b/test/analysis/output/example_summary.txt
index fde30449..f34497a4 100644
--- a/test/analysis/output/example_summary.txt
+++ b/test/analysis/output/example_summary.txt
@@ -1,13 +1,19 @@
 Name	Number of nodes	Number of undirected edges	Number of connected components	Nodes in sources	Nodes in targets
 test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt	3	2	1	1	1
+test/analysis/input/example/data0-omicsintegrator1-params-JAZWLAK_pathway.txt	0	0	0	0	0
+test/analysis/input/example/data0-omicsintegrator1-params-PU62FNV_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt	3	2	1	1	1
+test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt	3	2	1	1	1
 test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt	4	4	1	1	2
 test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt	5	3	2	1	3
 test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt	5	3	2	1	3
+test/analysis/input/example/data1-omicsintegrator1-params-RQCQ4YN_pathway.txt	0	0	0	0	0
+test/analysis/input/example/data1-omicsintegrator1-params-WY4V42C_pathway.txt	0	0	0	0	0
+test/analysis/input/example/data1-omicsintegrator2-params-EHHWPMD_pathway.txt	0	0	0	0	0
 test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt	7	6	1	1	4
 test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt	4	3	1	1	2
 test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt	4	3	1	1	2

From 733b73682e636e7f06415b7e34a35a49a1449cee Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Sat, 23 Dec 2023 14:58:39 -0800
Subject: [PATCH 04/26] precommit

---
 spras/analysis/summary.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index aca03e72..0a173521 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -33,11 +33,11 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
 
     # Iterate through each network file path
     for file_path in sorted(file_paths):
-        
+
         lines = None
         with open(file_path, 'r') as f:
             lines = f.readlines()[1:]  # skip the first line
-       
+
         nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str)))
 
         # Save the network name, number of nodes, number edges, and number of connected components

From 3964789f1be34fdd08e1764daa05a0628854c354 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Thu, 18 Jan 2024 16:52:58 -0800
Subject: [PATCH 05/26] added changes to cytoscape

---
 docker-wrappers/Cytoscape/cytoscape_util.py | 4 ++--
 spras/analysis/cytoscape.py                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docker-wrappers/Cytoscape/cytoscape_util.py b/docker-wrappers/Cytoscape/cytoscape_util.py
index c8f47922..1a7b8d76 100644
--- a/docker-wrappers/Cytoscape/cytoscape_util.py
+++ b/docker-wrappers/Cytoscape/cytoscape_util.py
@@ -117,8 +117,8 @@ def load_pathways(pathways: List[str], output: str) -> None:
             file=path,
             column_type_list='s,t,x,ea',
             delimiters='\t',
-            first_row_as_column_names = True,
-            start_load_row = 2,
+            first_row_as_column_names=True,
+            
         )
         p4c.networks.rename_network(name, network=suid)
 
diff --git a/spras/analysis/cytoscape.py b/spras/analysis/cytoscape.py
index f39fff6d..82c35fc6 100644
--- a/spras/analysis/cytoscape.py
+++ b/spras/analysis/cytoscape.py
@@ -51,7 +51,7 @@ def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str, singul
     # TODO consider making this a string in the config file instead of a Boolean
     container_framework = 'singularity' if singularity else 'docker'
     out = run_container(container_framework,
-                        'reedcompbio/py4cytoscape:v2',
+                        'reedcompbio/py4cytoscape:v3',
                         command,
                         volumes,
                         work_dir,

From 9d122e1c36a195962d1408b443ad1a26290a12e2 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Wed, 24 Jan 2024 14:10:07 -0600
Subject: [PATCH 06/26] precommit

---
 docker-wrappers/Cytoscape/cytoscape_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-wrappers/Cytoscape/cytoscape_util.py b/docker-wrappers/Cytoscape/cytoscape_util.py
index 1a7b8d76..cc8ddd72 100644
--- a/docker-wrappers/Cytoscape/cytoscape_util.py
+++ b/docker-wrappers/Cytoscape/cytoscape_util.py
@@ -118,7 +118,7 @@ def load_pathways(pathways: List[str], output: str) -> None:
             column_type_list='s,t,x,ea',
             delimiters='\t',
             first_row_as_column_names=True,
-            
+
         )
         p4c.networks.rename_network(name, network=suid)
 

From ada6cde76c8548b78461496e219c1bd209e06ad8 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 9 Feb 2024 12:56:06 -0600
Subject: [PATCH 07/26] update config

---
 config/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.yaml b/config/config.yaml
index b85c599b..4517e3ef 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -37,7 +37,7 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: false
+              include: true
               run1:
                   k: range(100,201,100)
 

From 0f1cba797bb8ab889ce574098eb86c6f774369ad Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 9 Feb 2024 14:13:25 -0600
Subject: [PATCH 08/26] review

---
 .github/workflows/test-spras.yml    | 4 ++--
 docker-wrappers/Cytoscape/README.md | 1 +
 spras/analysis/ml.py                | 5 +++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
index 3dc2ab85..ec252c8f 100644
--- a/.github/workflows/test-spras.yml
+++ b/.github/workflows/test-spras.yml
@@ -152,8 +152,8 @@ jobs:
         path: docker-wrappers/Cytoscape/.
         dockerfile: docker-wrappers/Cytoscape/Dockerfile
         repository: reedcompbio/py4cytoscape
-        tags: v2
-        cache_froms: reedcompbio/py4cytoscape:v2
+        tags: v3
+        cache_froms: reedcompbio/py4cytoscape:v3
         push: false
 
   # Run pre-commit checks on source files
diff --git a/docker-wrappers/Cytoscape/README.md b/docker-wrappers/Cytoscape/README.md
index 2d747ffd..c3cb8967 100644
--- a/docker-wrappers/Cytoscape/README.md
+++ b/docker-wrappers/Cytoscape/README.md
@@ -20,6 +20,7 @@ The Docker wrapper can be tested with `pytest`.
 ## Versions:
 - v1: Use supervisord to launch Cytoscape from a Python subprocess, then connect to Cytoscape with py4cytoscape. Only loads undirected pathways. Compatible with Singularity in local testing (Apptainer version 1.2.2-1.el7) but fails in GitHub Actions.
 - v2: Add support for edge direction column.
+- v3: Add support for header lines in files
 
 ## TODO
 - Add an auth file for `xvfb-run`
diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index 98526ef1..aa2bdb9b 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -44,7 +44,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
             edges = []
             for line in lines:
                 parts = line.split('\t')
-                if len(parts) >= 4:  # in case of empty line in file or line doesn't include all values
+                if len(parts) == 4:
                     node1 = parts[0]
                     node2 = parts[1]
                     direction = str(parts[3]).strip()
@@ -59,7 +59,8 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                             continue
                         else:
                             raise ValueError(f"direction is {direction}, rather than U or D")
-
+                elif len(parts) > 4 and len(parts) > 0: # empty line in file is okay
+                    raise ValueError(f"A line in pathway file {file} contains {len(parts)} values. There should be 4 values per line")
             # getting the algorithm name
             p = PurePath(file)
             edge_tuples.append((p.parts[-2], edges))

From a9de3edc0c91abc28c903d04eeadc934db34b6be Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 9 Feb 2024 14:29:50 -0600
Subject: [PATCH 09/26] update ml

---
 spras/analysis/ml.py                              |  2 +-
 test/ml/input/test-data-empty-line/emptyLine.txt  |  4 ++++
 .../less.txt                                      |  5 +++++
 .../more.txt                                      |  5 +++++
 test/ml/test_ml.py                                | 15 ++++++++++++++-
 5 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 test/ml/input/test-data-empty-line/emptyLine.txt
 create mode 100644 test/ml/input/test-data-unexpected-amount-of-values/less.txt
 create mode 100644 test/ml/input/test-data-unexpected-amount-of-values/more.txt

diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index aa2bdb9b..d4316750 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -59,7 +59,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                             continue
                         else:
                             raise ValueError(f"direction is {direction}, rather than U or D")
-                elif len(parts) > 4 and len(parts) > 0: # empty line in file is okay
+                elif len(parts) > 4 or (len(parts) < 4 and len(parts) > 0): # empty line in file is okay
                     raise ValueError(f"A line in pathway file {file} contains {len(parts)} values. There should be 4 values per line")
             # getting the algorithm name
             p = PurePath(file)
diff --git a/test/ml/input/test-data-empty-line/emptyLine.txt b/test/ml/input/test-data-empty-line/emptyLine.txt
new file mode 100644
index 00000000..144f2487
--- /dev/null
+++ b/test/ml/input/test-data-empty-line/emptyLine.txt
@@ -0,0 +1,4 @@
+Node1	Node2	Rank	Direction
+A	B	1	U
+
+E	F	1	U
\ No newline at end of file
diff --git a/test/ml/input/test-data-unexpected-amount-of-values/less.txt b/test/ml/input/test-data-unexpected-amount-of-values/less.txt
new file mode 100644
index 00000000..b6d268a1
--- /dev/null
+++ b/test/ml/input/test-data-unexpected-amount-of-values/less.txt
@@ -0,0 +1,5 @@
+Node1	Node2	Rank	Direction
+A	B	
+C	D	1	
+E	
+L	M	1	U
\ No newline at end of file
diff --git a/test/ml/input/test-data-unexpected-amount-of-values/more.txt b/test/ml/input/test-data-unexpected-amount-of-values/more.txt
new file mode 100644
index 00000000..f43c2d7d
--- /dev/null
+++ b/test/ml/input/test-data-unexpected-amount-of-values/more.txt
@@ -0,0 +1,5 @@
+Node1	Node2	Rank	Direction
+A	B	1	U	B	B
+C	D	1	U	B	B	B	B
+E	F	1	U	B
+L	M	1	U
\ No newline at end of file
diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py
index fa868a59..5641c282 100644
--- a/test/ml/test_ml.py
+++ b/test/ml/test_ml.py
@@ -1,6 +1,6 @@
 import filecmp
 from pathlib import Path
-
+import pytest
 import pandas as pd
 
 import spras.analysis.ml as ml
@@ -25,6 +25,19 @@ def test_summarize_networks(self):
         dataframe.to_csv(OUT_DIR + 'dataframe.csv')
         assert filecmp.cmp(OUT_DIR + 'dataframe.csv', EXPECT_DIR + 'expected-dataframe.csv', shallow=False)
 
+        
+    def test_summarize_networks_less_values(self):
+        with pytest.raises(ValueError):
+            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/less.txt'])
+    
+    def test_summarize_networks_more_values(self):
+        with pytest.raises(ValueError):
+            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/more.txt'])
+    
+    def test_summarize_networks_empty_line(self):
+        with pytest.raises(ValueError):
+            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-empty-line/emptyLine.txt'])
+
     def test_pca(self):
         dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
         ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt',

From 6a9ea4a6cb3754efb9f72d32c0066f0073172523 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 9 Feb 2024 14:57:16 -0600
Subject: [PATCH 10/26] update ml and test cases

---
 spras/analysis/ml.py                          |  7 ++----
 test/ml/expected/expected-dataframe.csv       |  2 +-
 .../mixed-direction.txt                       |  0
 .../wrong-direction.txt                       |  5 +++++
 test/ml/test_ml.py                            | 22 +++++++++++--------
 5 files changed, 21 insertions(+), 15 deletions(-)
 rename test/ml/input/{test-mixed-direction => test-data-mixed-direction}/mixed-direction.txt (100%)
 create mode 100644 test/ml/input/test-data-wrong-direction/wrong-direction.txt

diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index d4316750..f9ec5eb6 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -54,11 +54,8 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                     elif direction == "D":
                         # node order does matter for directed edges
                         edges.append(DIR_CONST.join([node1, node2]))
-                    else:
-                        if direction == 'Direction': # if reading the header
-                            continue
-                        else:
-                            raise ValueError(f"direction is {direction}, rather than U or D")
+                    elif direction != 'Direction' and direction != 'U' and direction != 'D':
+                        raise ValueError(f"direction is {direction}, rather than U or D")
                 elif len(parts) > 4 or (len(parts) < 4 and len(parts) > 0): # empty line in file is okay
                     raise ValueError(f"A line in pathway file {file} contains {len(parts)} values. There should be 4 values per line")
             # getting the algorithm name
diff --git a/test/ml/expected/expected-dataframe.csv b/test/ml/expected/expected-dataframe.csv
index b594efb4..15cff881 100644
--- a/test/ml/expected/expected-dataframe.csv
+++ b/test/ml/expected/expected-dataframe.csv
@@ -1,4 +1,4 @@
-,test-data-s1,test-data-s2,test-data-s3,test-data-longName,test-data-longName2,test-data-empty,test-data-spaces,test-mixed-direction
+,test-data-s1,test-data-s2,test-data-s3,test-data-longName,test-data-longName2,test-data-empty,test-data-spaces,test-data-mixed-direction
 A---B,1,1,0,0,0,0,0,0
 C---D,1,1,0,0,0,0,0,1
 E---F,1,1,0,0,0,0,0,1
diff --git a/test/ml/input/test-mixed-direction/mixed-direction.txt b/test/ml/input/test-data-mixed-direction/mixed-direction.txt
similarity index 100%
rename from test/ml/input/test-mixed-direction/mixed-direction.txt
rename to test/ml/input/test-data-mixed-direction/mixed-direction.txt
diff --git a/test/ml/input/test-data-wrong-direction/wrong-direction.txt b/test/ml/input/test-data-wrong-direction/wrong-direction.txt
new file mode 100644
index 00000000..dc45581c
--- /dev/null
+++ b/test/ml/input/test-data-wrong-direction/wrong-direction.txt
@@ -0,0 +1,5 @@
+Node1	Node2	Rank	Direction
+A	B	1	D
+B	A	1	D
+C	D	1	B
+E	F	1	U
\ No newline at end of file
diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py
index 5641c282..99f8e065 100644
--- a/test/ml/test_ml.py
+++ b/test/ml/test_ml.py
@@ -1,7 +1,8 @@
 import filecmp
 from pathlib import Path
-import pytest
+
 import pandas as pd
+import pytest
 
 import spras.analysis.ml as ml
 
@@ -21,22 +22,25 @@ def setup_class(cls):
     def test_summarize_networks(self):
         dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt',
                                            INPUT_DIR + 'test-data-longName/longName.txt', INPUT_DIR + 'test-data-longName2/longName2.txt',
-                                           INPUT_DIR + 'test-data-empty/empty.txt', INPUT_DIR + 'test-data-spaces/spaces.txt', INPUT_DIR + 'test-mixed-direction/mixed-direction.txt'])
+                                           INPUT_DIR + 'test-data-empty/empty.txt', INPUT_DIR + 'test-data-spaces/spaces.txt', INPUT_DIR + 'test-data-mixed-direction/mixed-direction.txt'])
         dataframe.to_csv(OUT_DIR + 'dataframe.csv')
         assert filecmp.cmp(OUT_DIR + 'dataframe.csv', EXPECT_DIR + 'expected-dataframe.csv', shallow=False)
 
-        
     def test_summarize_networks_less_values(self):
         with pytest.raises(ValueError):
-            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/less.txt'])
-    
+            ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/less.txt'])
+
     def test_summarize_networks_more_values(self):
         with pytest.raises(ValueError):
-            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/more.txt'])
-    
+            ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/more.txt'])
+
     def test_summarize_networks_empty_line(self):
         with pytest.raises(ValueError):
-            dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-empty-line/emptyLine.txt'])
+            ml.summarize_networks([INPUT_DIR + 'test-data-empty-line/emptyLine.txt'])
+
+    def test_summarize_networks_wrong_direction(self):
+        with pytest.raises(ValueError):
+            ml.summarize_networks([INPUT_DIR + 'test-data-wrong-direction/wrong-direction.txt'])
 
     def test_pca(self):
         dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
@@ -62,7 +66,7 @@ def test_hac_vertical(self):
         assert filecmp.cmp(OUT_DIR + 'hac-clusters-vertical.txt', EXPECT_DIR + 'expected-hac-vertical-clusters.txt', shallow=False)
 
     def test_ensemble_network(self):
-        dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt', INPUT_DIR + 'test-mixed-direction/mixed-direction.txt'])
+        dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt', INPUT_DIR + 'test-data-mixed-direction/mixed-direction.txt'])
         ml.ensemble_network(dataframe, OUT_DIR + 'ensemble-network.tsv')
 
         en = pd.read_table(OUT_DIR + 'ensemble-network.tsv')

From f9e989e2c7889b3ecba515403eb5cefd649a46ef Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 9 Feb 2024 15:24:59 -0600
Subject: [PATCH 11/26] update contributing guide

---
 CONTRIBUTING.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a264c5f5..2ec87b01 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -156,7 +156,8 @@ Implement the `parse_output` function.
 The edges in the Local Neighborhood output have the same format as the input, `<vertex1>|<vertex2>`.
 Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank.
 See the `add_rank_column` function in `src.util.py`.
-The output should have the format `<vertex1> <vertex2> 1`.
+Make sure to pass `header=True` when writing the output file so that it starts with a header row.
+The output should have the format `<vertex1> <vertex2> 1 U`.
 
 ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS
 Import the new class `LocalNeighborhood` in `src/runner.py` so the wrapper functions can be accessed.

From 278b761b7a085fa33281b77f0998c10e97be36c1 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 12 Mar 2024 16:04:11 -0500
Subject: [PATCH 12/26] ml changes

---
 spras/analysis/ml.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index f9ec5eb6..206b5a97 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -41,10 +41,13 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
             with open(file, 'r') as f:
                 lines = f.readlines()
 
+            if len(lines) > 0:
+                lines.pop(0) # process header line 
+            
             edges = []
             for line in lines:
                 parts = line.split('\t')
-                if len(parts) == 4:
+                if len(parts) == 4: # empty lines not allowed but empty files are allowed
                     node1 = parts[0]
                     node2 = parts[1]
                     direction = str(parts[3]).strip()
@@ -56,8 +59,9 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                         edges.append(DIR_CONST.join([node1, node2]))
                     elif direction != 'Direction' and direction != 'U' and direction != 'D':
                         raise ValueError(f"direction is {direction}, rather than U or D")
-                elif len(parts) > 4 or (len(parts) < 4 and len(parts) > 0): # empty line in file is okay
-                    raise ValueError(f"A line in pathway file {file} contains {len(parts)} values. There should be 4 values per line")
+                elif len(parts) != 0:
+                    raise ValueError(f"In file {file}, expected line {line} to have 4 values, but found {len(parts)} values.")
+                
             # getting the algorithm name
             p = PurePath(file)
             edge_tuples.append((p.parts[-2], edges))

From 4dfd018700bae11ecf2b729879f4c7533afaaf27 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 12 Mar 2024 16:24:51 -0500
Subject: [PATCH 13/26] precommit to ml

---
 spras/analysis/ml.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index 206b5a97..23b17b87 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -42,8 +42,8 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                 lines = f.readlines()
 
             if len(lines) > 0:
-                lines.pop(0) # process header line 
-            
+                lines.pop(0) # process header line
+
             edges = []
             for line in lines:
                 parts = line.split('\t')
@@ -61,7 +61,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                         raise ValueError(f"direction is {direction}, rather than U or D")
                 elif len(parts) != 0:
                     raise ValueError(f"In file {file}, expected line {line} to have 4 values, but found {len(parts)} values.")
-                
+
             # getting the algorithm name
             p = PurePath(file)
             edge_tuples.append((p.parts[-2], edges))

From c6da91a98622178925e12c17c752e76513733fae Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 12 Mar 2024 17:25:19 -0500
Subject: [PATCH 14/26] attempting error checking for empty df read from rpw

---
 spras/allpairs.py         | 12 ++++----
 spras/domino.py           |  3 +-
 spras/meo.py              | 20 +++++++------
 spras/mincostflow.py      | 14 ++++-----
 spras/omicsintegrator1.py | 60 ++++++++++++++++++++++++++++++---------
 spras/omicsintegrator2.py | 20 +++++++------
 spras/pathlinker.py       | 14 +++++++--
 spras/util.py             | 11 +++++++
 8 files changed, 107 insertions(+), 47 deletions(-)

diff --git a/spras/allpairs.py b/spras/allpairs.py
index 0a276e92..e54cf9f7 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -9,6 +9,7 @@
     reinsert_direction_col_undirected,
 )
 from spras.prm import PRM
+from spras.util import add_rank_column, raw_pathway_df
 
 __all__ = ['AllPairs']
 
@@ -111,9 +112,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-
-        df['Rank'] = 1  # add a rank column of 1s since the edges are not ranked.
-        df = reinsert_direction_col_undirected(df)
-        df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
+        # df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        df = raw_pathway_df(raw_pathway_file, header = None)
+        if not df.empty:
+            df = add_rank_column(df)
+            df = reinsert_direction_col_undirected(df)
+            df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/domino.py b/spras/domino.py
index 594041b7..9e6b256e 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -208,7 +208,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
             edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
         else:
-            edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)
+            df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
+            df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
 
 
 def pre_domino_id_transform(node_id):
diff --git a/spras/meo.py b/spras/meo.py
index d111d6a3..7a70119b 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -8,7 +8,7 @@
     reinsert_direction_col_directed,
 )
 from spras.prm import PRM
-from spras.util import add_rank_column
+from spras.util import add_rank_column, raw_pathway_df
 
 __all__ = ['MEO', 'write_properties']
 
@@ -181,13 +181,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
         # Columns Source Type Target Oriented Weight
-        df = pd.read_csv(raw_pathway_file, sep='\t')
+        # df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
+        df = raw_pathway_df(raw_pathway_file, header=0)
+        if not df.empty:
         # Keep only edges that were assigned an orientation (direction)
-        df = df.loc[df['Oriented']]
-        # TODO what should be the edge rank?
-        # Would need to load the paths output file to rank edges correctly
-        df = add_rank_column(df)
-        df = reinsert_direction_col_directed(df)
-        df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
-        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+            df = df.loc[df['Oriented']]
+            # TODO what should be the edge rank?
+            # Would need to load the paths output file to rank edges correctly
+            df = add_rank_column(df)
+            df = reinsert_direction_col_directed(df)
+            df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
+            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index 44575038..f04b8034 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -8,7 +8,7 @@
     reinsert_direction_col_undirected,
 )
 from spras.prm import PRM
-from spras.util import add_rank_column
+from spras.util import add_rank_column, raw_pathway_df
 
 __all__ = ['MinCostFlow']
 
@@ -150,11 +150,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
 
-        df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-        df = add_rank_column(df)
-        # TODO update MinCostFlow version to support mixed graphs
-        # Currently directed edges in the input will be converted to undirected edges in the output
-        df = reinsert_direction_col_undirected(df)
-        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df = raw_pathway_df(raw_pathway_file, header = None)
+        # df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        if not df.empty:
+            df = add_rank_column(df)
+            df = reinsert_direction_col_undirected(df)
+            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
 
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 2abda226..3e25cde2 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 
 import pandas as pd
@@ -5,7 +6,7 @@
 from spras.containers import prepare_volume, run_container
 from spras.interactome import reinsert_direction_col_mixed
 from spras.prm import PRM
-from spras.util import add_rank_column
+from spras.util import add_rank_column, raw_pathway_df
 
 __all__ = ['OmicsIntegrator1', 'write_conf']
 
@@ -191,16 +192,49 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # I'm assuming from having read the documentation that we will be passing in optimalForest.sif
         # as raw_pathway_file, in which case the format should be edge1 interactiontype edge2.
         # if that assumption is wrong we will need to tweak things
-        try:
-            df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-        except pd.errors.EmptyDataError:
-            with open(standardized_pathway_file, 'w'):
-                pass
-            return
-
-        df.columns = ["Edge1", "InteractionType", "Edge2"]
-        df = add_rank_column(df)
-        df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-        df.drop(columns=['InteractionType'], inplace = True)
-        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        # try:
+        #     # check_file = os.stat(raw_pathway_file).st_size
+
+        #     # if(check_file == 0):
+        #     #     print("The file is empty.")
+        #     #     df = pd.DataFrame(columns = ['Edge1', 'Edge2','Rank','Direction'])
+
+        #     # else:
+        #     df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        #     df.columns = ["Edge1", "InteractionType", "Edge2"]
+        #     df = add_rank_column(df)
+        #     df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
+        #     df.drop(columns=['InteractionType'], inplace = True)
+        #     df.columns = ['Node1', 'Node2','Rank','Direction']
+
+        # except pd.errors.EmptyDataError:
+        #     print("we hit empty thingy")
+        #     df = pd.DataFrame(columns = ['Node1', 'Node2','Rank','Direction'])
+        #     # with open(standardized_pathway_file, 'w'):
+        #     #     pass
+        #     # return
+
+        df = raw_pathway_df(raw_pathway_file, header=None)
+        if not df.empty:
+            df.columns = ["Edge1", "InteractionType", "Edge2"]
+            df = add_rank_column(df)
+            df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
+            df.drop(columns=['InteractionType'], inplace = True)
+            df.columns = ['Node1', 'Node2','Rank','Direction']
+
+        # if df.empty:
+        #     print("THE DATA FRAME IS EMPTY")
+        #     print(df)
+        # else:
+        #     print("THE DF IS NOT EMPTY")
+
+        # df.columns = ["Edge1", "InteractionType", "Edge2"]
+        # df = add_rank_column(df)
+        # df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
+        # df.drop(columns=['InteractionType'], inplace = True)
+
+
+
+        print(df)
+        print(df.columns)
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 4083d0ab..6734aa06 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -6,7 +6,7 @@
 from spras.dataset import Dataset
 from spras.interactome import reinsert_direction_col_undirected
 from spras.prm import PRM
-from spras.util import add_rank_column
+from spras.util import add_rank_column, raw_pathway_df
 
 __all__ = ['OmicsIntegrator2']
 
@@ -149,13 +149,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
         if num_lines < 2:
-            with open(standardized_pathway_file, 'w'):
-                pass
+            df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
+            df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
             return
-        df = pd.read_csv(raw_pathway_file, sep='\t')
-        df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
-        df = df.take([0, 1], axis=1)
-        df = add_rank_column(df)
-        df = reinsert_direction_col_undirected(df)
-        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df = raw_pathway_df(raw_pathway_file, header=0 )
+        # df = pd.read_csv(raw_pathway_file, sep='\t', header = 0)
+        # df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
+        if not df.empty:
+            df = df.take([0, 1], axis=1)
+            df = add_rank_column(df)
+            df = reinsert_direction_col_undirected(df)
+            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index c0bd7ed8..4ffc3246 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -9,6 +9,7 @@
     reinsert_direction_col_directed,
 )
 from spras.prm import PRM
+from spras.util import raw_pathway_df
 
 __all__ = ['PathLinker']
 
@@ -137,7 +138,14 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
         # What about multiple raw_pathway_files
-        df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
-        df = reinsert_direction_col_directed(df)
-        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        # try:
+        #     df = pd.read_csv(raw_pathway_file, sep='\t')
+
+        # except pd.errors.EmptyDataError:
+        #     print("we hit empty thingy")
+        df = raw_pathway_df(raw_pathway_file, header = 0)
+        if not df.empty:
+            df = df.take([0, 1, 2], axis=1)
+            df = reinsert_direction_col_directed(df)
+            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/util.py b/spras/util.py
index ea6cd952..9b51ac5a 100644
--- a/spras/util.py
+++ b/spras/util.py
@@ -59,3 +59,14 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
     """
     df['Rank'] = 1
     return df
+
+def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
+    """
+    creates df from contents in raw pathway file, otherwise returns an empty df
+    """
+    try:
+        df = pd.read_csv(raw_pathway_file, sep='\t', header=header)
+    except pd.errors.EmptyDataError: # read an empty file
+        df = pd.DataFrame(columns = ['Node1', 'Node2','Rank','Direction'])
+
+    return df

From c183abf903f3fce9bc1ee3b7043f1054fac5fbcb Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Wed, 13 Mar 2024 15:58:59 -0500
Subject: [PATCH 15/26] cleaned up code

---
 spras/allpairs.py         |  3 +--
 spras/meo.py              |  2 --
 spras/mincostflow.py      |  3 +--
 spras/omicsintegrator1.py | 37 +------------------------------------
 spras/omicsintegrator2.py | 15 +++++++--------
 spras/pathlinker.py       |  8 +-------
 spras/util.py             |  3 +++
 7 files changed, 14 insertions(+), 57 deletions(-)

diff --git a/spras/allpairs.py b/spras/allpairs.py
index e54cf9f7..cdce7e40 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -112,8 +112,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        # df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-        df = raw_pathway_df(raw_pathway_file, header = None)
+        df = raw_pathway_df(raw_pathway_file, header=None)
         if not df.empty:
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
diff --git a/spras/meo.py b/spras/meo.py
index 7a70119b..16fae07e 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -180,8 +180,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        # Columns Source Type Target Oriented Weight
-        # df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
         df = raw_pathway_df(raw_pathway_file, header=0)
         if not df.empty:
         # Keep only edges that were assigned an orientation (direction)
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index f04b8034..531ae532 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -150,8 +150,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
 
-        df = raw_pathway_df(raw_pathway_file, header = None)
-        # df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        df = raw_pathway_df(raw_pathway_file, header=None)
         if not df.empty:
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 3e25cde2..da4c5459 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -192,27 +192,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # I'm assuming from having read the documentation that we will be passing in optimalForest.sif
         # as raw_pathway_file, in which case the format should be edge1 interactiontype edge2.
         # if that assumption is wrong we will need to tweak things
-        # try:
-        #     # check_file = os.stat(raw_pathway_file).st_size
-
-        #     # if(check_file == 0):
-        #     #     print("The file is empty.")
-        #     #     df = pd.DataFrame(columns = ['Edge1', 'Edge2','Rank','Direction'])
-
-        #     # else:
-        #     df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
-        #     df.columns = ["Edge1", "InteractionType", "Edge2"]
-        #     df = add_rank_column(df)
-        #     df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-        #     df.drop(columns=['InteractionType'], inplace = True)
-        #     df.columns = ['Node1', 'Node2','Rank','Direction']
-
-        # except pd.errors.EmptyDataError:
-        #     print("we hit empty thingy")
-        #     df = pd.DataFrame(columns = ['Node1', 'Node2','Rank','Direction'])
-        #     # with open(standardized_pathway_file, 'w'):
-        #     #     pass
-        #     # return
+        
 
         df = raw_pathway_df(raw_pathway_file, header=None)
         if not df.empty:
@@ -222,19 +202,4 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df.drop(columns=['InteractionType'], inplace = True)
             df.columns = ['Node1', 'Node2','Rank','Direction']
 
-        # if df.empty:
-        #     print("THE DATA FRAME IS EMPTY")
-        #     print(df)
-        # else:
-        #     print("THE DF IS NOT EMPTY")
-
-        # df.columns = ["Edge1", "InteractionType", "Edge2"]
-        # df = add_rank_column(df)
-        # df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-        # df.drop(columns=['InteractionType'], inplace = True)
-
-
-
-        print(df)
-        print(df.columns)
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 6734aa06..e1197490 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -150,14 +150,13 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         num_lines = sum(1 for line in open(raw_pathway_file))
         if num_lines < 2:
             df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
-            df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
-            return
-        df = raw_pathway_df(raw_pathway_file, header=0 )
-        # df = pd.read_csv(raw_pathway_file, sep='\t', header = 0)
-        # df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
-        if not df.empty:
-            df = df.take([0, 1], axis=1)
-            df = add_rank_column(df)
+        else: 
+            df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
+            df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
+            df = add_rank_column(df) 
             df = reinsert_direction_col_undirected(df)
+            df.drop(columns=['cost', 'in_solution'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
+        
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index 4ffc3246..97ee64ad 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -137,13 +137,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        # What about multiple raw_pathway_files
-        # try:
-        #     df = pd.read_csv(raw_pathway_file, sep='\t')
-
-        # except pd.errors.EmptyDataError:
-        #     print("we hit empty thingy")
-        df = raw_pathway_df(raw_pathway_file, header = 0)
+        df = raw_pathway_df(raw_pathway_file, header=0)
         if not df.empty:
             df = df.take([0, 1, 2], axis=1)
             df = reinsert_direction_col_directed(df)
diff --git a/spras/util.py b/spras/util.py
index 9b51ac5a..94c7ab2f 100644
--- a/spras/util.py
+++ b/spras/util.py
@@ -63,6 +63,9 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
 def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
     """
     creates df from contents in raw pathway file, otherwise returns an empty df
+    @param raw_pathway_file: the specific path to the raw_pathway_file to read from
+    @param header: what row the header is, otherwise None
+
     """
     try:
         df = pd.read_csv(raw_pathway_file, sep='\t', header=header)

From 01ad3422eaf88b508cecc526515a29a83c9f05bc Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Wed, 13 Mar 2024 16:00:44 -0500
Subject: [PATCH 16/26] precommit

---
 spras/omicsintegrator1.py | 2 +-
 spras/omicsintegrator2.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index da4c5459..4c5e5164 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -192,7 +192,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # I'm assuming from having read the documentation that we will be passing in optimalForest.sif
         # as raw_pathway_file, in which case the format should be edge1 interactiontype edge2.
         # if that assumption is wrong we will need to tweak things
-        
+
 
         df = raw_pathway_df(raw_pathway_file, header=None)
         if not df.empty:
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index e1197490..b1419258 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -150,13 +150,13 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         num_lines = sum(1 for line in open(raw_pathway_file))
         if num_lines < 2:
             df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
-        else: 
+        else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
             df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
-            df = add_rank_column(df) 
+            df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
             df.drop(columns=['cost', 'in_solution'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
-        
+
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
-        
+

From 129532c08c36e88e87f9d8bc61f40fe50b053b91 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Mon, 18 Mar 2024 13:49:04 -0500
Subject: [PATCH 17/26] clean up new util func, add new test, add to
 contributing guide

---
 CONTRIBUTING.md                                        |  4 ++--
 spras/omicsintegrator2.py                              |  4 ++--
 spras/util.py                                          |  7 ++++---
 test/parse-outputs/expected/empty-pathway-expected.txt |  1 +
 test/parse-outputs/input/empty-raw-pathway.txt         |  0
 test/parse-outputs/test_parse_outputs.py               | 10 ++++++++++
 6 files changed, 19 insertions(+), 7 deletions(-)
 create mode 100644 test/parse-outputs/expected/empty-pathway-expected.txt
 create mode 100644 test/parse-outputs/input/empty-raw-pathway.txt

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2ec87b01..c4452f7a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -155,8 +155,8 @@ Use the `run_container` utility function to run the command in the container `<u
 Implement the `parse_output` function.
 The edges in the Local Neighborhood output have the same format as the input, `<vertex1>|<vertex2>`.
 Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank.
-See the `add_rank_column` function in `src.util.py`.
-Make sure header = True when the file is created.
+See the `add_rank_column` and `raw_pathway_df` function in `src.util.py`.
+Make sure header = True with column names: ['Node1', 'Node2', 'Rank', 'Direction'] when the file is created
 The output should have the format `<vertex1> <vertex2> 1 U`.
 
 ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index b1419258..2d8dee3c 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -152,10 +152,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-            df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
+            df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
+            df = df.take([0, 1], axis=1)
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
-            df.drop(columns=['cost', 'in_solution'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/util.py b/spras/util.py
index 94c7ab2f..efa9bc73 100644
--- a/spras/util.py
+++ b/spras/util.py
@@ -62,9 +62,10 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
 
 def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
     """
-    creates df from contents in raw pathway file, otherwise returns an empty df
-    @param raw_pathway_file: the specific path to the raw_pathway_file to read from
-    @param header: what row the header is, otherwise None
+    Creates DF from contents in raw pathway file,
+    otherwise returns an empty DF with standard output column names
+    @param raw_pathway_file: path to raw_pathway_file
+    @param header: what row the header is in raw_pathway_file, otherwise None
 
     """
     try:
diff --git a/test/parse-outputs/expected/empty-pathway-expected.txt b/test/parse-outputs/expected/empty-pathway-expected.txt
new file mode 100644
index 00000000..a1a76651
--- /dev/null
+++ b/test/parse-outputs/expected/empty-pathway-expected.txt
@@ -0,0 +1 @@
+Node1	Node2	Rank	Direction
diff --git a/test/parse-outputs/input/empty-raw-pathway.txt b/test/parse-outputs/input/empty-raw-pathway.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index 8d8d0933..0f471e5d 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -1,6 +1,9 @@
 import filecmp
 from pathlib import Path
 
+import pandas as pd
+import pytest
+
 from spras import runner
 
 INDIR = "test/parse-outputs/input/"
@@ -29,3 +32,10 @@ def test_parse_outputs(self):
 
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-pathway.txt", EXPDIR + f"{algo}-pathway-expected.txt", shallow=False)
+
+    def test_empty_file(self):
+        for algo in algorithms:
+            test_file = INDIR + f"empty-raw-pathway.txt"
+            out_file = OUTDIR + f"{algo}-empty-pathway.txt"
+            runner.parse_output(algo, test_file, out_file)
+            assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)

From bbd907584c535056daa7e6e6d6dbd1a89e424922 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Mon, 18 Mar 2024 14:06:58 -0500
Subject: [PATCH 18/26] trying to fix error

---
 spras/meo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/meo.py b/spras/meo.py
index 16fae07e..c854a64f 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -190,4 +190,4 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df = reinsert_direction_col_directed(df)
             df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
-        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
+        df.to_csv(standardized_pathway_file, index=False, sep='\t', header=True)

From 0f4510c9824c1ef5f5d3608835d882efac155f5f Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Thu, 21 Mar 2024 12:16:02 -0500
Subject: [PATCH 19/26] testing mcf tester with macos-latest

---
 .github/workflows/test-spras.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
index ec252c8f..d6e13e9c 100644
--- a/.github/workflows/test-spras.yml
+++ b/.github/workflows/test-spras.yml
@@ -32,7 +32,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest]
+        os: [macos-latest]
     steps:
     - name: Checkout repository
       uses: actions/checkout@v2

From 36d556b22b3257b590e1623edcb1a5bf600d6c67 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Thu, 21 Mar 2024 12:29:43 -0500
Subject: [PATCH 20/26] revert

---
 .github/workflows/test-spras.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
index d6e13e9c..ec252c8f 100644
--- a/.github/workflows/test-spras.yml
+++ b/.github/workflows/test-spras.yml
@@ -32,7 +32,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [macos-latest]
+        os: [ubuntu-latest]
     steps:
     - name: Checkout repository
       uses: actions/checkout@v2

From f5b880bd7c396a4a099c46563b9a840a4bb8f4b3 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 14 Jun 2024 10:36:14 -0500
Subject: [PATCH 21/26] updated contributing guide

---
 CONTRIBUTING.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c4452f7a..a943cacb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -154,9 +154,9 @@ Use the `run_container` utility function to run the command in the container `<u
 
 Implement the `parse_output` function.
 The edges in the Local Neighborhood output have the same format as the input, `<vertex1>|<vertex2>`.
-Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank.
-See the `add_rank_column` and `raw_pathway_df` function in `src.util.py`.
-Make sure header = True with column names: ['Node1', 'Node2', 'Rank', 'Direction'] when the file is created
+Convert these to be tab-separated vertex pairs followed by a tab `1` and tab `U` at the end of every line, which indicates all edges have the same rank and are undirected.
+See the `add_rank_column` and `raw_pathway_df` functions in `spras/util.py` and the `reinsert_direction_col_undirected` function in `spras/interactome.py`.
+Make sure header = True with column names: ['Node1', 'Node2', 'Rank', 'Direction'] when the file is created.
 The output should have the format `<vertex1> <vertex2> 1 U`.
 
 ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS

From 322bfa560a66d293173afb553ad4df2c8d6f30e8 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Fri, 14 Jun 2024 10:42:50 -0500
Subject: [PATCH 22/26] updated new ML test files to include headers

---
 test/ml/input/test-data-empty/empty.txt   | 1 +
 test/ml/input/test-data-single/single.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/test/ml/input/test-data-empty/empty.txt b/test/ml/input/test-data-empty/empty.txt
index e69de29b..63fda2b1 100644
--- a/test/ml/input/test-data-empty/empty.txt
+++ b/test/ml/input/test-data-empty/empty.txt
@@ -0,0 +1 @@
+Node1	Node2	Rank	Direction
\ No newline at end of file
diff --git a/test/ml/input/test-data-single/single.txt b/test/ml/input/test-data-single/single.txt
index 30397283..822ccb97 100644
--- a/test/ml/input/test-data-single/single.txt
+++ b/test/ml/input/test-data-single/single.txt
@@ -1 +1,2 @@
+Node1	Node2	Rank	Direction
 L	M	1	U

From 1ffe1d7cd75c20af85074ef23cd054f248c0711b Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Mon, 17 Jun 2024 10:02:25 -0500
Subject: [PATCH 23/26] output docs

---
 doc/output.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 doc/output.md

diff --git a/doc/output.md b/doc/output.md
new file mode 100644
index 00000000..0b0a6c93
--- /dev/null
+++ b/doc/output.md
@@ -0,0 +1,15 @@
+## File format
+### Universal Output File
+Output files include a header row and rows providing attributes for each node.
+The header row is `Node1    Node2   Rank    Direction` for every output file.
+Each row lists the two nodes that are connected with an edge, the rank for that edge, and a directionality column to indicate whether the edge is directed or undirected.
+The directionality values are either a 'U' for an undirected edge or a 'D' for a directed edge.
+
+For example:
+```
+Node1	Node2	Rank	Direction
+A       B	    1	    D
+B	    C	    1	    D
+B	    D	    1	    U
+D	    A	    1	    U
+```

From 7db6ea020092c0feadf225bfb4e410b5f94ada2f Mon Sep 17 00:00:00 2001
From: Anthony Gitter <gitter@biostat.wisc.edu>
Date: Thu, 4 Jul 2024 08:52:16 -0500
Subject: [PATCH 24/26] Code review and formatting updates

---
 .github/workflows/test-spras.yml         |  1 +
 .pre-commit-config.yaml                  |  2 +-
 doc/output.md                            | 20 +++++++++++---------
 spras/allpairs.py                        |  4 +---
 spras/analysis/ml.py                     |  6 +++---
 spras/analysis/summary.py                |  3 +--
 spras/domino.py                          | 13 ++++---------
 spras/meo.py                             |  9 ++++-----
 spras/mincostflow.py                     |  7 +++----
 spras/omicsintegrator1.py                | 11 +++--------
 spras/omicsintegrator2.py                |  5 ++---
 spras/pathlinker.py                      |  5 ++---
 spras/util.py                            | 18 ++++++++++--------
 test/ml/test_ml.py                       |  4 ++--
 test/parse-outputs/test_parse_outputs.py |  4 +---
 15 files changed, 49 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
index ec252c8f..b801aa4e 100644
--- a/.github/workflows/test-spras.yml
+++ b/.github/workflows/test-spras.yml
@@ -83,6 +83,7 @@ jobs:
         docker pull reedcompbio/mincostflow:latest
         docker pull reedcompbio/allpairs:latest
         docker pull reedcompbio/domino:latest
+        docker pull reedcompbio/py4cytoscape:v3
     - name: Build Omics Integrator 1 Docker image
       uses: docker/build-push-action@v1
       with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 55503ef4..67958453 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@
 # See https://pre-commit.com/ for documentation
 default_language_version:
   # Match this to the version specified in environment.yml
-  python: python3.8
+  python: python3.11
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0  # Use the ref you want to point at
diff --git a/doc/output.md b/doc/output.md
index 0b0a6c93..431d67c7 100644
--- a/doc/output.md
+++ b/doc/output.md
@@ -1,15 +1,17 @@
-## File format
-### Universal Output File
-Output files include a header row and rows providing attributes for each node.
-The header row is `Node1    Node2   Rank    Direction` for every output file.
+## File formats
+
+### Pathway output format
+Output pathway files in the standard SPRAS format include a header row and rows providing attributes for each edge.
+The header row is `Node1    Node2   Rank    Direction`.
 Each row lists the two nodes that are connected with an edge, the rank for that edge, and a directionality column to indicate whether the edge is directed or undirected.
-The directionality values are either a 'U' for an undirected edge or a 'D' for a directed edge.
+The directionality values are either a 'U' for an undirected edge or a 'D' for a directed edge, where the direction is from Node1 to Node2.
+Pathways that do not contain ranked edges can output all 1s in the Rank column.
 
 For example:
 ```
 Node1	Node2	Rank	Direction
-A       B	    1	    D
-B	    C	    1	    D
-B	    D	    1	    U
-D	    A	    1	    U
+A	B	1	D
+B	C	1	D
+B	D	2	U
+D	A	3	U
 ```
diff --git a/spras/allpairs.py b/spras/allpairs.py
index cdce7e40..4c540330 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -1,8 +1,6 @@
 import warnings
 from pathlib import Path
 
-import pandas as pd
-
 from spras.containers import prepare_volume, run_container
 from spras.interactome import (
     convert_directed_to_undirected,
@@ -112,7 +110,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        df = raw_pathway_df(raw_pathway_file, header=None)
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=None)
         if not df.empty:
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
index 5519944f..64f8f003 100644
--- a/spras/analysis/ml.py
+++ b/spras/analysis/ml.py
@@ -42,12 +42,12 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                 lines = f.readlines()
 
             if len(lines) > 0:
-                lines.pop(0) # process header line
+                lines.pop(0)  # skip header line
 
             edges = []
             for line in lines:
                 parts = line.split('\t')
-                if len(parts) == 4: # empty lines not allowed but empty files are allowed
+                if len(parts) == 4:  # empty lines not allowed but empty files are allowed
                     node1 = parts[0]
                     node2 = parts[1]
                     direction = str(parts[3]).strip()
@@ -57,7 +57,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                     elif direction == "D":
                         # node order does matter for directed edges
                         edges.append(DIR_CONST.join([node1, node2]))
-                    elif direction != 'Direction' and direction != 'U' and direction != 'D':
+                    elif direction != 'Direction':
                         raise ValueError(f"direction is {direction}, rather than U or D")
                 elif len(parts) != 0:
                     raise ValueError(f"In file {file}, expected line {line} to have 4 values, but found {len(parts)} values.")
diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index 0a173521..0e4b4b86 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -34,9 +34,8 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
     # Iterate through each network file path
     for file_path in sorted(file_paths):
 
-        lines = None
         with open(file_path, 'r') as f:
-            lines = f.readlines()[1:]  # skip the first line
+            lines = f.readlines()[1:]  # skip the header line
 
         nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str)))
 
diff --git a/spras/domino.py b/spras/domino.py
index 9e6b256e..7c29f439 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -206,10 +206,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
             edges_df = reinsert_direction_col_undirected(edges_df)
             edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
-            edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
         else:
-            df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
-            df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
+            edges_df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
+
+        edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
 
 
 def pre_domino_id_transform(node_id):
@@ -228,9 +228,4 @@ def post_domino_id_transform(node_id):
     @param node_id: the node id to transform
     @return the node id without the prefix, if it was present, otherwise the original node id
     """
-    # Use removeprefix if SPRAS ever requires Python >= 3.9
-    # https://docs.python.org/3/library/stdtypes.html#str.removeprefix
-    if node_id.startswith(ID_PREFIX):
-        return node_id[ID_PREFIX_LEN:]
-    else:
-        return node_id
+    node_id.removeprefix(ID_PREFIX)
diff --git a/spras/meo.py b/spras/meo.py
index c854a64f..b614d4c4 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -1,7 +1,5 @@
 from pathlib import Path
 
-import pandas as pd
-
 from spras.containers import prepare_volume, run_container
 from spras.interactome import (
     add_directionality_constant,
@@ -180,14 +178,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        df = raw_pathway_df(raw_pathway_file, header=0)
+        # Columns Source Type Target Oriented Weight
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=0)
         if not df.empty:
-        # Keep only edges that were assigned an orientation (direction)
+            # Keep only edges that were assigned an orientation (direction)
             df = df.loc[df['Oriented']]
             # TODO what should be the edge rank?
             # Would need to load the paths output file to rank edges correctly
             df = add_rank_column(df)
             df = reinsert_direction_col_directed(df)
-            df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
+            df.drop(columns=['Type', 'Oriented', 'Weight'], inplace=True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, index=False, sep='\t', header=True)
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index 531ae532..b0a47cc7 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -1,7 +1,5 @@
 from pathlib import Path
 
-import pandas as pd
-
 from spras.containers import prepare_volume, run_container
 from spras.interactome import (
     convert_undirected_to_directed,
@@ -150,10 +148,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
 
-        df = raw_pathway_df(raw_pathway_file, header=None)
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=None)
         if not df.empty:
             df = add_rank_column(df)
+            # TODO update MinCostFlow version to support mixed graphs
+            # Currently directed edges in the input will be converted to undirected edges in the output
             df = reinsert_direction_col_undirected(df)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
-
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 4c5e5164..16469924 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -1,8 +1,5 @@
-import os
 from pathlib import Path
 
-import pandas as pd
-
 from spras.containers import prepare_volume, run_container
 from spras.interactome import reinsert_direction_col_mixed
 from spras.prm import PRM
@@ -192,14 +189,12 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # I'm assuming from having read the documentation that we will be passing in optimalForest.sif
         # as raw_pathway_file, in which case the format should be edge1 interactiontype edge2.
         # if that assumption is wrong we will need to tweak things
-
-
-        df = raw_pathway_df(raw_pathway_file, header=None)
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=None)
         if not df.empty:
             df.columns = ["Edge1", "InteractionType", "Edge2"]
             df = add_rank_column(df)
             df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-            df.drop(columns=['InteractionType'], inplace = True)
-            df.columns = ['Node1', 'Node2','Rank','Direction']
+            df.drop(columns=['InteractionType'], inplace=True)
+            df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 2d8dee3c..ed0d5b56 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -6,7 +6,7 @@
 from spras.dataset import Dataset
 from spras.interactome import reinsert_direction_col_undirected
 from spras.prm import PRM
-from spras.util import add_rank_column, raw_pathway_df
+from spras.util import add_rank_column
 
 __all__ = ['OmicsIntegrator2']
 
@@ -149,7 +149,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
         if num_lines < 2:
-            df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
+            df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
             df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
@@ -159,4 +159,3 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
-
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index 97ee64ad..876c22b0 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -1,8 +1,6 @@
 import warnings
 from pathlib import Path
 
-import pandas as pd
-
 from spras.containers import prepare_volume, run_container
 from spras.interactome import (
     convert_undirected_to_directed,
@@ -137,7 +135,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param raw_pathway_file: pathway file produced by an algorithm's run function
         @param standardized_pathway_file: the same pathway written in the universal format
         """
-        df = raw_pathway_df(raw_pathway_file, header=0)
+        # What about multiple raw_pathway_files?
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=0)
         if not df.empty:
             df = df.take([0, 1, 2], axis=1)
             df = reinsert_direction_col_directed(df)
diff --git a/spras/util.py b/spras/util.py
index efa9bc73..2d2a83d2 100644
--- a/spras/util.py
+++ b/spras/util.py
@@ -42,6 +42,7 @@ def hash_filename(filename: str, length: Optional[int] = None) -> str:
     """
     return hash_params_sha1_base32({'filename': filename}, length)
 
+
 def make_required_dirs(path: str):
     """
     Create the directory and parent directories required before an output file can be written to the specified path.
@@ -60,17 +61,18 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
     df['Rank'] = 1
     return df
 
-def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
+
+def raw_pathway_df(raw_pathway_file: str, sep: str = '\t', header: int = None) -> pd.DataFrame:
     """
-    Creates DF from contents in raw pathway file,
-    otherwise returns an empty DF with standard output column names
+    Creates dataframe from contents in raw pathway file,
+    otherwise returns an empty dataframe with standard output column names
     @param raw_pathway_file: path to raw_pathway_file
-    @param header: what row the header is in raw_pathway_file, otherwise None
-
+    @param sep: separator used when loading the dataframe, default tab character
+    @param header: what row the header is in raw_pathway_file, default None
     """
     try:
-        df = pd.read_csv(raw_pathway_file, sep='\t', header=header)
-    except pd.errors.EmptyDataError: # read an empty file
-        df = pd.DataFrame(columns = ['Node1', 'Node2','Rank','Direction'])
+        df = pd.read_csv(raw_pathway_file, sep=sep, header=header)
+    except pd.errors.EmptyDataError:  # read an empty file
+        df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
 
     return df
diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py
index e718cf3f..3010179d 100644
--- a/test/ml/test_ml.py
+++ b/test/ml/test_ml.py
@@ -43,11 +43,11 @@ def test_summarize_networks_wrong_direction(self):
             ml.summarize_networks([INPUT_DIR + 'test-data-wrong-direction/wrong-direction.txt'])
 
     def test_summarize_networks_empty(self):
-        with pytest.raises(ValueError): #raises error if empty dataframe is used for post processing
+        with pytest.raises(ValueError):  # raises error if empty dataframe is used for post processing
             ml.summarize_networks([INPUT_DIR + 'test-data-empty/empty.txt'])
 
     def test_single_line(self):
-        with pytest.raises(ValueError): #raises error if single line in file s.t. single row in dataframe is used for post processing
+        with pytest.raises(ValueError):  # raises error if single line in file s.t. single row in dataframe is used for post processing
             ml.summarize_networks([INPUT_DIR + 'test-data-single/single.txt'])
 
     def test_pca(self):
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index 0f471e5d..60763d13 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -1,9 +1,6 @@
 import filecmp
 from pathlib import Path
 
-import pandas as pd
-import pytest
-
 from spras import runner
 
 INDIR = "test/parse-outputs/input/"
@@ -37,5 +34,6 @@ def test_empty_file(self):
         for algo in algorithms:
             test_file = INDIR + f"empty-raw-pathway.txt"
             out_file = OUTDIR + f"{algo}-empty-pathway.txt"
+
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)

From 026e7e0be8386c16a5f70474ab5b940e664b81ae Mon Sep 17 00:00:00 2001
From: Anthony Gitter <gitter@biostat.wisc.edu>
Date: Thu, 4 Jul 2024 08:56:50 -0500
Subject: [PATCH 25/26] Bump version to 0.2.0

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 68d10f5c..d19a5988 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spras"
-version = "0.1.0"
+version = "0.2.0"
 description = "Signaling Pathway Reconstruction Analysis Streamliner"
 authors = [
   { name = "Anthony Gitter", email = "gitter@biostat.wisc.edu" },

From d6b019aa17144f460d424af7083157a25010e84b Mon Sep 17 00:00:00 2001
From: Anthony Gitter <gitter@biostat.wisc.edu>
Date: Thu, 4 Jul 2024 10:27:39 -0500
Subject: [PATCH 26/26] Fix post_domino_id_transform

---
 spras/domino.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/domino.py b/spras/domino.py
index 7c29f439..3170f64d 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -228,4 +228,4 @@ def post_domino_id_transform(node_id):
     @param node_id: the node id to transform
     @return the node id without the prefix, if it was present, otherwise the original node id
     """
-    node_id.removeprefix(ID_PREFIX)
+    return node_id.removeprefix(ID_PREFIX)