dataset_dict

Reed-CompBio · Feb 17, 2025 · a75cb4f · a75cb4f
1 parent 400df3b
commit a75cb4f
Show file tree

Hide file tree

Showing 17 changed files with 33 additions and 83 deletions.
diff --git a/test/analysis/input/alternative-network.txt b/test/analysis/input/alternative-network.txt
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
@@ -109,13 +109,13 @@ datasets:
       # Relative path from the spras directory
       data_dir: "input"
     -
-      label: data1
+    #label: data1
       # Reuse some of the same source files as 'data0' but with a different network and targets
-      node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
-      edge_files: ["alternative-network.txt"]
-      other_files: []
+      # node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
+      # edge_files: ["alternative-network.txt"]
+      # other_files: []
       # Relative path from the spras directory
-      data_dir: "input"
+      # data_dir: "input"
 
 gold_standards:
     -
@@ -127,10 +127,10 @@ gold_standards:
       # List of dataset labels to compare with the specific gold standard dataset
       dataset_labels: ["data0"]
     -
-      label: gs1
-      node_files: ["gs_nodes1.txt"]
-      data_dir: "input"
-      dataset_labels: ["data1", "data0"]
+    #label: gs1
+    # node_files: ["gs_nodes1.txt"]
+    # data_dir: "input"
+    # dataset_labels: ["data1", "data0"]
 
 # If we want to reconstruct then we should set run to true.
 # TODO: if include is true above but run is false here, the algorithms are not run.

diff --git a/test/analysis/input/example/data1-allpairs-params-BEH6YB2_pathway.txt b/test/analysis/input/example/data1-allpairs-params-BEH6YB2_pathway.txt
diff --git a/test/analysis/input/example/data1-domino-params-V3X4RW7_pathway.txt b/test/analysis/input/example/data1-domino-params-V3X4RW7_pathway.txt
diff --git a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt
diff --git a/test/analysis/input/example/data1-mincostflow-params-SZPZVU6_pathway.txt b/test/analysis/input/example/data1-mincostflow-params-SZPZVU6_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-E3LSEZQ_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-E3LSEZQ_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-NFIPHUX_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-NFIPHUX_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-SU2S63Y_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-SU2S63Y_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-V26JBGX_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-V26JBGX_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator2-params-EHHWPMD_pathway.txt b/test/analysis/input/example/data1-omicsintegrator2-params-EHHWPMD_pathway.txt
diff --git a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt
diff --git a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt
diff --git a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt
diff --git a/test/analysis/input/gs_nodes1.txt b/test/analysis/input/gs_nodes1.txt
diff --git a/test/analysis/output/example_summary.txt b/test/analysis/output/example_summary.txt
@@ -11,15 +11,4 @@ output/data0-omicsintegrator2-params-EHHWPMD/pathway.txt	0	0	0	0	0	0	0	{max_path
 output/data0-omicsintegrator2-params-IV3IPCJ/pathway.txt	3	2	1	2	2	1	1	{flow: 1, capacity: 1}
 output/data0-pathlinker-params-6SWY7JS/pathway.txt	3	2	1	2	2	1	1	{spras_placeholder: no parameters}
 output/data0-pathlinker-params-VQL7BDZ/pathway.txt	3	2	1	2	2	1	1	{slice_threshold: 0.3, module_threshold: 0.05}
-output/data1-allpairs-params-BEH6YB2/pathway.txt	7	6	1	2	2	1	4	{spras_placeholder: no parameters}
-output/data1-domino-params-V3X4RW7/pathway.txt	0	0	0	0	0	0	0	{slice_threshold: 0.3, module_threshold: 0.05}
-output/data1-meo-params-GKEDDFZ/pathway.txt	4	4	1	2	2	1	2	{max_path_length: 3, local_search: Yes, rand_restarts: 10}
-output/data1-mincostflow-params-SZPZVU6/pathway.txt	3	2	1	2	2	1	1	{flow: 1, capacity: 1}
-output/data1-omicsintegrator1-params-E3LSEZQ/pathway.txt	3	2	1	2	2	1	1	{b: 5, w: 5.0, d: 10}
-output/data1-omicsintegrator1-params-NFIPHUX/pathway.txt	0	0	0	0	0	0	0	{b: 6, w: 0.0, d: 10}
-output/data1-omicsintegrator1-params-SU2S63Y/pathway.txt	3	2	1	2	2	1	1	{b: 6, w: 5.0, d: 10}
-output/data1-omicsintegrator1-params-V26JBGX/pathway.txt	0	0	0	0	0	0	0	{b: 5, w: 0.0, d: 10}
-output/data1-omicsintegrator2-params-EHHWPMD/pathway.txt	0	0	0	0	0	0	0	{b: 2, g: 3}
-output/data1-omicsintegrator2-params-IV3IPCJ/pathway.txt	3	2	1	2	2	1	1	{b: 4, g: 0}
-output/data1-pathlinker-params-6SWY7JS/pathway.txt	4	3	1	2	2	1	2	{k: 200}
-output/data1-pathlinker-params-VQL7BDZ/pathway.txt	4	3	1	2	2	1	2	{k: 100}
+
diff --git a/test/analysis/test_summary.py b/test/analysis/test_summary.py
@@ -5,6 +5,7 @@
 # set up necessary dataframes to run summarize_networks
 import spras.config as config
 from spras.analysis.summary import summarize_networks
+from spras.dataset import Dataset
 
 # Notes:
 # - Column labels are required in the node table
@@ -14,26 +15,42 @@
 class TestSummary:
     # Test data from example workflow:
     def test_example_networks(self):
-        config.init_from_file(Path("test/analysis/input/config.yaml"))
+        example_dict = { "label" : "data0", \
+                         "edge_files" : ["network.txt"], \
+                         "node_files" : ["node-prizes.txt", "sources.txt", "targets.txt"], \
+                         "data_dir" : "input", \
+                         "other_files" : []
+                       } # hardcode dataset_dict for testing purposes
+        example_node_table = Dataset(example_dict) # create instance of Dataset
+
+        config.init_from_file(Path("config.yaml"))
         algorithm_params = config.config.algorithm_params
         list(algorithm_params)
         algorithms_with_params = [f'{algorithm}-params-{params_hash}' for algorithm, param_combos in algorithm_params.items() for params_hash in param_combos.keys()]
 
-        example_network_files = Path("test/analysis/input/example").glob("*.txt")
-        example_node_table = pd.read_csv(Path("test/analysis/input/example_node_table.txt"), sep = "\t")
+        example_network_files = Path("example").glob("*.txt")
+        #example_node_table = pd.read_csv(Path("test/analysis/input/example_node_table.txt"), sep = "\t")
         example_output = pd.read_csv(Path("test/analysis/output/example_summary.txt"), sep = "\t")
         example_output["Name"] = example_output["Name"].map(convert_path)
         assert summarize_networks(example_network_files, example_node_table, algorithm_params, algorithms_with_params).equals(example_output)
 
     # Test data from EGFR workflow:
     def test_egfr_networks(self):
-        config.init_from_file(Path("test/analysis/input/egfr.yaml"))
+        egfr_dict = { "label" : "tps_egfr", \
+                      "edge_files" : ["phosphosite-irefindex13.0-uniprot.txt"], \
+                      "node_files" : ["tps-egfr-prizes.txt"], \
+                      "data_dir" : "input", \
+                      "other_files" : []
+                    }
+        egfr_node_table = Dataset(egfr_dict)
+
+        config.init_from_file(Path("egfr.yaml"))
         algorithm_params = config.config.algorithm_params
         list(algorithm_params)
         algorithms_with_params = [f'{algorithm}-params-{params_hash}' for algorithm, param_combos in algorithm_params.items() for params_hash in param_combos.keys()]
 
-        egfr_network_files = Path("test/analysis/input/egfr").glob("*.txt")
-        egfr_node_table = pd.read_csv(Path("test/analysis/input/egfr_node_table.txt"), sep = "\t")
+        egfr_network_files = Path("egfr").glob("*.txt")
+        #egfr_node_table = pd.read_csv(Path("test/analysis/input/egfr_node_table.txt"), sep = "\t")
         egfr_output = pd.read_csv(Path("test/analysis/output/egfr_summary.txt"), sep = "\t")
         egfr_output["Name"] = egfr_output["Name"].map(convert_path)
         assert summarize_networks(egfr_network_files, egfr_node_table, algorithm_params, algorithms_with_params).equals(egfr_output)