Skip to content

Commit

Permalink
dataset_dict
Browse files Browse the repository at this point in the history
  • Loading branch information
cgsze committed Feb 17, 2025
1 parent 400df3b commit a75cb4f
Show file tree
Hide file tree
Showing 17 changed files with 33 additions and 83 deletions.
9 changes: 0 additions & 9 deletions test/analysis/input/alternative-network.txt

This file was deleted.

18 changes: 9 additions & 9 deletions test/analysis/input/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ datasets:
# Relative path from the spras directory
data_dir: "input"
-
label: data1
#label: data1
# Reuse some of the same node files as 'data0' but with a different network and targets
node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
edge_files: ["alternative-network.txt"]
other_files: []
# node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
# edge_files: ["alternative-network.txt"]
# other_files: []
# Relative path from the spras directory
data_dir: "input"
# data_dir: "input"

gold_standards:
-
Expand All @@ -127,10 +127,10 @@ gold_standards:
# List of dataset labels to compare with the specific gold standard dataset
dataset_labels: ["data0"]
-
label: gs1
node_files: ["gs_nodes1.txt"]
data_dir: "input"
dataset_labels: ["data1", "data0"]
#label: gs1
# node_files: ["gs_nodes1.txt"]
# data_dir: "input"
# dataset_labels: ["data1", "data0"]

# If we want to reconstruct then we should set run to true.
# TODO: if include is true above but run is false here, the algorithms are not run.
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Empty file.

This file was deleted.

This file was deleted.

This file was deleted.

1 change: 0 additions & 1 deletion test/analysis/input/gs_nodes1.txt

This file was deleted.

13 changes: 1 addition & 12 deletions test/analysis/output/example_summary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,4 @@ output/data0-omicsintegrator2-params-EHHWPMD/pathway.txt 0 0 0 0 0 0 0 {max_path
output/data0-omicsintegrator2-params-IV3IPCJ/pathway.txt 3 2 1 2 2 1 1 {flow: 1, capacity: 1}
output/data0-pathlinker-params-6SWY7JS/pathway.txt 3 2 1 2 2 1 1 {spras_placeholder: no parameters}
output/data0-pathlinker-params-VQL7BDZ/pathway.txt 3 2 1 2 2 1 1 {slice_threshold: 0.3, module_threshold: 0.05}
output/data1-allpairs-params-BEH6YB2/pathway.txt 7 6 1 2 2 1 4 {spras_placeholder: no parameters}
output/data1-domino-params-V3X4RW7/pathway.txt 0 0 0 0 0 0 0 {slice_threshold: 0.3, module_threshold: 0.05}
output/data1-meo-params-GKEDDFZ/pathway.txt 4 4 1 2 2 1 2 {max_path_length: 3, local_search: Yes, rand_restarts: 10}
output/data1-mincostflow-params-SZPZVU6/pathway.txt 3 2 1 2 2 1 1 {flow: 1, capacity: 1}
output/data1-omicsintegrator1-params-E3LSEZQ/pathway.txt 3 2 1 2 2 1 1 {b: 5, w: 5.0, d: 10}
output/data1-omicsintegrator1-params-NFIPHUX/pathway.txt 0 0 0 0 0 0 0 {b: 6, w: 0.0, d: 10}
output/data1-omicsintegrator1-params-SU2S63Y/pathway.txt 3 2 1 2 2 1 1 {b: 6, w: 5.0, d: 10}
output/data1-omicsintegrator1-params-V26JBGX/pathway.txt 0 0 0 0 0 0 0 {b: 5, w: 0.0, d: 10}
output/data1-omicsintegrator2-params-EHHWPMD/pathway.txt 0 0 0 0 0 0 0 {b: 2, g: 3}
output/data1-omicsintegrator2-params-IV3IPCJ/pathway.txt 3 2 1 2 2 1 1 {b: 4, g: 0}
output/data1-pathlinker-params-6SWY7JS/pathway.txt 4 3 1 2 2 1 2 {k: 200}
output/data1-pathlinker-params-VQL7BDZ/pathway.txt 4 3 1 2 2 1 2 {k: 100}

29 changes: 23 additions & 6 deletions test/analysis/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# set up necessary dataframes to run summarize_networks
import spras.config as config
from spras.analysis.summary import summarize_networks
from spras.dataset import Dataset

# Notes:
# - Column labels are required in the node table
Expand All @@ -14,26 +15,42 @@
class TestSummary:
# Test data from example workflow:
def test_example_networks(self):
config.init_from_file(Path("test/analysis/input/config.yaml"))
example_dict = { "label" : "data0", \
"edge_files" : ["network.txt"], \
"node_files" : ["node-prizes.txt", "sources.txt", "targets.txt"], \
"data_dir" : "input", \
"other_files" : []
} # hardcode dataset_dict for testing purposes
example_node_table = Dataset(example_dict) # create instance of Dataset

config.init_from_file(Path("config.yaml"))
algorithm_params = config.config.algorithm_params
list(algorithm_params)
algorithms_with_params = [f'{algorithm}-params-{params_hash}' for algorithm, param_combos in algorithm_params.items() for params_hash in param_combos.keys()]

example_network_files = Path("test/analysis/input/example").glob("*.txt")
example_node_table = pd.read_csv(Path("test/analysis/input/example_node_table.txt"), sep = "\t")
example_network_files = Path("example").glob("*.txt")
#example_node_table = pd.read_csv(Path("test/analysis/input/example_node_table.txt"), sep = "\t")
example_output = pd.read_csv(Path("test/analysis/output/example_summary.txt"), sep = "\t")
example_output["Name"] = example_output["Name"].map(convert_path)
assert summarize_networks(example_network_files, example_node_table, algorithm_params, algorithms_with_params).equals(example_output)

# Test data from EGFR workflow:
def test_egfr_networks(self):
config.init_from_file(Path("test/analysis/input/egfr.yaml"))
egfr_dict = { "label" : "tps_egfr", \
"edge_files" : ["phosphosite-irefindex13.0-uniprot.txt"], \
"node_files" : ["tps-egfr-prizes.txt"], \
"data_dir" : "input", \
"other_files" : []
}
egfr_node_table = Dataset(egfr_dict)

config.init_from_file(Path("egfr.yaml"))
algorithm_params = config.config.algorithm_params
list(algorithm_params)
algorithms_with_params = [f'{algorithm}-params-{params_hash}' for algorithm, param_combos in algorithm_params.items() for params_hash in param_combos.keys()]

egfr_network_files = Path("test/analysis/input/egfr").glob("*.txt")
egfr_node_table = pd.read_csv(Path("test/analysis/input/egfr_node_table.txt"), sep = "\t")
egfr_network_files = Path("egfr").glob("*.txt")
#egfr_node_table = pd.read_csv(Path("test/analysis/input/egfr_node_table.txt"), sep = "\t")
egfr_output = pd.read_csv(Path("test/analysis/output/egfr_summary.txt"), sep = "\t")
egfr_output["Name"] = egfr_output["Name"].map(convert_path)
assert summarize_networks(egfr_network_files, egfr_node_table, algorithm_params, algorithms_with_params).equals(egfr_output)
Expand Down

0 comments on commit a75cb4f

Please sign in to comment.