From c14e1fc23c08575f5da0228a42753589c0ac9aae Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 25 Oct 2024 12:39:21 -0500 Subject: [PATCH 1/7] update code to pre and post process underscores in node names --- spras/meo.py | 9 ++++++++- test/MEO/input/meo-edges.txt | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/spras/meo.py b/spras/meo.py index b614d4c4..3382c54b 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -88,6 +88,8 @@ def generate_inputs(data, filename_map): # TODO test whether this selection is needed, what values could the column contain that we would want to # include or exclude? nodes = nodes.loc[nodes[node_type]] + # replace _'s with ꧁SEP꧂ + nodes['NODEID'] = nodes['NODEID'].str.replace('_', '꧁SEP꧂') nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) # Create network file @@ -95,7 +97,9 @@ def generate_inputs(data, filename_map): # Format network file edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') - + # replace _'s with ꧁SEP꧂ + edges['Interactor1'] = edges['Interactor1'].str.replace('_', '꧁SEP꧂') + edges['Interactor2'] = edges['Interactor2'].str.replace('_', '꧁SEP꧂') edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) @@ -181,6 +185,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Columns Source Type Target Oriented Weight df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) if not df.empty: + # Replace ꧁SEP꧂ with _ + df['Source'] = df['Source'].str.replace('꧁SEP꧂', '_') + df['Target'] = df['Target'].str.replace('꧁SEP꧂', '_') # Keep only edges that were assigned an orientation (direction) df = df.loc[df['Oriented']] # TODO what should be the edge rank? 
diff --git a/test/MEO/input/meo-edges.txt b/test/MEO/input/meo-edges.txt index 25e05b80..dbad11c3 100644 --- a/test/MEO/input/meo-edges.txt +++ b/test/MEO/input/meo-edges.txt @@ -1,6 +1,6 @@ -GeneA (pp) GeneC 0.5 -GeneB (pp) GeneC 0.5 -GeneC (pp) GeneD 0.5 +Gene_A (pp) Gene_C 0.5 +GeneB (pp) Gene_C 0.5 +Gene_C (pp) GeneD 0.5 GeneD (pp) GeneE 0.5 GeneD (pp) GeneG 0.5 GeneE (pp) GeneF 0.5 From 0230455631235e9419bd7a25f1e0b902af738bca Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 25 Oct 2024 12:40:48 -0500 Subject: [PATCH 2/7] update wrong test --- test/MEO/input/meo-edges.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/MEO/input/meo-edges.txt b/test/MEO/input/meo-edges.txt index dbad11c3..25e05b80 100644 --- a/test/MEO/input/meo-edges.txt +++ b/test/MEO/input/meo-edges.txt @@ -1,6 +1,6 @@ -Gene_A (pp) Gene_C 0.5 -GeneB (pp) Gene_C 0.5 -Gene_C (pp) GeneD 0.5 +GeneA (pp) GeneC 0.5 +GeneB (pp) GeneC 0.5 +GeneC (pp) GeneD 0.5 GeneD (pp) GeneE 0.5 GeneD (pp) GeneG 0.5 GeneE (pp) GeneF 0.5 From 73cbf13ab2dfeddd5b1e78e6cdc4232aa9c4026b Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 25 Oct 2024 12:59:45 -0500 Subject: [PATCH 3/7] updated parse-outputs test --- test/parse-outputs/expected/meo-pathway-expected.txt | 4 ++-- test/parse-outputs/input/meo-raw-pathway.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/parse-outputs/expected/meo-pathway-expected.txt b/test/parse-outputs/expected/meo-pathway-expected.txt index 6515013f..0023a8f7 100644 --- a/test/parse-outputs/expected/meo-pathway-expected.txt +++ b/test/parse-outputs/expected/meo-pathway-expected.txt @@ -1,3 +1,3 @@ Node1 Node2 Rank Direction -GENEA GENEC 1 D -GENEC GENEB 1 D +GENE_A GENE_C 1 D +GENE_C GENEB 1 D diff --git a/test/parse-outputs/input/meo-raw-pathway.txt b/test/parse-outputs/input/meo-raw-pathway.txt index 9f44a8d5..9e1a6b8f 100644 --- a/test/parse-outputs/input/meo-raw-pathway.txt +++ b/test/parse-outputs/input/meo-raw-pathway.txt 
@@ -1,3 +1,3 @@ Source Type Target Oriented Weight -GENEA pp GENEC true 0.5 -GENEC pd GENEB true 0.5 +GENE꧁SEP꧂A pp GENE꧁SEP꧂C true 0.5 +GENE꧁SEP꧂C pd GENEB true 0.5 From ebd7dee5d6dbba06e5b2f662f4eff65930208932 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 29 Oct 2024 11:15:20 -0500 Subject: [PATCH 4/7] updated to use a variable to store the new separator --- spras/meo.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/spras/meo.py b/spras/meo.py index 3382c54b..56895e90 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -10,6 +10,8 @@ __all__ = ['MEO', 'write_properties'] +# replaces all underscores in the node names with unicode seperator +underscore_replacement = '꧁SEP꧂' # Only supports the Random orientation algorithm # Does not support MINSAT or MAXCSP @@ -88,8 +90,8 @@ def generate_inputs(data, filename_map): # TODO test whether this selection is needed, what values could the column contain that we would want to # include or exclude? nodes = nodes.loc[nodes[node_type]] - # replace _'s with ꧁SEP꧂ - nodes['NODEID'] = nodes['NODEID'].str.replace('_', '꧁SEP꧂') + # replace _'s with underscore_replacement + nodes['NODEID'] = nodes['NODEID'].str.replace('_', underscore_replacement) nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) # Create network file @@ -98,8 +100,8 @@ def generate_inputs(data, filename_map): # Format network file edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') # replace _'s with ꧁SEP꧂ - edges['Interactor1'] = edges['Interactor1'].str.replace('_', '꧁SEP꧂') - edges['Interactor2'] = edges['Interactor2'].str.replace('_', '꧁SEP꧂') + edges['Interactor1'] = edges['Interactor1'].str.replace('_', underscore_replacement) + edges['Interactor2'] = edges['Interactor2'].str.replace('_', underscore_replacement) edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) @@ -185,9 +187,9 @@ def 
parse_output(raw_pathway_file, standardized_pathway_file): # Columns Source Type Target Oriented Weight df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) if not df.empty: - # Replace ꧁SEP꧂ with _ - df['Source'] = df['Source'].str.replace('꧁SEP꧂', '_') - df['Target'] = df['Target'].str.replace('꧁SEP꧂', '_') + # Replace underscore_replacement with _ + df['Source'] = df['Source'].str.replace(underscore_replacement, '_') + df['Target'] = df['Target'].str.replace(underscore_replacement, '_') # Keep only edges that were assigned an orientation (direction) df = df.loc[df['Oriented']] # TODO what should be the edge rank? From 28321e0aca8e4310b1f88e7dd12938f1d7f263d3 Mon Sep 17 00:00:00 2001 From: Neha Talluri <78840540+ntalluri@users.noreply.github.com> Date: Mon, 4 Nov 2024 12:12:14 -0600 Subject: [PATCH 5/7] Update spras/meo.py on why we need this change Co-authored-by: Anthony Gitter --- spras/meo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spras/meo.py b/spras/meo.py index 56895e90..80e920db 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -11,6 +11,8 @@ __all__ = ['MEO', 'write_properties'] # replaces all underscores in the node names with unicode seperator +# MEO keeps only the substring up to the first underscore when parsing node names +# https://github.com/agitter/meo/blob/1fe57e8ff3952c494e2b14dfdc563a84596e2fcd/src/alg/Vertex.java#L56-L71 underscore_replacement = '꧁SEP꧂' # Only supports the Random orientation algorithm From 2a1aeabac1a6b9850f3e526eb6c618d62d3d26fd Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 7 Nov 2024 12:01:40 -0600 Subject: [PATCH 6/7] updated generate inputs test case --- .../expected/allpairs-network-expected.txt | 2 +- .../expected/domino-network-expected.txt | 2 +- .../expected/meo-edges-expected.txt | 2 +- .../expected/mincostflow-edges-expected.txt | 4 +- .../omicsintegrator1-edges-expected.txt | 2 +- .../omicsintegrator2-edges-expected.txt | 2 +- .../expected/pathlinker-network-expected.txt | 4 +- 
test/generate-inputs/inputs/network.txt | 2 + test/generate-inputs/inputs/node-prizes.txt | 3 + test/generate-inputs/inputs/sources.txt | 1 + test/generate-inputs/inputs/targets.txt | 1 + test/generate-inputs/inputs/test_config.yaml | 69 +++++++++++++++++++ test/generate-inputs/test_generate_inputs.py | 6 +- 13 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 test/generate-inputs/inputs/network.txt create mode 100644 test/generate-inputs/inputs/node-prizes.txt create mode 100644 test/generate-inputs/inputs/sources.txt create mode 100644 test/generate-inputs/inputs/targets.txt create mode 100644 test/generate-inputs/inputs/test_config.yaml diff --git a/test/generate-inputs/expected/allpairs-network-expected.txt b/test/generate-inputs/expected/allpairs-network-expected.txt index 3cbac89c..011a4c71 100644 --- a/test/generate-inputs/expected/allpairs-network-expected.txt +++ b/test/generate-inputs/expected/allpairs-network-expected.txt @@ -1,3 +1,3 @@ #Interactor1 Interactor2 Weight -A B 0.98 +test_A B 0.98 B C 0.77 diff --git a/test/generate-inputs/expected/domino-network-expected.txt b/test/generate-inputs/expected/domino-network-expected.txt index 27683d36..dba06b4d 100644 --- a/test/generate-inputs/expected/domino-network-expected.txt +++ b/test/generate-inputs/expected/domino-network-expected.txt @@ -1,3 +1,3 @@ ID_interactor_A ppi ID_interactor_B -ENSG0A ppi ENSG0B +ENSG0test_A ppi ENSG0B ENSG0B ppi ENSG0C diff --git a/test/generate-inputs/expected/meo-edges-expected.txt b/test/generate-inputs/expected/meo-edges-expected.txt index 5916b3d7..d2afa5ae 100644 --- a/test/generate-inputs/expected/meo-edges-expected.txt +++ b/test/generate-inputs/expected/meo-edges-expected.txt @@ -1,2 +1,2 @@ -A (pp) B 0.98 +test꧁SEP꧂A (pp) B 0.98 B (pp) C 0.77 diff --git a/test/generate-inputs/expected/mincostflow-edges-expected.txt b/test/generate-inputs/expected/mincostflow-edges-expected.txt index f0889bf4..a52b1593 100644 --- 
a/test/generate-inputs/expected/mincostflow-edges-expected.txt +++ b/test/generate-inputs/expected/mincostflow-edges-expected.txt @@ -1,4 +1,4 @@ -A B 0.98 +test_A B 0.98 B C 0.77 -B A 0.98 +B test_A 0.98 C B 0.77 diff --git a/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt index 6342d481..dd21d5aa 100644 --- a/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt +++ b/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt @@ -1,3 +1,3 @@ protein1 protein2 weight directionality -A B 0.98 U +test_A B 0.98 U B C 0.77 U diff --git a/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt index 997eb62e..51ccfd0f 100644 --- a/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt +++ b/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt @@ -1,3 +1,3 @@ protein1 protein2 cost -A B 0.52 +test_A B 0.52 B C 0.73 diff --git a/test/generate-inputs/expected/pathlinker-network-expected.txt b/test/generate-inputs/expected/pathlinker-network-expected.txt index d1f92741..d90b3f05 100644 --- a/test/generate-inputs/expected/pathlinker-network-expected.txt +++ b/test/generate-inputs/expected/pathlinker-network-expected.txt @@ -1,5 +1,5 @@ #Interactor1 Interactor2 Weight -A B 0.98 +test_A B 0.98 B C 0.77 -B A 0.98 +B test_A 0.98 C B 0.77 diff --git a/test/generate-inputs/inputs/network.txt b/test/generate-inputs/inputs/network.txt new file mode 100644 index 00000000..fa374457 --- /dev/null +++ b/test/generate-inputs/inputs/network.txt @@ -0,0 +1,2 @@ +test_A B 0.98 U +B C 0.77 U diff --git a/test/generate-inputs/inputs/node-prizes.txt b/test/generate-inputs/inputs/node-prizes.txt new file mode 100644 index 00000000..f0d540ad --- /dev/null +++ b/test/generate-inputs/inputs/node-prizes.txt @@ -0,0 +1,3 @@ +NODEID prize active +test_A 2 true +C 5.7 true diff --git 
a/test/generate-inputs/inputs/sources.txt b/test/generate-inputs/inputs/sources.txt new file mode 100644 index 00000000..050111ac --- /dev/null +++ b/test/generate-inputs/inputs/sources.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/inputs/targets.txt b/test/generate-inputs/inputs/targets.txt new file mode 100644 index 00000000..3cc58df8 --- /dev/null +++ b/test/generate-inputs/inputs/targets.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/inputs/test_config.yaml b/test/generate-inputs/inputs/test_config.yaml new file mode 100644 index 00000000..42210c8f --- /dev/null +++ b/test/generate-inputs/inputs/test_config.yaml @@ -0,0 +1,69 @@ +hash_length: 7 +container_framework: docker +unpack_singularity: false +container_registry: + base_url: docker.io + owner: reedcompbio + +algorithms: + - name: "pathlinker" + params: + include: true + run1: + k: range(100,201,100) + + - name: "omicsintegrator1" + params: + include: true + run1: + b: [5, 6] + w: np.linspace(0,5,2) + d: [10] + + - name: "omicsintegrator2" + params: + include: true + run1: + b: [4] + g: [0] + run2: + b: [2] + g: [3] + + - name: "meo" + params: + include: true + run1: + max_path_length: [3] + local_search: ["Yes"] + rand_restarts: [10] + + - name: "mincostflow" + params: + include: true + run1: + flow: [1] # The flow must be an int + capacity: [1] + + - name: "allpairs" + params: + include: true + + - name: "domino" + params: + include: true + run1: + slice_threshold: [0.3] + module_threshold: [0.05] + +datasets: + - + # Labels can only contain letters, numbers, or underscores + label: test_data + node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] + # DataLoader.py can currently only load a single edge file, which is the primary network + edge_files: ["network.txt"] + # Placeholder + other_files: [] + # Relative path from the spras directory + data_dir: "test/generate-inputs/inputs" \ No newline at end of file diff --git a/test/generate-inputs/test_generate_inputs.py 
b/test/generate-inputs/test_generate_inputs.py index 86319e2c..4ad74d0b 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -29,14 +29,14 @@ def setup_class(cls): Path(OUTDIR).mkdir(parents=True, exist_ok=True) def test_prepare_inputs_networks(self): - config_loc = os.path.join("config", "config.yaml") + config_loc = os.path.join("test","generate-inputs", "inputs", "test_config.yaml") with open(config_loc) as config_file: config = yaml.load(config_file, Loader=yaml.FullLoader) test_file = "test/generate-inputs/output/test_pickled_dataset.pkl" - data0_dataset = next((ds for ds in config["datasets"] if ds["label"] == "data0"), None) - runner.merge_input(data0_dataset, test_file) + test_dataset = next((ds for ds in config["datasets"] if ds["label"] == "test_data"), None) + runner.merge_input(test_dataset, test_file) for algo in algo_exp_file.keys(): inputs = runner.get_required_inputs(algo) From de539524fbbdda22045b75b23669f4bc262e90d8 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 8 Nov 2024 16:31:10 -0600 Subject: [PATCH 7/7] Run code linter --- spras/meo.py | 4 +++- test/generate-inputs/inputs/test_config.yaml | 2 +- test/generate-inputs/test_generate_inputs.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/spras/meo.py b/spras/meo.py index 80e920db..1f2cbc23 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -15,6 +15,7 @@ # https://github.com/agitter/meo/blob/1fe57e8ff3952c494e2b14dfdc563a84596e2fcd/src/alg/Vertex.java#L56-L71 underscore_replacement = '꧁SEP꧂' + # Only supports the Random orientation algorithm # Does not support MINSAT or MAXCSP # TODO add parameter validation @@ -67,6 +68,8 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, - MEO tracks the directionality of the original edges, but all of its output edges are directed. 
- To remain accurate to MEO's design we will also treat the output graph's as directed """ + + class MEO(PRM): required_inputs = ['sources', 'targets', 'edges'] @@ -107,7 +110,6 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) - # TODO add parameter validation # TODO document required arguments @staticmethod diff --git a/test/generate-inputs/inputs/test_config.yaml b/test/generate-inputs/inputs/test_config.yaml index 42210c8f..8cef4722 100644 --- a/test/generate-inputs/inputs/test_config.yaml +++ b/test/generate-inputs/inputs/test_config.yaml @@ -66,4 +66,4 @@ datasets: # Placeholder other_files: [] # Relative path from the spras directory - data_dir: "test/generate-inputs/inputs" \ No newline at end of file + data_dir: "test/generate-inputs/inputs" diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 4ad74d0b..6d732d31 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -17,7 +17,7 @@ 'domino': 'network', 'pathlinker': 'network', 'allpairs': 'network' - } +} class TestGenerateInputs: @@ -29,7 +29,7 @@ def setup_class(cls): Path(OUTDIR).mkdir(parents=True, exist_ok=True) def test_prepare_inputs_networks(self): - config_loc = os.path.join("test","generate-inputs", "inputs", "test_config.yaml") + config_loc = os.path.join("test", "generate-inputs", "inputs", "test_config.yaml") with open(config_loc) as config_file: config = yaml.load(config_file, Loader=yaml.FullLoader)