diff --git a/spras/meo.py b/spras/meo.py index b614d4c4..1f2cbc23 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -10,6 +10,11 @@ __all__ = ['MEO', 'write_properties'] +# replaces all underscores in the node names with unicode separator +# MEO keeps only the substring up to the first underscore when parsing node names +# https://github.com/agitter/meo/blob/1fe57e8ff3952c494e2b14dfdc563a84596e2fcd/src/alg/Vertex.java#L56-L71 +underscore_replacement = '꧁SEP꧂' + # Only supports the Random orientation algorithm # Does not support MINSAT or MAXCSP @@ -63,6 +68,8 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, - MEO tracks the directionality of the original edges, but all of its output edges are directed. - To remain accurate to MEO's design we will also treat the output graph's as directed """ + + class MEO(PRM): required_inputs = ['sources', 'targets', 'edges'] @@ -88,6 +95,8 @@ def generate_inputs(data, filename_map): # TODO test whether this selection is needed, what values could the column contain that we would want to # include or exclude? 
nodes = nodes.loc[nodes[node_type]] + # replace _'s with underscore_replacement + nodes['NODEID'] = nodes['NODEID'].str.replace('_', underscore_replacement) nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) # Create network file @@ -95,11 +104,12 @@ def generate_inputs(data, filename_map): # Format network file edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') - + # replace _'s with ꧁SEP꧂ + edges['Interactor1'] = edges['Interactor1'].str.replace('_', underscore_replacement) + edges['Interactor2'] = edges['Interactor2'].str.replace('_', underscore_replacement) edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) - # TODO add parameter validation # TODO document required arguments @staticmethod @@ -181,6 +191,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Columns Source Type Target Oriented Weight df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) if not df.empty: + # Replace underscore_replacement with _ + df['Source'] = df['Source'].str.replace(underscore_replacement, '_') + df['Target'] = df['Target'].str.replace(underscore_replacement, '_') # Keep only edges that were assigned an orientation (direction) df = df.loc[df['Oriented']] # TODO what should be the edge rank? 
diff --git a/test/generate-inputs/expected/allpairs-network-expected.txt b/test/generate-inputs/expected/allpairs-network-expected.txt index 3cbac89c..011a4c71 100644 --- a/test/generate-inputs/expected/allpairs-network-expected.txt +++ b/test/generate-inputs/expected/allpairs-network-expected.txt @@ -1,3 +1,3 @@ #Interactor1 Interactor2 Weight -A B 0.98 +test_A B 0.98 B C 0.77 diff --git a/test/generate-inputs/expected/domino-network-expected.txt b/test/generate-inputs/expected/domino-network-expected.txt index 27683d36..dba06b4d 100644 --- a/test/generate-inputs/expected/domino-network-expected.txt +++ b/test/generate-inputs/expected/domino-network-expected.txt @@ -1,3 +1,3 @@ ID_interactor_A ppi ID_interactor_B -ENSG0A ppi ENSG0B +ENSG0test_A ppi ENSG0B ENSG0B ppi ENSG0C diff --git a/test/generate-inputs/expected/meo-edges-expected.txt b/test/generate-inputs/expected/meo-edges-expected.txt index 5916b3d7..d2afa5ae 100644 --- a/test/generate-inputs/expected/meo-edges-expected.txt +++ b/test/generate-inputs/expected/meo-edges-expected.txt @@ -1,2 +1,2 @@ -A (pp) B 0.98 +test꧁SEP꧂A (pp) B 0.98 B (pp) C 0.77 diff --git a/test/generate-inputs/expected/mincostflow-edges-expected.txt b/test/generate-inputs/expected/mincostflow-edges-expected.txt index f0889bf4..a52b1593 100644 --- a/test/generate-inputs/expected/mincostflow-edges-expected.txt +++ b/test/generate-inputs/expected/mincostflow-edges-expected.txt @@ -1,4 +1,4 @@ -A B 0.98 +test_A B 0.98 B C 0.77 -B A 0.98 +B test_A 0.98 C B 0.77 diff --git a/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt index 6342d481..dd21d5aa 100644 --- a/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt +++ b/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt @@ -1,3 +1,3 @@ protein1 protein2 weight directionality -A B 0.98 U +test_A B 0.98 U B C 0.77 U diff --git 
a/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt index 997eb62e..51ccfd0f 100644 --- a/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt +++ b/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt @@ -1,3 +1,3 @@ protein1 protein2 cost -A B 0.52 +test_A B 0.52 B C 0.73 diff --git a/test/generate-inputs/expected/pathlinker-network-expected.txt b/test/generate-inputs/expected/pathlinker-network-expected.txt index d1f92741..d90b3f05 100644 --- a/test/generate-inputs/expected/pathlinker-network-expected.txt +++ b/test/generate-inputs/expected/pathlinker-network-expected.txt @@ -1,5 +1,5 @@ #Interactor1 Interactor2 Weight -A B 0.98 +test_A B 0.98 B C 0.77 -B A 0.98 +B test_A 0.98 C B 0.77 diff --git a/test/generate-inputs/inputs/network.txt b/test/generate-inputs/inputs/network.txt new file mode 100644 index 00000000..fa374457 --- /dev/null +++ b/test/generate-inputs/inputs/network.txt @@ -0,0 +1,2 @@ +test_A B 0.98 U +B C 0.77 U diff --git a/test/generate-inputs/inputs/node-prizes.txt b/test/generate-inputs/inputs/node-prizes.txt new file mode 100644 index 00000000..f0d540ad --- /dev/null +++ b/test/generate-inputs/inputs/node-prizes.txt @@ -0,0 +1,3 @@ +NODEID prize active +test_A 2 true +C 5.7 true diff --git a/test/generate-inputs/inputs/sources.txt b/test/generate-inputs/inputs/sources.txt new file mode 100644 index 00000000..050111ac --- /dev/null +++ b/test/generate-inputs/inputs/sources.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/inputs/targets.txt b/test/generate-inputs/inputs/targets.txt new file mode 100644 index 00000000..3cc58df8 --- /dev/null +++ b/test/generate-inputs/inputs/targets.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/inputs/test_config.yaml b/test/generate-inputs/inputs/test_config.yaml new file mode 100644 index 00000000..8cef4722 --- /dev/null +++ b/test/generate-inputs/inputs/test_config.yaml @@ -0,0 
+1,69 @@ +hash_length: 7 +container_framework: docker +unpack_singularity: false +container_registry: + base_url: docker.io + owner: reedcompbio + +algorithms: + - name: "pathlinker" + params: + include: true + run1: + k: range(100,201,100) + + - name: "omicsintegrator1" + params: + include: true + run1: + b: [5, 6] + w: np.linspace(0,5,2) + d: [10] + + - name: "omicsintegrator2" + params: + include: true + run1: + b: [4] + g: [0] + run2: + b: [2] + g: [3] + + - name: "meo" + params: + include: true + run1: + max_path_length: [3] + local_search: ["Yes"] + rand_restarts: [10] + + - name: "mincostflow" + params: + include: true + run1: + flow: [1] # The flow must be an int + capacity: [1] + + - name: "allpairs" + params: + include: true + + - name: "domino" + params: + include: true + run1: + slice_threshold: [0.3] + module_threshold: [0.05] + +datasets: + - + # Labels can only contain letters, numbers, or underscores + label: test_data + node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] + # DataLoader.py can currently only load a single edge file, which is the primary network + edge_files: ["network.txt"] + # Placeholder + other_files: [] + # Relative path from the spras directory + data_dir: "test/generate-inputs/inputs" diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 86319e2c..6d732d31 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -17,7 +17,7 @@ 'domino': 'network', 'pathlinker': 'network', 'allpairs': 'network' - } +} class TestGenerateInputs: @@ -29,14 +29,14 @@ def setup_class(cls): Path(OUTDIR).mkdir(parents=True, exist_ok=True) def test_prepare_inputs_networks(self): - config_loc = os.path.join("config", "config.yaml") + config_loc = os.path.join("test", "generate-inputs", "inputs", "test_config.yaml") with open(config_loc) as config_file: config = yaml.load(config_file, Loader=yaml.FullLoader) test_file = 
"test/generate-inputs/output/test_pickled_dataset.pkl" - data0_dataset = next((ds for ds in config["datasets"] if ds["label"] == "data0"), None) - runner.merge_input(data0_dataset, test_file) + test_dataset = next((ds for ds in config["datasets"] if ds["label"] == "test_data"), None) + runner.merge_input(test_dataset, test_file) for algo in algo_exp_file.keys(): inputs = runner.get_required_inputs(algo) diff --git a/test/parse-outputs/expected/meo-pathway-expected.txt b/test/parse-outputs/expected/meo-pathway-expected.txt index 6515013f..0023a8f7 100644 --- a/test/parse-outputs/expected/meo-pathway-expected.txt +++ b/test/parse-outputs/expected/meo-pathway-expected.txt @@ -1,3 +1,3 @@ Node1 Node2 Rank Direction -GENEA GENEC 1 D -GENEC GENEB 1 D +GENE_A GENE_C 1 D +GENE_C GENEB 1 D diff --git a/test/parse-outputs/input/meo-raw-pathway.txt b/test/parse-outputs/input/meo-raw-pathway.txt index 9f44a8d5..9e1a6b8f 100644 --- a/test/parse-outputs/input/meo-raw-pathway.txt +++ b/test/parse-outputs/input/meo-raw-pathway.txt @@ -1,3 +1,3 @@ Source Type Target Oriented Weight -GENEA pp GENEC true 0.5 -GENEC pd GENEB true 0.5 +GENE꧁SEP꧂A pp GENE꧁SEP꧂C true 0.5 +GENE꧁SEP꧂C pd GENEB true 0.5