diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 54d4125d..fb527efd 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -85,6 +85,16 @@ jobs: docker pull reedcompbio/domino:latest docker pull reedcompbio/py4cytoscape:v3 docker pull reedcompbio/spras:v0.1.0 + docker pull reedcompbio/bowtiebuilder:v1 + - name: Build BowtieBuilder Docker Image + uses: docker/build-push-action@v1 + with: + path: docker-wrappers/BowTieBuilder + dockerfile: docker-wrappers/BowTieBuilder/Dockerfile + repository: reedcompbio/bowtiebuilder + tags: latest + cache_from: reedcompbio/bowtiebuilder:v1 + push: false - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: diff --git a/config/config.yaml b/config/config.yaml index 741d8ca9..1894421d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -43,6 +43,7 @@ container_registry: # careful: too many parameters might make your runs take a long time. algorithms: + - name: "pathlinker" params: include: true @@ -92,6 +93,9 @@ algorithms: run1: slice_threshold: [0.3] module_threshold: [0.05] + - name: "bowtiebuilder" + params: + include: true # Here we specify which pathways to run and other file location information. @@ -116,6 +120,7 @@ datasets: # Relative path from the spras directory data_dir: "input" + # If we want to reconstruct then we should set run to true. # TODO: if include is true above but run is false here, algs are not run. # is this the behavior we want? 
diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile new file mode 100644 index 00000000..06606ec9 --- /dev/null +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -0,0 +1,5 @@ +FROM python:3.8-bullseye + +WORKDIR /btb +RUN wget https://raw.githubusercontent.com/Reed-CompBio/BowTieBuilder-Algorithm/main/btb.py +RUN pip install networkx==2.8 \ No newline at end of file diff --git a/docker-wrappers/BowTieBuilder/README.md b/docker-wrappers/BowTieBuilder/README.md new file mode 100644 index 00000000..555904be --- /dev/null +++ b/docker-wrappers/BowTieBuilder/README.md @@ -0,0 +1,15 @@ +# BowTieBuilder Docker image + +A Docker image for [BowTieBuilder](https://github.com/Reed-CompBio/BowTieBuilder-Algorithm) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/bowtiebuilder). + +To create the Docker image run: +``` +docker build -t reedcompbio/bowtiebuilder:v1 -f Dockerfile . +``` +from this directory. + +## Original Paper + +The original paper for BowTieBuilder can be accessed here: + +Supper, J., Spangenberg, L., Planatscher, H. et al. BowTieBuilder: modeling signal transduction pathways. BMC Syst Biol 3, 67 (2009). https://doi.org/10.1186/1752-0509-3-67 \ No newline at end of file diff --git a/docker-wrappers/LocalNeighborhood/Dockerfile b/docker-wrappers/LocalNeighborhood/Dockerfile deleted file mode 100644 index 06dcce8a..00000000 --- a/docker-wrappers/LocalNeighborhood/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -# Create a Docker image for the Local Neighborhood algorithm here diff --git a/docker-wrappers/LocalNeighborhood/README.md b/docker-wrappers/LocalNeighborhood/README.md deleted file mode 100644 index 94209fa4..00000000 --- a/docker-wrappers/LocalNeighborhood/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# Local Neighborhood Docker image - -A simple pathway reconstruction algorithm used to welcome new contributors. -The algorithm takes a network and a list of nodes as input. 
-It outputs all edges in the network that have a node from the list as an endpoint. - -New contributors complete the `Dockerfile` to wrap the implementation in `local_neighborhood.py`. - -## Usage -``` -$ python local_neighborhood.py -h -usage: local_neighborhood.py [-h] --network NETWORK --nodes NODES --output OUTPUT - -Local neighborhood pathway reconstruction - -optional arguments: - -h, --help show this help message and exit - --network NETWORK Path to the network file with '|' delimited node pairs - --nodes NODES Path to the nodes file - --output OUTPUT Path to the output file that will be written -``` - -## Example behavior -Network file: -``` -A|B -C|B -C|D -D|E -A|E -``` - -Nodes file: -``` -A -B -``` - -Output file: -``` -A|B -C|B -A|E -``` \ No newline at end of file diff --git a/docker-wrappers/LocalNeighborhood/local_neighborhood.py b/docker-wrappers/LocalNeighborhood/local_neighborhood.py deleted file mode 100644 index 2a2b6096..00000000 --- a/docker-wrappers/LocalNeighborhood/local_neighborhood.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Local neighborhood pathway reconstruction algorithm. -The algorithm takes a network and a list of nodes as input. -It outputs all edges in the network that have a node from the list as an endpoint. -""" - -import argparse -from pathlib import Path - - -def parse_arguments(): - """ - Process command line arguments. 
- @return arguments - """ - parser = argparse.ArgumentParser( - description="Local neighborhood pathway reconstruction" - ) - parser.add_argument("--network", type=Path, required=True, help="Path to the network file with '|' delimited node pairs") - parser.add_argument("--nodes", type=Path, required=True, help="Path to the nodes file") - parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written") - - return parser.parse_args() - - -def local_neighborhood(network_file: Path, nodes_file: Path, output_file: Path): - if not network_file.exists(): - raise OSError(f"Network file {str(network_file)} does not exist") - if not nodes_file.exists(): - raise OSError(f"Nodes file {str(nodes_file)} does not exist") - if output_file.exists(): - print(f"Output file {str(output_file)} will be overwritten") - - # Create the parent directories for the output file if needed - output_file.parent.mkdir(parents=True, exist_ok=True) - - # Read the list of nodes - nodes = set() - with nodes_file.open() as nodes_f: - for line in nodes_f: - nodes.add(line.strip()) - print(f"Read {len(nodes)} unique nodes") - - # Iterate through the network edges and write those that have an endpoint in the node set - in_edge_counter = 0 - out_edge_counter = 0 - with output_file.open('w') as output_f: - with network_file.open() as network_f: - for line in network_f: - line = line.strip() - in_edge_counter += 1 - endpoints = line.split("|") - if len(endpoints) != 2: - raise ValueError(f"Edge {line} does not contain 2 nodes separated by '|'") - if endpoints[0] in nodes or endpoints[1] in nodes: - out_edge_counter += 1 - output_f.write(f"{line}\n") - print(f"Kept {out_edge_counter} of {in_edge_counter} edges") - - -def main(): - """ - Parse arguments and run pathway reconstruction - """ - args = parse_arguments() - local_neighborhood(args.network, args.nodes, args.output) - - -if __name__ == "__main__": - main() diff --git a/spras/btb.py b/spras/btb.py new file 
"""
SPRAS wrapper for the BowTieBuilder pathway reconstruction algorithm.

The wrapper defines the BowtieBuilder class, which provides the three
functions used by SPRAS for every algorithm:
- generate_inputs
- run
- parse_output
"""

import warnings
from pathlib import Path

import pandas as pd

from spras.containers import prepare_volume, run_container
from spras.interactome import (
    convert_undirected_to_directed,
    reinsert_direction_col_directed,
)
from spras.prm import PRM

__all__ = ['BowtieBuilder']


class BowtieBuilder(PRM):
    # File types that generate_inputs writes and run reads
    required_inputs = ['sources', 'targets', 'edges']

    @staticmethod
    def generate_inputs(data, filename_map):
        """
        Access fields from the dataset and write the required input files
        @param data: dataset
        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
        @return:
        """
        for input_type in BowtieBuilder.required_inputs:
            if input_type not in filename_map:
                raise ValueError(f"{input_type} filename is missing")

        # Write the sources file and then the targets file, one node id per line
        # Does not check whether a node is both a source and a target
        for node_type in ['sources', 'targets']:
            nodes = data.request_node_columns([node_type])
            if nodes is None:
                raise ValueError(f'No {node_type} found in the node files')

            # TODO test whether this selection is needed, what values could the column contain that we would want to
            # include or exclude?
            nodes = nodes.loc[nodes[node_type]]
            nodes.to_csv(filename_map[node_type], sep='\t', index=False, columns=['NODEID'], header=False)

        # Create the network file from the interactome, formatted as a directed graph
        edges = data.get_interactome()
        edges = convert_undirected_to_directed(edges)
        edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False)

    # Skips parameter validation step
    @staticmethod
    def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"):
        """
        Run BowTieBuilder in a container
        @param sources: input source file (required)
        @param targets: input target file (required)
        @param edges: input edge file (required)
        @param output_file: path to the output pathway file (required)
        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
        @raise ValueError: if a required argument is missing or an input file does not exist
        @raise IndexError: if a line of the edge file has fewer than three tab-separated columns
        """
        if not sources or not targets or not edges or not output_file:
            raise ValueError('Required BowtieBuilder arguments are missing')

        if not Path(sources).exists() or not Path(targets).exists() or not Path(edges).exists():
            raise ValueError('Missing input file')

        # Check the edge file on the host before starting the container so that a malformed
        # file is reported as an IndexError here instead of as an opaque container failure
        with open(edges, 'r') as edge_file:
            for line_number, line in enumerate(edge_file, start=1):
                if len(line.strip().split('\t')) < 3:
                    raise IndexError(f'Line {line_number} of edge file {edges} has fewer than 3 tab-separated columns')

        work_dir = '/btb'

        # Each volume is a tuple (src, dest)
        volumes = list()

        bind_path, source_file = prepare_volume(sources, work_dir)
        volumes.append(bind_path)

        bind_path, target_file = prepare_volume(targets, work_dir)
        volumes.append(bind_path)

        bind_path, edges_file = prepare_volume(edges, work_dir)
        volumes.append(bind_path)

        # The container writes its result to raw-pathway.txt inside the mounted output directory
        # Create that directory on the host before mounting it
        out_dir = Path(output_file).parent
        out_dir.mkdir(parents=True, exist_ok=True)
        bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
        volumes.append(bind_path)
        mapped_out_prefix = mapped_out_dir + '/raw-pathway.txt'  # Use posix path inside the container

        command = ['python',
                   'btb.py',
                   '--edges',
                   edges_file,
                   '--sources',
                   source_file,
                   '--target',
                   target_file,
                   '--output',
                   mapped_out_prefix]

        print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True)

        container_suffix = "bowtiebuilder:v1"
        out = run_container(container_framework,
                            container_suffix,
                            command,
                            volumes,
                            work_dir)
        print(out)

        # The container always writes raw-pathway.txt in the output directory
        # Move it when the caller asked for a different file name so that output_file is honored
        raw_output = out_dir / 'raw-pathway.txt'
        if raw_output != Path(output_file) and raw_output.exists():
            raw_output.replace(output_file)

    @staticmethod
    def parse_output(raw_pathway_file, standardized_pathway_file):
        """
        Convert a predicted pathway into the universal format
        @param raw_pathway_file: pathway file produced by an algorithm's run function
        @param standardized_pathway_file: the same pathway written in the universal format
        """
        # The first line of the raw pathway file is read as a header row and is not written back out
        df = pd.read_csv(raw_pathway_file, sep='\t')
        df = reinsert_direction_col_directed(df)
        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
a/spras/runner.py b/spras/runner.py index 6ef26496..2d3fb519 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -7,6 +7,7 @@ from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from spras.pathlinker import PathLinker as pathlinker +from spras.btb import BowtieBuilder as bowtiebuilder def run(algorithm, params): diff --git a/test/BowtieBuilder/expected/output1.txt b/test/BowtieBuilder/expected/output1.txt new file mode 100644 index 00000000..34740e2d --- /dev/null +++ b/test/BowtieBuilder/expected/output1.txt @@ -0,0 +1,7 @@ +Node1 Node2 +A D +B D +C D +D F +D G +D E diff --git a/test/BowtieBuilder/input/edges.txt b/test/BowtieBuilder/input/edges.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BowtieBuilder/input/edges.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BowtieBuilder/input/edges_bad.txt b/test/BowtieBuilder/input/edges_bad.txt new file mode 100644 index 00000000..c08a8503 --- /dev/null +++ b/test/BowtieBuilder/input/edges_bad.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/LocalNeighborhood/input/ln-nodes.txt b/test/BowtieBuilder/input/source.txt similarity index 66% rename from test/LocalNeighborhood/input/ln-nodes.txt rename to test/BowtieBuilder/input/source.txt index 35d242ba..b1e67221 100644 --- a/test/LocalNeighborhood/input/ln-nodes.txt +++ b/test/BowtieBuilder/input/source.txt @@ -1,2 +1,3 @@ A B +C diff --git a/test/BowtieBuilder/input/target.txt b/test/BowtieBuilder/input/target.txt new file mode 100644 index 00000000..0cae3d39 --- /dev/null +++ b/test/BowtieBuilder/input/target.txt @@ -0,0 +1,3 @@ +E +F +G diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py new file mode 100644 index 00000000..38858c5a --- /dev/null +++ b/test/BowtieBuilder/test_btb.py @@ -0,0 +1,67 
import sys
from filecmp import cmp
from pathlib import Path

import pytest

import spras.config as config

config.init_from_file("config/config.yaml")

# TODO consider refactoring to simplify the import
# NOTE(review): the wrapper is imported from the spras package below, so this extra path entry
# looks unnecessary - confirm and remove
SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute()
sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowTieBuilder')))
from spras.btb import BowtieBuilder as bowtiebuilder

TEST_DIR = Path('test', 'BowtieBuilder/')
OUT_FILE = Path(TEST_DIR, 'output', 'raw-pathway.txt')


class TestBowtieBuilder:
    """
    Tests for the BowtieBuilder run function
    The keyword arguments match the run signature: sources, targets, edges, output_file
    """

    def test_bowtiebuilder(self):
        """
        Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output
        """
        OUT_FILE.unlink(missing_ok=True)
        bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'source.txt'),
                          targets=Path(TEST_DIR, 'input', 'target.txt'),
                          edges=Path(TEST_DIR, 'input', 'edges.txt'),
                          output_file=OUT_FILE)
        assert OUT_FILE.exists(), 'Output file was not written'
        expected_file = Path(TEST_DIR, 'expected', 'output1.txt')
        assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file'

    def test_missing_arguments(self):
        """
        Run the bowtiebuilder algorithm with missing arguments
        """
        # The sources argument is deliberately omitted
        with pytest.raises(ValueError):
            bowtiebuilder.run(targets=Path(TEST_DIR, 'input', 'target.txt'),
                              edges=Path(TEST_DIR, 'input', 'edges.txt'),
                              output_file=OUT_FILE)

    def test_missing_file(self):
        """
        Run the bowtiebuilder algorithm with missing files
        """
        # unknown.txt does not exist in the input directory
        with pytest.raises(ValueError):
            bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'unknown.txt'),
                              targets=Path(TEST_DIR, 'input', 'target.txt'),
                              edges=Path(TEST_DIR, 'input', 'edges.txt'),
                              output_file=OUT_FILE)

    def test_format_error(self):
        """
        Run the bowtiebuilder algorithm with bad input data
        """
        # edges_bad.txt contains a line with only two columns
        with pytest.raises(IndexError):
            bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'source.txt'),
                              targets=Path(TEST_DIR, 'input', 'target.txt'),
                              edges=Path(TEST_DIR, 'input', 'edges_bad.txt'),
                              output_file=OUT_FILE)
'ln-network.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - assert OUT_FILE.exists(), 'Output file was not written' - expected_file = Path(TEST_DIR, 'expected_output', 'ln-output.txt') - assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' - - """ - Run the local neighborhood algorithm with a missing input file - """ - def test_missing_file(self): - with pytest.raises(OSError): - local_neighborhood(network_file=Path(TEST_DIR, 'input', 'missing.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - - """ - Run the local neighborhood algorithm with an improperly formatted network file - """ - def test_format_error(self): - with pytest.raises(ValueError): - local_neighborhood(network_file=Path(TEST_DIR, 'input', 'ln-bad-network.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - - # Write tests for the Local Neighborhood run function here diff --git a/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt new file mode 100644 index 00000000..0fb97edd --- /dev/null +++ b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt @@ -0,0 +1,9 @@ +A B 0.98 U +B C 0.77 U +A D 0.12 U +C D 0.89 U +C E 0.59 U +C F 0.5 U +F G 0.76 U +G H 0.92 U +G I 0.66 U diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 86319e2c..b9d14a6f 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -16,7 +16,8 @@ 'omicsintegrator2': 'edges', 'domino': 'network', 'pathlinker': 'network', - 'allpairs': 'network' + 'allpairs': 'network', + 'bowtiebuilder': 'edges' } diff --git a/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt new file mode 100644 index 00000000..350d85f7 --- 
/dev/null +++ b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt @@ -0,0 +1,7 @@ +A D +A B +C F +B C +F G +G I +G H diff --git a/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt new file mode 100644 index 00000000..5699a112 --- /dev/null +++ b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt @@ -0,0 +1,8 @@ +Node1 Node2 +A D +A B +C F +B C +F G +G I +G H diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 60763d13..2c56a3e0 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -11,7 +11,7 @@ # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples -algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino'] +algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'bowtiebuilder'] class TestParseOutputs: