RWR and TieDIE Integration #92

Closed · wants to merge 26 commits

Commits
- ecec5b6 Create Docker Image for random-walk (Jun 2, 2023)
- 34d3ef3 Implementing src/random_walk.py (questions about the output files) (Jun 3, 2023)
- aff0893 Updating Dockerfile and src/random_walk.py (new image and single raw_… (Jun 5, 2023)
- 5755c62 Make RWWR working (change Dockerfile for current random-walk-with-res… (Jun 6, 2023)
- 13dd12a Make the program running (writing the tests); clean up the codes; upd… (Jun 7, 2023)
- 2e6aaa5 Initiate dockerizing TieDIE repo (Jun 7, 2023)
- c214002 Dockerize TieDIE and integrate it into SPRAS (Jun 8, 2023)
- 78036de Fixing the format of TieDIE output pathway; need to consider RWwR (Jun 9, 2023)
- 45d5733 Update RandomWalk and TieDIE (add user-defined threshold for RWwR and… (Jun 10, 2023)
- fafbff9 Complete the tests for RWR and TieDIE (test and GitHub actions); add … (Jun 16, 2023)
- c958aa3 Merge branch 'master' into RWR_and_TieDIE (agitter, Jun 21, 2023)
- fe863e0 Fixing pre-commit hooks and updating the GitHub Action (Jun 21, 2023)
- 62fc246 Fixing the GitHub Action (Jun 21, 2023)
- becf7bc Updating the Docker Image for RWR and TieDIE (Jun 21, 2023)
- b7d2c94 Completing changes requested (Jun 22, 2023)
- 8e94cf3 Renaming random_walk to rwr (Jun 23, 2023)
- f467db1 Fix a minor bug in workflows/test-spras.yaml (RandomWalk to RWR) (Jun 29, 2023)
- 7833ea5 Resolved some issues suggested by Neha (updating configfile; adding p… (Jul 4, 2023)
- f7f2ba4 Merge branch 'Reed-CompBio:master' into RWR_and_TieDIE (Lyce24, Jul 9, 2023)
- 53faeb0 Resolved some issues suggested by Tony (pushed the ReedCompBio Docker… (Jul 15, 2023)
- b7f007f Merge branch 'RWR_and_TieDIE' of https://github.com/Lyce24/spras into… (Jul 15, 2023)
- 269c14e Pre-commit check (Jul 15, 2023)
- dd75af8 Merge branch 'Reed-CompBio:master' into RWR_and_TieDIE (Lyce24, Jul 19, 2023)
- 1f5a233 Added single source mode to RWR, updated the input file format for RW… (Jul 21, 2023)
- b2d6a8e Updated tests for RWR (Jul 21, 2023)
- 4e25538 Minor updates for RWR (param updates; test updates) (Jul 22, 2023)
20 changes: 20 additions & 0 deletions .github/workflows/test-spras.yml
@@ -81,6 +81,8 @@ jobs:
docker pull reedcompbio/pathlinker:latest
docker pull reedcompbio/meo:latest
docker pull reedcompbio/mincostflow:latest
docker pull reedcompbio/random-walk-with-restart:latest
docker pull reedcompbio/tiedie:latest
- name: Build Omics Integrator 1 Docker image
uses: docker/build-push-action@v1
with:
@@ -126,6 +128,24 @@ jobs:
tags: latest
cache_froms: reedcompbio/mincostflow:latest
push: false
- name: Build RWR Docker image
uses: docker/build-push-action@v1
with:
path: docker-wrappers/RWR/.
dockerfile: docker-wrappers/RWR/Dockerfile
repository: reedcompbio/random-walk-with-restart
tags: latest
cache_froms: reedcompbio/random-walk-with-restart:latest
push: false
- name: Build TieDIE Docker image
uses: docker/build-push-action@v1
with:
path: docker-wrappers/TieDIE/.
dockerfile: docker-wrappers/TieDIE/Dockerfile
repository: reedcompbio/tiedie
tags: latest
cache_froms: reedcompbio/tiedie:latest
push: false

# Run pre-commit checks on source files
pre-commit:
18 changes: 18 additions & 0 deletions config/config.yaml
@@ -33,6 +33,24 @@
run1:
k: range(100,201,100)

- name: "rwr"
params:
include : true
directed : true
run1:
single_source: [1, 0]
df: [0.85, 0.75]
threshold: [0.01, 0.05]
w : [0.02, 0.05]

- name: "tiedie"
params:
include: true
directed: true
run1:
pagerank: [true]
s: [1.1]

- name: "omicsintegrator1"
params:
include: true
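Assuming SPRAS expands each parameter list in a run block into a full cross product of combinations (an assumption about the config semantics; it is not stated in this diff), the `rwr` `run1` block above would yield 2 × 2 × 2 × 2 = 16 parameter sets. A sketch of that expansion:

```python
from itertools import product

# Hypothetical expansion of the rwr run1 parameter lists above into
# individual parameter combinations, assuming cross-product semantics
run1 = {
    "single_source": [1, 0],
    "df": [0.85, 0.75],
    "threshold": [0.01, 0.05],
    "w": [0.02, 0.05],
}

# Pair each value tuple from the cross product with the parameter names
keys = list(run1)
combos = [dict(zip(keys, values)) for values in product(*run1.values())]

print(len(combos))   # 16
print(combos[0])     # {'single_source': 1, 'df': 0.85, 'threshold': 0.01, 'w': 0.02}
```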
8 changes: 8 additions & 0 deletions docker-wrappers/RWR/Dockerfile
@@ -0,0 +1,8 @@
FROM python:3.10.7

WORKDIR /RWR

# installing essential packages
RUN pip install networkx==2.8 numpy==1.24.3 scipy==1.10.1

RUN wget https://raw.githubusercontent.com/Reed-CompBio/random-walk-with-restart/8ca6969fb2fc744edd544535e2ebd67217b0606c/random_walk.py
29 changes: 29 additions & 0 deletions docker-wrappers/RWR/README.md
@@ -0,0 +1,29 @@
# RWR Docker image

A Docker image for the random-walk-with-restart algorithm that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/random-walk-with-restart).

To create the Docker image run:
```
docker build -t reedcompbio/random-walk-with-restart -f Dockerfile .
```
from this directory.

To inspect the installed Python packages:
```
winpty docker run reedcompbio/random-walk-with-restart pip list
```
The `winpty` prefix is only needed on Windows.

## Testing
Test code is located in `test/RWR`.
The `input` subdirectory contains test files `source_nodes.txt`, `target_nodes.txt` and `edges.txt`.
The Docker wrapper can be tested with `pytest`, or only this algorithm's tests can be run with `pytest -k test_rwr.py`.

Alternatively, to test the Docker image directly, run the following command from the root of the `spras` repository
```
docker run -w /data --mount type=bind,source=/${PWD},target=/data reedcompbio/random-walk-with-restart python random_walk.py \
/data/test/RWR/input/edges.txt /data/test/RWR/input/source_nodes.txt /data/test/RWR/input/target_nodes.txt --damping_factor 0.85 --selection_function min --threshold 0.001 --w 0.0001 --output_file /data/test/RWR/output/output.txt
```
This will run RWR on the test input files and write the output file to `test/RWR/output` in the `spras` repository.
Windows users may need to escape the absolute paths so that `/data` becomes `//data`, etc.

11 changes: 11 additions & 0 deletions docker-wrappers/TieDIE/Dockerfile
@@ -0,0 +1,11 @@
FROM python:2.7.15

WORKDIR /TieDIE

COPY requirements.txt .
RUN pip install -r requirements.txt && \
commit=c64ab5c4b4e0f6cfac4b5151c7d9f1d7ea331e65 && \
wget https://github.com/Reed-CompBio/TieDIE/tarball/$commit && \
tar -zxvf $commit && \
rm $commit && \
mv Reed-CompBio-TieDIE-*/* .
20 changes: 20 additions & 0 deletions docker-wrappers/TieDIE/README.md
@@ -0,0 +1,20 @@
# TieDIE Docker image

A Docker image for [TieDIE](https://github.com/Reed-CompBio/TieDIE) that is available on [DockerHub](https://hub.docker.com/r/reedcompbio/tiedie).

To create the Docker image run:
```
docker build -t reedcompbio/tiedie -f Dockerfile .
```
from this directory.

To inspect the installed Python packages:
```
winpty docker run reedcompbio/tiedie pip list
```
The `winpty` prefix is only needed on Windows.

## Testing
Test code is located in `test/TieDIE`.
The `input` subdirectory contains test files `pathway.txt`, `target.txt` and `source.txt`.
The Docker wrapper can be tested with `pytest`, or only this algorithm's tests can be run with `pytest -k test_tiedie.py`.
3 changes: 3 additions & 0 deletions docker-wrappers/TieDIE/requirements.txt
@@ -0,0 +1,3 @@
networkx==1.11
numpy==1.11.3
scipy==0.18.1
2 changes: 2 additions & 0 deletions src/runner.py
@@ -6,6 +6,8 @@
from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1
from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2
from src.pathlinker import PathLinker as pathlinker
from src.rwr import RWR as rwr
from src.tiedie import TieDIE as tiedie


def run(algorithm, params):
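The lowercase aliases added above suggest a name-based dispatch inside `run`. A hypothetical standalone sketch of that pattern (the registry, `FakeAlg`, and error handling are illustrative stand-ins, not the actual `src/runner.py` implementation):

```python
# A minimal sketch of name-based dispatch: each lowercase alias maps an
# algorithm name from the config to its wrapper class (illustrative only)
class FakeAlg:
    @staticmethod
    def run(**params):
        return params

# Stand-in registry; the real keys would be names like 'rwr' and 'tiedie'
ALGORITHMS = {'fakealg': FakeAlg}

def run(algorithm, params):
    try:
        alg = ALGORITHMS[algorithm.lower()]
    except KeyError as exc:
        raise ValueError(f'Unknown algorithm: {algorithm}') from exc
    return alg.run(**params)

print(run('fakealg', {'k': 10}))  # {'k': 10}
```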
170 changes: 170 additions & 0 deletions src/rwr.py
@@ -0,0 +1,170 @@
import warnings
from pathlib import Path

import pandas as pd

from src.prm import PRM
from src.util import add_rank_column, prepare_volume, run_container

__all__ = ['RWR']

class RWR(PRM):
# RWR needs a weighted edges file and a prizes file that labels each node as a source or target
required_inputs = ['edges', 'prizes']

@staticmethod
def generate_inputs(data, filename_map):
"""
Access fields from the dataset and write the required input files
@param data: dataset
@param filename_map: a dict mapping file types in the required_inputs to the filename for that type
"""
# Ensure the required inputs are in the filename_map
for input_type in RWR.required_inputs:
if input_type not in filename_map:
raise ValueError(f"{input_type} filename is missing")

sources_targets = data.request_node_columns(["sources", "targets"])
if sources_targets is None:
if data.contains_node_columns('prize'):
sources_targets = data.request_node_columns(['prize'])
input_df = sources_targets[["NODEID"]].copy()
input_df["Node type"] = "source"
else:
raise ValueError("No sources, targets, or prizes found in dataset")
else:
both_series = sources_targets.sources & sources_targets.targets
for _index, row in sources_targets[both_series].iterrows():
warn_msg = row.NODEID + " has been labeled as both a source and a target."
# Only use stacklevel 1 because this is due to the data not the code context
warnings.warn(warn_msg, stacklevel=1)

# Create the node type file
input_df = sources_targets[["NODEID"]].copy()
input_df.loc[sources_targets["sources"] == True, "Node type"] = "source"
input_df.loc[sources_targets["targets"] == True, "Node type"] = "target"

if data.contains_node_columns('prize'):
node_df = data.request_node_columns(['prize'])
input_df = pd.merge(input_df, node_df, on='NODEID')
else:
# If there are no prizes but there are sources and targets, assign a default prize of 1.0
input_df['prize'] = 1.0

input_df.to_csv(filename_map["prizes"], sep="\t", index=False, columns=["NODEID", "prize", "Node type"])

# create the network of edges
edges = data.get_interactome()

# Create the edges file containing the head node, tail node, and edge weight
edges.to_csv(filename_map['edges'], sep="\t", index=False, columns=["Interactor1","Interactor2","Weight"])


# RWR does not perform a parameter validation step
@staticmethod
def run(edges=None, prizes=None, output_file=None, single_source=None, df=None, w=None, f=None, threshold=None, singularity=False):
"""
Run RandomWalk with Docker
@param edges: input network file (required)
@param prizes: input node prizes with sources and targets (required)
@param output_file: path to the output pathway file (required)
@param df: damping factor for restarting (default 0.85) (optional)
@param single_source: 1 for single source, 0 for source-target (default 1) (optional)
@param w: lower bound to filter the edges based on the edge confidence (default 0.00) (optional)
@param f: selection function (default 'min') (optional)
@param threshold: threshold for constructing the final pathway (default 0.0001) (optional)
@param singularity: if True, run using the Singularity container instead of the Docker container
"""

if not edges or not prizes or not output_file:
raise ValueError('Required RWR arguments are missing')

work_dir = '/spras'

# Each volume is a (src, dest) tuple that is mounted into the container
volumes = list()

bind_path, edges_file = prepare_volume(edges, work_dir)
volumes.append(bind_path)

bind_path, prizes_file = prepare_volume(prizes, work_dir)
volumes.append(bind_path)


out_dir = Path(output_file).parent

# RWR requires that the output directory exist
out_dir.mkdir(parents=True, exist_ok=True)
bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
volumes.append(bind_path)
mapped_out_prefix = mapped_out_dir + '/out'  # Use POSIX path inside the container


command = ['python',
'/RWR/random_walk.py',
'--edges_file', edges_file,
'--prizes_file', prizes_file,
'--output_file', mapped_out_prefix]

if single_source is not None:
command.extend(['--single_source', str(single_source)])
if df is not None:
command.extend(['--damping_factor', str(df)])
if f is not None:
command.extend(['--selection_function', str(f)])
if w is not None:
command.extend(['--w', str(w)])
if threshold is not None:
command.extend(['--threshold', str(threshold)])

print('Running RWR with arguments: {}'.format(' '.join(command)), flush=True)


container_framework = 'singularity' if singularity else 'docker'
out = run_container(container_framework,
'reedcompbio/random-walk-with-restart',
command,
volumes,
work_dir)
print(out)

output = Path(out_dir, 'out')
output.rename(output_file)


@staticmethod
def parse_output(raw_pathway_file, standardized_pathway_file):
"""
Convert a predicted pathway into the universal format
@param raw_pathway_file: pathway file produced by an algorithm's run function
@param standardized_pathway_file: the same pathway written in the universal format
"""

df = pd.read_csv(raw_pathway_file, sep="\t")

pathway_output_file = standardized_pathway_file
edge_output_file = standardized_pathway_file.replace('.txt', '') + '_edges.txt'
node_output_file = standardized_pathway_file.replace('.txt', '') + '_nodes.txt'

# Select the edge rows (Type 1)
df_edge = df.loc[df["Type"] == 1]

# Drop the Type indicator column and write the edges to a file
df_edge = df_edge.drop(columns=['Type'])
df_edge.to_csv(edge_output_file, sep="\t", index=False, header=True)

# Select the node rows (Type 2)
df_node = df.loc[df['Type'] == 2]
# Rename the columns to Node, Pr, R_Pr, Final_Pr
df_node = df_node.drop(columns=['Type'])
df_node = df_node.rename(columns={'Node1': 'Node', 'Node2': 'Pr', 'Edge Flux': 'R_Pr', 'Weight': 'Final_Pr'})
df_node.to_csv(node_output_file, sep="\t", index=False, header=True)

# Select the final pathway rows (Type 3) and keep only the node pair columns
df_pathway = df.loc[df['Type'] == 3]
df_pathway = df_pathway.drop(columns=['Type', 'InNetwork', 'Weight', 'Edge Flux'])
# Add a column of 1s to represent the rank
df_pathway = add_rank_column(df_pathway)
df_pathway.to_csv(pathway_output_file, sep="\t", index=False, header=False)
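The `Type`-based splitting in `parse_output` can be illustrated with a small in-memory frame (a sketch; the toy values are made up, and the real column layout comes from the RWR container's raw output file):

```python
import pandas as pd

# Toy raw RWR output: Type 1 rows are edges, Type 2 rows are node scores,
# and Type 3 rows are the final pathway edges (all values illustrative)
df = pd.DataFrame({
    "Node1": ["A", "B", "A", "A"],
    "Node2": ["B", "C", "0.5", "B"],
    "Edge Flux": [0.1, 0.2, 0.3, None],
    "Weight": [1.0, 0.8, 0.9, None],
    "InNetwork": [1, 1, 1, None],
    "Type": [1, 1, 2, 3],
})

# Split the single raw file into edge, node, and pathway tables by Type
edges = df.loc[df["Type"] == 1].drop(columns=["Type"])
nodes = (df.loc[df["Type"] == 2]
           .drop(columns=["Type"])
           .rename(columns={"Node1": "Node", "Node2": "Pr",
                            "Edge Flux": "R_Pr", "Weight": "Final_Pr"}))
pathway = df.loc[df["Type"] == 3].drop(columns=["Type", "InNetwork", "Weight", "Edge Flux"])

print(len(edges), len(nodes), len(pathway))  # 2 1 1
```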