From 820136806d53506b470d12071faba7d4825400e7 Mon Sep 17 00:00:00 2001
From: Anthony Gitter
Date: Fri, 25 Aug 2023 17:30:22 -0500
Subject: [PATCH 1/7] Generalize prepare_volume to support paths

---
 src/util.py       | 4 +++-
 test/test_util.py | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/util.py b/src/util.py
index 911fbf12..bb49f2a1 100644
--- a/src/util.py
+++ b/src/util.py
@@ -229,7 +229,7 @@ def hash_filename(filename: str, length: Optional[int] = None) -> str:
 
 
 # Because this is called independently for each file, the same local path can be mounted to multiple volumes
-def prepare_volume(filename: str, volume_base: str) -> Tuple[Tuple[PurePath, PurePath], str]:
+def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]:
     """
     Makes a file on the local file system accessible within a container by mapping the local (source) path to a new
     container (destination) path and renaming the file to be relative to the destination path.
@@ -245,6 +245,8 @@ def prepare_volume(filename: str, volume_base: str) -> Tuple[Tuple[PurePath, Pur
     if not base_path.is_absolute():
         raise ValueError(f'Volume base must be an absolute path: {volume_base}')
 
+    if isinstance(filename, PurePath):
+        filename = str(filename)
     filename_hash = hash_filename(filename, DEFAULT_HASH_LENGTH)
     dest = PurePosixPath(base_path, filename_hash)
 
diff --git a/test/test_util.py b/test/test_util.py
index 88ae0eb7..968ec716 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -39,6 +39,8 @@ def test_hash_params_sha1_base32(self):
                              [('oi1-edges.txt', '/spras', '/spras/MG4YPNK/oi1-edges.txt'),
                               ('test/OmicsIntegrator1/input/oi1-edges.txt', '/spras', '/spras/ZNNT3GR/oi1-edges.txt'),
                               ('test/OmicsIntegrator1/output/', '/spras', '/spras/DPCSFJV/output'),
+                              (PurePosixPath('test/OmicsIntegrator1/output/'), '/spras', '/spras/TNDO5TR/output'),
+                              ('test/OmicsIntegrator1/output', PurePosixPath('/spras'), '/spras/TNDO5TR/output'),
                               ('../src', '/spras', '/spras/NNBVZ6X/src')])
     def test_prepare_volume(self, filename, volume_base, expected_filename):
         _, container_filename = prepare_volume(filename, volume_base)

From ca7d5dbc20e246d1abfefe65a510f9e64ec2e2e8 Mon Sep 17 00:00:00 2001
From: Anthony Gitter
Date: Fri, 25 Aug 2023 17:50:31 -0500
Subject: [PATCH 2/7] Initial Omics Integrator 2 conversion to newer container usage

Update test cases
---
 src/omicsintegrator2.py           | 84 ++++++++++++------------------
 test/OmicsIntegrator2/test_oi2.py | 27 ++++++++--
 2 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/src/omicsintegrator2.py b/src/omicsintegrator2.py
index e76f9b4c..89aeeae8 100644
--- a/src/omicsintegrator2.py
+++ b/src/omicsintegrator2.py
@@ -1,11 +1,10 @@
 import os
 from pathlib import Path
 
-import docker
 import pandas as pd
 
 from src.prm import PRM
-from src.util import prepare_path_docker
+from src.util import prepare_path_docker, prepare_volume, run_container
 
 __all__ = ['OmicsIntegrator2']
 
@@ -15,7 +14,8 @@ class OmicsIntegrator2(PRM):
 
     def generate_inputs(data, filename_map):
         """
-        Access fields from the dataset and write the required input files
+        Access fields from the dataset and write the required input files.
+        Automatically converts edge weights to edge costs.
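A concrete illustration of the conversion described in the docstring line above, using the cost formula this patch adds to `generate_inputs` further down (the edge names and weights are toy values chosen for the example):

```python
import pandas as pd

# Toy interactome; the weights look like confidences in [0, 1], so the max is clamped to 1.0
edges_df = pd.DataFrame({'Interactor1': ['A', 'B', 'C'],
                         'Interactor2': ['B', 'C', 'D'],
                         'Weight': [0.25, 0.5, 0.75]})

# Base cost is 1.5x the (clamped) maximum weight, so higher-confidence edges receive lower costs
edges_df['cost'] = (max(edges_df['Weight'].max(), 1.0) * 1.5) - edges_df['Weight']
print(edges_df['cost'].tolist())  # [1.25, 1.0, 0.75]
```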
@param data: dataset @param filename_map: a dict mapping file types in the required_inputs to the filename for that type @return: @@ -25,26 +25,26 @@ def generate_inputs(data, filename_map): raise ValueError(f"{input_type} filename is missing") if data.contains_node_columns('prize'): - #NODEID is always included in the node table + # NODEID is always included in the node table node_df = data.request_node_columns(['prize']) - elif data.contains_node_columns(['sources','targets']): - #If there aren't prizes but are sources and targets, make prizes based on them - node_df = data.request_node_columns(['sources','targets']) + elif data.contains_node_columns(['sources', 'targets']): + # If there aren't prizes but are sources and targets, make prizes based on them + node_df = data.request_node_columns(['sources', 'targets']) node_df.loc[node_df['sources']==True, 'prize'] = 1.0 node_df.loc[node_df['targets']==True, 'prize'] = 1.0 else: raise ValueError("Omics Integrator 2 requires node prizes or sources and targets") - #Omics Integrator already gives warnings for strange prize values, so we won't here - node_df.to_csv(filename_map['prizes'],sep='\t',index=False,columns=['NODEID','prize'],header=['name','prize']) + # Omics Integrator already gives warnings for strange prize values, so we won't here + node_df.to_csv(filename_map['prizes'], sep='\t', index=False, columns=['NODEID', 'prize'], header=['name', 'prize']) edges_df = data.get_interactome() - #We'll have to update this when we make iteractomes more proper, but for now + # We'll have to update this when we make iteractomes more proper, but for now # assume we always get a weight and turn it into a cost. - # use the same approach as omicsintegrator2 by adding half the max cost as the base cost. + # use the same approach as OmicsIntegrator2 by adding half the max cost as the base cost. 
# if everything is less than 1 assume that these are confidences and set the max to 1 - edges_df['cost'] = (max(edges_df['Weight'].max(),1.0)*1.5) - edges_df['Weight'] - edges_df.to_csv(filename_map['edges'],sep='\t',index=False,columns=['Interactor1','Interactor2','cost'],header=['protein1','protein2','cost']) + edges_df['cost'] = (max(edges_df['Weight'].max(), 1.0)*1.5) - edges_df['Weight'] + edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'], header=['protein1', 'protein2', 'cost']) @@ -63,22 +63,25 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise if edges is None or prizes is None or output_file is None: raise ValueError('Required Omics Integrator 2 arguments are missing') - if singularity: - raise NotImplementedError('Omics Integrator 2 does not yet support Singularity') + work_dir = '/spras' - # Initialize a Docker client using environment variables - client = docker.from_env() - work_dir = Path(__file__).parent.parent.absolute() + # Each volume is a tuple (src, dest) + volumes = list() - edge_file = Path(edges) - prize_file = Path(prizes) + bind_path, edge_file = prepare_volume(edges, work_dir) + volumes.append(bind_path) + + bind_path, prize_file = prepare_volume(prizes, work_dir) + volumes.append(bind_path) out_dir = Path(output_file).parent # Omics Integrator 2 requires that the output directory exist - Path(work_dir, out_dir).mkdir(parents=True, exist_ok=True) + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(out_dir, work_dir) + volumes.append(bind_path) - command = ['OmicsIntegrator', '-e', edge_file.as_posix(), '-p', prize_file.as_posix(), - '-o', out_dir.as_posix(), '--filename', 'oi2'] + command = ['OmicsIntegrator', '-e', edge_file, '-p', prize_file, + '-o', mapped_out_dir, '--filename', 'oi2'] # Add optional arguments if w is not None: @@ -101,34 +104,13 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise print('Running Omics Integrator 2 with arguments: {}'.format(' '.join(command)), flush=True) - #Don't perform this step on systems where permissions aren't an issue like windows - need_chown = True - try: - uid = os.getuid() - except AttributeError: - need_chown = False - - try: - out = client.containers.run('reedcompbio/omics-integrator-2', - command, - stderr=True, - volumes={ - prepare_path_docker(work_dir): {'bind': '/OmicsIntegrator2', 'mode': 'rw'}}, - working_dir='/OmicsIntegrator2') - if need_chown: - #This command changes the ownership of output files so we don't - # get a permissions error when snakemake tries to touch the files - chown_command = " ".join(["chown",str(uid),out_dir.as_posix()+"/oi2*"]) - client.containers.run('reedcompbio/omics-integrator-2', - chown_command, - stderr=True, - volumes={prepare_path_docker(work_dir): {'bind': '/OmicsIntegrator2', 'mode': 'rw'}}, - working_dir='/OmicsIntegrator2') - - print(out.decode('utf-8')) - finally: - # Not sure whether this is needed - client.close() + container_framework = 'singularity' if singularity else 'docker' + out = run_container(container_framework, + 'reedcompbio/omics-integrator-2', + command, + volumes, + work_dir) + print(out) # TODO do we want to retain other output files? 
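The rewritten `run` method above routes everything through `prepare_volume` and `run_container` rather than calling the Docker SDK directly. A minimal sketch of that pattern follows; the expected container paths in the comments come from the `test_util.py` cases added in the first patch, and the `ls` command is only a placeholder for a real Omics Integrator 2 invocation:

```python
from pathlib import PurePosixPath

from src.util import prepare_volume, run_container

work_dir = '/spras'
volumes = list()

# prepare_volume hashes the host path into a directory under the volume base and returns
# the (src, dest) bind pair plus the path to use for that file inside the container
bind_path, mapped_dir = prepare_volume('test/OmicsIntegrator1/output/', work_dir)
volumes.append(bind_path)
print(mapped_dir)  # /spras/DPCSFJV/output

# After the first patch, PurePath arguments are accepted as well
bind_path, mapped_dir = prepare_volume(PurePosixPath('test/OmicsIntegrator1/output/'), work_dir)
volumes.append(bind_path)
print(mapped_dir)  # /spras/TNDO5TR/output

# run_container mounts each bind pair and runs the command with the requested framework
out = run_container('docker', 'reedcompbio/omics-integrator-2', ['ls', mapped_dir], volumes, work_dir)
print(out)
```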
# TODO if deleting other output files, write them all to a tmp directory and copy diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 91d90816..d517f2eb 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -1,8 +1,12 @@ +import shutil +from pathlib import Path + import pytest from src.omicsintegrator2 import OmicsIntegrator2 TEST_DIR = 'test/OmicsIntegrator2/' +OUT_FILE = Path(TEST_DIR, 'output', 'test.tsv') class TestOmicsIntegrator2: @@ -11,22 +15,27 @@ class TestOmicsIntegrator2: """ def test_oi2_required(self): # Only include required arguments + OUT_FILE.unlink(missing_ok=True) OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', prizes=TEST_DIR+'input/oi2-prizes.txt', - output_file=TEST_DIR+'output/test.tsv') + output_file=OUT_FILE) + assert OUT_FILE.exists() def test_oi2_some_optional(self): # Include optional argument + OUT_FILE.unlink(missing_ok=True) OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', prizes=TEST_DIR+'input/oi2-prizes.txt', - output_file=TEST_DIR+'output/test.tsv', + output_file=OUT_FILE, g=0) + assert OUT_FILE.exists() def test_oi2_all_optional(self): # Include all optional arguments + OUT_FILE.unlink(missing_ok=True) OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', prizes=TEST_DIR+'input/oi2-prizes.txt', - output_file=TEST_DIR+'output/test.tsv', + output_file=OUT_FILE, w=5, b=1, g=3, @@ -35,6 +44,7 @@ def test_oi2_all_optional(self): random_terminals=0, dummy_mode='terminals', seed=2) + assert OUT_FILE.exists() def test_oi2_missing(self): # Test the expected error is raised when required arguments are missing @@ -42,3 +52,14 @@ def test_oi2_missing(self): # No output_file OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', prizes=TEST_DIR+'input/oi2-prizes.txt') + + # Only run Singularity test if the binary is available on the system + # spython is only available on Unix, but do not explicitly skip non-Unix platforms + @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') + def test_oi2_singularity(self): + # Only include required arguments + OUT_FILE.unlink(missing_ok=True) + OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', + prizes=TEST_DIR+'input/oi2-prizes.txt', + output_file=OUT_FILE) + assert OUT_FILE.exists() From bbeb4ae6e70bc0c1712bbddcc96bc77f6e5931ee Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 25 Aug 2023 17:51:56 -0500 Subject: [PATCH 3/7] Update Dockerfile to not create a new conda env No longer requires activating the env which is a problem with Singularity Instead update the base env Creates conda conflicts if the base env is too new relative to the packages listed in the env file --- docker-wrappers/OmicsIntegrator2/Dockerfile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docker-wrappers/OmicsIntegrator2/Dockerfile b/docker-wrappers/OmicsIntegrator2/Dockerfile index 3bab7dd9..e0df2d2d 100644 --- a/docker-wrappers/OmicsIntegrator2/Dockerfile +++ b/docker-wrappers/OmicsIntegrator2/Dockerfile @@ -1,11 +1,6 @@ # Omics Integrator 2 wrapper # https://github.com/fraenkel-lab/OmicsIntegrator2 -# Activates the conda environment before running command inside container -# Uses the strategy from https://pythonspeed.com/articles/activate-conda-dockerfile/ -# by Itamar Turner-Trauring FROM continuumio/miniconda3:4.9.2 COPY environment.yml . 
-RUN conda env create -f environment.yml - -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "oi2"] +RUN conda env update --name base --file environment.yml --prune From a80ce2d27dfcdc37533cd6733093577f5e09b01d Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 15 Sep 2023 17:55:48 -0500 Subject: [PATCH 4/7] Use versioned Docker image --- .github/workflows/test-spras.yml | 6 +++--- docker-wrappers/OmicsIntegrator2/README.md | 12 +++++------- src/omicsintegrator2.py | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index e2dbf3dd..177401dd 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -77,7 +77,7 @@ jobs: - name: Pull Docker images run: | docker pull reedcompbio/omics-integrator-1:latest - docker pull reedcompbio/omics-integrator-2:latest + docker pull reedcompbio/omics-integrator-2:v2 docker pull reedcompbio/pathlinker:latest docker pull reedcompbio/meo:latest docker pull reedcompbio/mincostflow:latest @@ -98,8 +98,8 @@ jobs: path: docker-wrappers/OmicsIntegrator2/. dockerfile: docker-wrappers/OmicsIntegrator2/Dockerfile repository: reedcompbio/omics-integrator-2 - tags: latest - cache_froms: reedcompbio/omics-integrator-2:latest + tags: v2 + cache_froms: reedcompbio/omics-integrator-2:v2 push: false - name: Build PathLinker Docker image uses: docker/build-push-action@v1 diff --git a/docker-wrappers/OmicsIntegrator2/README.md b/docker-wrappers/OmicsIntegrator2/README.md index e1678826..4366028e 100644 --- a/docker-wrappers/OmicsIntegrator2/README.md +++ b/docker-wrappers/OmicsIntegrator2/README.md @@ -2,12 +2,7 @@ A Docker image for [Omics Integrator 2](https://github.com/fraenkel-lab/OmicsIntegrator2) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/omics-integrator-2). -## Activating conda inside a Docker container - -By default, an installed conda environment will not be activated inside the Docker container. -Docker does not invoke Bash as a login shell. -[This blog post](https://pythonspeed.com/articles/activate-conda-dockerfile/) provides a workaround demonstrated here in `Dockerfile` and `env.yml`. -It defines a custom ENTRYPOINT that uses `conda run` to run the command inside the conda environment. +## Building the Docker image To create the Docker image run: ``` @@ -27,8 +22,11 @@ Test code is located in `test/OmicsIntegrator2`. The `input` subdirectory contains test files `oi2-edges.txt` and `oi2-prizes.txt`. The Docker wrapper can be tested with `pytest`. +## Versions: +- v1: Created a named conda environment in the container and used `RUN` to execute commands inside that environment. Not compatible with Singularity. +- v2: Used the environment file to update the base conda environment so the `RUN` command was no longer needed. Compatible with Singularity. 
+ ## TODO - Attribute https://github.com/fraenkel-lab/OmicsIntegrator2 - Modify environment to use fraenkel-lab or [PyPI](https://pypi.org/project/OmicsIntegrator/) version instead of fork - Document usage -- Consider `continuumio/miniconda3:4.9.2-alpine` base image \ No newline at end of file diff --git a/src/omicsintegrator2.py b/src/omicsintegrator2.py index 89aeeae8..a2437815 100644 --- a/src/omicsintegrator2.py +++ b/src/omicsintegrator2.py @@ -106,7 +106,7 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, - 'reedcompbio/omics-integrator-2', + 'reedcompbio/omics-integrator-2:v2', command, volumes, work_dir) From c3d6626047c47630bc05f28e87dff02b4590fb86 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 15 Sep 2023 18:00:55 -0500 Subject: [PATCH 5/7] Code cleanup --- src/omicsintegrator2.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/omicsintegrator2.py b/src/omicsintegrator2.py index a2437815..0147dd49 100644 --- a/src/omicsintegrator2.py +++ b/src/omicsintegrator2.py @@ -1,10 +1,10 @@ -import os from pathlib import Path import pandas as pd +from src.dataset import Dataset from src.prm import PRM -from src.util import prepare_path_docker, prepare_volume, run_container +from src.util import add_rank_column, prepare_volume, run_container __all__ = ['OmicsIntegrator2'] @@ -12,13 +12,12 @@ class OmicsIntegrator2(PRM): required_inputs = ['prizes', 'edges'] - def generate_inputs(data, filename_map): + def generate_inputs(data: Dataset, filename_map): """ Access fields from the dataset and write the required input files. Automatically converts edge weights to edge costs. 
@param data: dataset @param filename_map: a dict mapping file types in the required_inputs to the filename for that type - @return: """ for input_type in OmicsIntegrator2.required_inputs: if input_type not in filename_map: @@ -46,8 +45,6 @@ def generate_inputs(data, filename_map): edges_df['cost'] = (max(edges_df['Weight'].max(), 1.0)*1.5) - edges_df['Weight'] edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'], header=['protein1', 'protein2', 'cost']) - - # TODO add parameter validation # TODO add reasonable default values # TODO document required arguments @@ -139,5 +136,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file): df = pd.read_csv(raw_pathway_file, sep='\t') df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line df = df.take([0, 1], axis=1) - df[3] = [1 for _ in range(len(df.index))] + df = add_rank_column(df) df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') From cc1f1046ec400b551c5b5c126241ceb973f96b1d Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 15 Sep 2023 22:21:49 -0500 Subject: [PATCH 6/7] Run Singularity test with Singularity --- test/OmicsIntegrator2/test_oi2.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index d517f2eb..9470649d 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -6,6 +6,8 @@ from src.omicsintegrator2 import OmicsIntegrator2 TEST_DIR = 'test/OmicsIntegrator2/' +EDGE_FILE = TEST_DIR+'input/oi2-edges.txt' +PRIZE_FILE = TEST_DIR+'input/oi2-prizes.txt' OUT_FILE = Path(TEST_DIR, 'output', 'test.tsv') @@ -16,16 +18,16 @@ class TestOmicsIntegrator2: def test_oi2_required(self): # Only include required arguments OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', - prizes=TEST_DIR+'input/oi2-prizes.txt', + OmicsIntegrator2.run(edges=EDGE_FILE, + prizes=PRIZE_FILE, output_file=OUT_FILE) assert OUT_FILE.exists() def test_oi2_some_optional(self): # Include optional argument OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', - prizes=TEST_DIR+'input/oi2-prizes.txt', + OmicsIntegrator2.run(edges=EDGE_FILE, + prizes=PRIZE_FILE, output_file=OUT_FILE, g=0) assert OUT_FILE.exists() @@ -33,8 +35,8 @@ def test_oi2_some_optional(self): def test_oi2_all_optional(self): # Include all optional arguments OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', - prizes=TEST_DIR+'input/oi2-prizes.txt', + OmicsIntegrator2.run(edges=EDGE_FILE, + prizes=PRIZE_FILE, output_file=OUT_FILE, w=5, b=1, @@ -43,15 +45,16 @@ def test_oi2_all_optional(self): noisy_edges=0, random_terminals=0, dummy_mode='terminals', - seed=2) + seed=2, + singularity=False) assert OUT_FILE.exists() def test_oi2_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): # No output_file - OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', - prizes=TEST_DIR+'input/oi2-prizes.txt') + OmicsIntegrator2.run(edges=EDGE_FILE, + prizes=PRIZE_FILE) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @@ -59,7 +62,8 @@ def test_oi2_missing(self): def test_oi2_singularity(self): # Only include required arguments 
OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=TEST_DIR+'input/oi2-edges.txt', - prizes=TEST_DIR+'input/oi2-prizes.txt', - output_file=OUT_FILE) + OmicsIntegrator2.run(edges=EDGE_FILE, + prizes=PRIZE_FILE, + output_file=OUT_FILE, + singularity=True) assert OUT_FILE.exists() From 7b310431cb903e8b1604867238b25f5e553f33c9 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 15 Sep 2023 22:23:40 -0500 Subject: [PATCH 7/7] Readme correction --- docker-wrappers/OmicsIntegrator2/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-wrappers/OmicsIntegrator2/README.md b/docker-wrappers/OmicsIntegrator2/README.md index 4366028e..c7c7d11f 100644 --- a/docker-wrappers/OmicsIntegrator2/README.md +++ b/docker-wrappers/OmicsIntegrator2/README.md @@ -23,8 +23,8 @@ The `input` subdirectory contains test files `oi2-edges.txt` and `oi2-prizes.txt The Docker wrapper can be tested with `pytest`. ## Versions: -- v1: Created a named conda environment in the container and used `RUN` to execute commands inside that environment. Not compatible with Singularity. -- v2: Used the environment file to update the base conda environment so the `RUN` command was no longer needed. Compatible with Singularity. +- v1: Created a named conda environment in the container and used `ENTRYPOINT` to execute commands inside that environment. Not compatible with Singularity. +- v2: Used the environment file to update the base conda environment so the `ENTRYPOINT` command was no longer needed. Compatible with Singularity. ## TODO - Attribute https://github.com/fraenkel-lab/OmicsIntegrator2
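For context on the new Singularity test, the `singularity` flag only selects the framework string that `run` passes to `run_container`; the real dispatcher lives in `src/util.py` and is not shown in this PR. The sketch below is an assumption of roughly what that dispatch does, reusing the docker-py call from the code this PR removed and the `singularity exec docker://` CLI pattern, and is not the actual SPRAS implementation:

```python
import subprocess

import docker


def run_container_sketch(framework, container, command, volumes, working_dir):
    """Illustrative stand-in for src.util.run_container; the real function may differ."""
    if framework == 'docker':
        client = docker.from_env()
        out = client.containers.run(container,
                                    command,
                                    volumes={str(src): {'bind': str(dest), 'mode': 'rw'}
                                             for src, dest in volumes},
                                    working_dir=working_dir)
        return out.decode('utf-8')
    if framework == 'singularity':
        # Singularity can run the same image from a docker:// URI. Because `singularity exec`
        # does not apply the image's Docker ENTRYPOINT, the v2 image installs Omics Integrator
        # into the base conda environment (patch 3) so commands work without `conda run`.
        bind_args = []
        for src, dest in volumes:
            bind_args.extend(['--bind', f'{src}:{dest}'])
        result = subprocess.run(['singularity', 'exec', *bind_args, '--pwd', working_dir,
                                 f'docker://{container}', *command],
                                capture_output=True, text=True, check=True)
        return result.stdout
    raise ValueError(f'Unknown container framework: {framework}')
```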