From 3e9800e22b5d43e3a546f26463f39447037f43e1 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 17:25:24 +0000 Subject: [PATCH 01/30] Add ability to unpack singularity images When running nested, unprivileged containers singularity containers, there's a bit of extra setup we need to perform in order to get things working. In particular, in order to avoid FUSE requirements, we can "unpack" containers into a directory after we've converted the docker image to a `.sif`. This PR adds a new configuration option to allow that unpacking, as well as a SPRAS runtime container that can be used to demonstrate this behavior in HTCondor. Bundled along with these changes are a variety of package version updates that I needed to change to get things working. --- .pre-commit-config.yaml | 2 +- config/config.yaml | 5 + docker-wrappers/SPRAS/Dockerfile | 16 +++ docker-wrappers/SPRAS/README.md | 35 +++++ docker-wrappers/SPRAS/example_config.yaml | 151 ++++++++++++++++++++++ docker-wrappers/SPRAS/spras.sh | 2 + docker-wrappers/SPRAS/spras.sub | 27 ++++ pyproject.toml | 3 +- spras/config.py | 10 ++ spras/containers.py | 33 ++++- 10 files changed, 276 insertions(+), 8 deletions(-) create mode 100644 docker-wrappers/SPRAS/Dockerfile create mode 100644 docker-wrappers/SPRAS/README.md create mode 100644 docker-wrappers/SPRAS/example_config.yaml create mode 100644 docker-wrappers/SPRAS/spras.sh create mode 100644 docker-wrappers/SPRAS/spras.sub diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 55503ef4..67958453 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ # See https://pre-commit.com/ for documentation default_language_version: # Match this to the version specified in environment.yml - python: python3.8 + python: python3.11 repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 # Use the ref you want to point at diff --git a/config/config.yaml b/config/config.yaml index 5fe6083b..41d923b1 100644 
--- a/config/config.yaml +++ b/config/config.yaml @@ -7,6 +7,11 @@ hash_length: 7 # 'singularity'. If container_framework is not specified, SPRAS will default to docker. container_framework: docker +# Only used if container_framework is set to singularity, this will unpack the singularity containers +# to the local filesystem. This is useful when PRM containers need to run inside another container, +# such as would be the case in an HTCondor/OSPool environment. +unpack_singularity: false + # Allow the user to configure which container registry containers should be pulled from # Note that this assumes container names are consistent across registries, and that the # registry being passed doesn't require authentication for pull actions diff --git a/docker-wrappers/SPRAS/Dockerfile b/docker-wrappers/SPRAS/Dockerfile new file mode 100644 index 00000000..647d9006 --- /dev/null +++ b/docker-wrappers/SPRAS/Dockerfile @@ -0,0 +1,16 @@ +FROM almalinux:9 + +RUN dnf install -y epel-release + +# gcc/g++ are required for building several of the packages if you're using apple silicon +RUN dnf update -y && \ + dnf install -y gcc gcc-c++ \ + python3.11 python3.11-pip python3.11-devel \ + docker apptainer + +COPY / /spras/ + +WORKDIR /spras + +# Install spras into the container +RUN pip3.11 install . diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md new file mode 100644 index 00000000..9436a9e5 --- /dev/null +++ b/docker-wrappers/SPRAS/README.md @@ -0,0 +1,35 @@ +# SPRAS Docker image + +## Building +A Docker image for SPRAS that is available on [Dockerhub]() +This image comes bundled with all of the necessary software packages to run SPRAS, and can be used for execution in distributed environments (like HTCondor). + +To create the Docker image, make sure you are in this repository's root directory, and from your terminal run: +``` +docker build -t reedcompbio/spras -f docker-wrappers/SPRAS/Dockerfile . 
+``` + +This will copy the entire SPRAS repository into the container and install SPRAS with `pip`. As such, any changes you've made to the current SPRAS repository will be reflected in the version of SPRAS installed in the container. Since SPRAS +is being installed with `pip`, it's also possible to specify that you want development modules installed as well. If you're using the container for development and you want the optional `pre-commit` and `pytest` packages as well as a +spras package that receives changes without re-installation, change the +`pip` installation line to: +``` +pip install -e .[dev] +``` +This will cause changes to spras source code to update the installed package. + +**Note:** This image will build for the same platform that is native to your system (ie amd64 or arm64). If you need to run this in a remote environment like HTCondor that is almost certainly `amd64` but you're building from Apple Silicon, it is recommended to either modify the Dockerfile to pin the platform: +``` +FROM --platform=linux/amd64 almalinux:9 +``` + +Or to temporarily override your system's default by exporting the environment variable: +``` +export DOCKER_DEFAULT_PLATFORM=linux/amd64 +``` +(This environment variable can then be cleared by running `unset DOCKER_DEFAULT_PLATFORM` to return your system to its default) + + +## Testing + +The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml new file mode 100644 index 00000000..909bcfea --- /dev/null +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -0,0 +1,151 @@ +# Global workflow control + +# The length of the hash used to identify a parameter combination +hash_length: 7 + +# Specify the container framework. Current supported versions include 'docker' and +# 'singularity'. If container_framework is not specified, SPRAS will default to docker. 
+container_framework: singularity + +# Unpack singularity +unpack_singularity: true + +# Allow the user to configure which container registry containers should be pulled from +# Note that this assumes container names are consistent across registries, and that the +# registry being passed doesn't require authentication for pull actions +container_registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio + +# This list of algorithms should be generated by a script which checks the filesystem for installs. +# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm +# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) +# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change +# which algorithms are run in a given experiment. +# +# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple +# parameters are specified then the algorithm will be run as many times as needed to cover all parameter +# combinations. For instance if we have the following: +# - name: "myAlg" +# params: +# include: true +# a: [1,2] +# b: [0.5,0.75] +# +# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0.75). Pretty neat, but be +# careful: too many parameters might make your runs take a long time. 
+ +algorithms: + - name: "pathlinker" + params: + include: false + run1: + k: range(100,201,100) + + - name: "omicsintegrator1" + params: + include: true + run1: + r: [5] + b: [5, 6] + w: np.linspace(0,5,2) + g: [3] + d: [10] + + - name: "omicsintegrator2" + params: + include: true + run1: + b: [4] + g: [0] + run2: + b: [2] + g: [3] + + - name: "meo" + params: + include: true + run1: + max_path_length: [3] + local_search: ["Yes"] + rand_restarts: [10] + + - name: "mincostflow" + params: + include: true + run1: + flow: [1] # The flow must be an int + capacity: [1] + + - name: "allpairs" + params: + include: true + + - name: "domino" + params: + include: true + run1: + slice_threshold: [0.3] + module_threshold: [0.05] + + +# Here we specify which pathways to run and other file location information. +# DataLoader.py can currently only load a single dataset +# Assume that if a dataset label does not change, the lists of associated input files do not change +datasets: + - + label: data0 + node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] + # DataLoader.py can currently only load a single edge file, which is the primary network + edge_files: ["network.txt"] + # Placeholder + other_files: [] + # Relative path from the spras directory + data_dir: "input" + # - + # label: data1 + # # Reuse some of the same sources file as 'data0' but different network and targets + # node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] + # edge_files: ["alternative-network.txt"] + # other_files: [] + # # Relative path from the spras directory + # data_dir: "input" + +# If we want to reconstruct then we should set run to true. +# TODO: if include is true above but run is false here, algs are not run. +# is this the behavior we want? 
+reconstruction_settings: + + #set where everything is saved + locations: + + #place the save path here + # TODO move to global + reconstruction_dir: "output" + + run: true + +analysis: + # Create one summary per pathway file and a single summary table for all pathways for each dataset + summary: + include: true + # Create output files for each pathway that can be visualized with GraphSpace + graphspace: + include: true + # Create Cytoscape session file with all pathway graphs for each dataset + cytoscape: + include: false + # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset + ml: + include: true + # specify how many principal components to calculate + components: 2 + # boolean to show the labels on the pca graph + labels: true + # 'ward', 'complete', 'average', 'single' + # if linkage: ward, must use metric: euclidean + linkage: 'ward' + # 'euclidean', 'manhattan', 'cosine' + metric: 'euclidean' diff --git a/docker-wrappers/SPRAS/spras.sh b/docker-wrappers/SPRAS/spras.sh new file mode 100644 index 00000000..45e1f858 --- /dev/null +++ b/docker-wrappers/SPRAS/spras.sh @@ -0,0 +1,2 @@ +#!/bin/bash +snakemake --cores 4 --configfile example_config.yaml diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub new file mode 100644 index 00000000..09695d9a --- /dev/null +++ b/docker-wrappers/SPRAS/spras.sub @@ -0,0 +1,27 @@ +# A SPRAS submit file to demonstrate running SPRAS on HTCondor +container_image = docker://reedcompbio/spras:v1 +universe = container + +# Specify names for log/stdout/stderr files generated by HTCondor +log = spras_$(Cluster).log +output = spras_$(Cluster).out +error = spras_$(Cluster).err + +# Specify the script to run inside the container. 
This is simply a wrapper on the Snakefile +executable = spras.sh + +# Handle transferring required inputs/outputs +should_transfer_files = YES +when_to_transfer_output = ON_EXIT +transfer_input_files = example_config.yaml, ../../input, ../../Snakefile, spras.sh +transfer_output_files = output + +# System specifications. +request_cpus = 4 +request_memory = 8GB +request_disk = 16GB + +# Only run on nodes with Singularity installed +requirements = (HAS_SINGULARITY == True) + +queue 1 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 5962fc3c..ac535c91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ classifiers = [ requires-python = ">=3.8" dependencies = [ "adjusttext==0.7.3", - "snakemake==7.19.1", + # A bug was introduced in older versions of snakemake that prevent it from running. Update to fix + "snakemake==8.4.4", "docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. This appears to have no effect "matplotlib==3.5", "networkx==2.8", diff --git a/spras/config.py b/spras/config.py index fdd51165..c1895e01 100644 --- a/spras/config.py +++ b/spras/config.py @@ -65,6 +65,8 @@ def __init__(self, raw_config): self.container_framework = None # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio" self.container_prefix = DEFAULT_CONTAINER_PREFIX + # A Boolean specifying whether to unpack singularity containers. Default is False + self.unpack_singularity = False # A dictionary to store configured datasets against which SPRAS will be run self.datasets = None # The hash length SPRAS will use to identify parameter combinations. Default is 7 @@ -114,6 +116,14 @@ def process_config(self, raw_config): else: self.container_framework = "docker" + # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. 
+ if "unpack_singularity" in raw_config: + # The value in the config is a string, and we need to convert it to a bool. + unpack_singularity = raw_config["unpack_singularity"].lower() in ("true", "yes", "t", "1") + if unpack_singularity and self.container_framework != "singularity": + print("Warning: unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.") + self.unpack_singularity = unpack_singularity + # Grab registry from the config, and if none is provided default to docker if "container_registry" in raw_config and raw_config["container_registry"]["base_url"] != "" and raw_config["container_registry"]["owner"] != "": self.container_prefix = raw_config["container_registry"]["base_url"] + "/" + raw_config["container_registry"]["owner"] diff --git a/spras/containers.py b/spras/containers.py index bdb18acd..a51fde31 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -181,6 +181,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ bind_paths = [f'{prepare_path_docker(src)}:{dest}' for src, dest in volumes] # TODO is try/finally needed for Singularity? + # To debug a container add the execute arguments: singularity_options=['--debug'], quiet=False singularity_options = ['--cleanenv', '--containall', '--pwd', working_dir] # Singularity does not allow $HOME to be set as a regular environment variable # Capture it and use the special argument instead @@ -190,12 +191,32 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ else: singularity_options.extend(['--env', environment]) - # To debug a container add the execute arguments: singularity_options=['--debug'], quiet=False - # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert - return Client.execute('docker://' + container, - command, - options=singularity_options, - bind=bind_paths) + # Handle unpacking singularity image if needed. 
Potentially needed for running nested unprivileged containers + if config.config.unpack_singularity: + # Split the string by "/" + path_elements = container.split("/") + + # Get the last element, which will indicate the base container name + base_cont = path_elements[-1] + base_cont = base_cont.replace(":", "_").split(":")[0] + + # Pull the container to a local .sif + # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert + image_path = Client.pull('docker://' + container, name=base_cont+'.sif') + Client.build(recipe=image_path, image=base_cont, sandbox=True, sudo=False) + + # Execute the locally unpacked container. + return Client.execute(base_cont, + command, + options=singularity_options, + bind=bind_paths) + + else: + # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert + return Client.execute('docker://' + container, + command, + options=singularity_options, + bind=bind_paths) # Because this is called independently for each file, the same local path can be mounted to multiple volumes def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]: From 3dc73bc249184ac1880530cf059208a59925164c Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 19:35:05 +0000 Subject: [PATCH 02/30] Bump missed python versions --- .github/workflows/test-spras.yml | 2 +- environment.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index a4d5886b..d5dcf590 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -167,6 +167,6 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: '3.8' # Match this to the version specified in environment.yml + python-version: '3.11' # Match this to the version specified in environment.yml - name: Run pre-commit 
checks uses: pre-commit/action@v3.0.0 diff --git a/environment.yml b/environment.yml index 75546ef0..a41f1b99 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,7 @@ dependencies: - pandas=1.4 - pre-commit=2.20 # Only required for development - pytest=7.1 # Only required for development - - python=3.8 + - python=3.11 - pip=22.1 - requests=2.28 - scikit-learn=1.2 diff --git a/pyproject.toml b/pyproject.toml index ac535c91..974972c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: Bio-Informatics", ] -requires-python = ">=3.8" +requires-python = ">=3.11" dependencies = [ "adjusttext==0.7.3", # A bug was introduced in older versions of snakemake that prevent it from running. Update to fix From 21a831398bdafa3bb8d6e28a4f8f8092c01a89c2 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 14:03:11 -0600 Subject: [PATCH 03/30] Bump more version --- environment.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index a41f1b99..7f352f25 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - adjusttext=0.7.3.1 - - bioconda::snakemake-minimal=7.19.1 + - bioconda::snakemake-minimal=8.4.4 - docker-py=5.0 - matplotlib=3.5 - networkx=2.8 diff --git a/pyproject.toml b/pyproject.toml index 974972c0..f6ee3a53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ requires = ["setuptools>=64.0"] build-backend = "setuptools.build_meta" [tool.ruff] -target-version = "py38" +target-version = "py311" # Autofix errors when possible fix = true # Select categories or specific rules from https://beta.ruff.rs/docs/rules/ From b18144826bfdaf32322ac760af37e4d4a41163bb Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 14:36:18 -0600 Subject: [PATCH 04/30] Fix bad config parsing --- spras/config.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spras/config.py b/spras/config.py index c1895e01..22220d51 100644 --- a/spras/config.py +++ b/spras/config.py @@ -119,7 +119,7 @@ def process_config(self, raw_config): # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. if "unpack_singularity" in raw_config: # The value in the config is a string, and we need to convert it to a bool. - unpack_singularity = raw_config["unpack_singularity"].lower() in ("true", "yes", "t", "1") + unpack_singularity = raw_config["unpack_singularity"] if unpack_singularity and self.container_framework != "singularity": print("Warning: unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.") self.unpack_singularity = unpack_singularity @@ -189,7 +189,7 @@ def process_config(self, raw_config): run_list_tuples = list(it.product(*all_runs)) param_name_tuple = tuple(param_name_list) for r in run_list_tuples: - run_dict = dict(zip(param_name_tuple, r)) + run_dict = dict(zip(param_name_tuple, r, strict=False)) # TODO temporary workaround for yaml.safe_dump in Snakefile write_parameter_log for param, value in run_dict.copy().items(): if isinstance(value, np.float64): From d3ee1754767a8ea0e1fe7329af52ffda931451a1 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 14:44:44 -0600 Subject: [PATCH 05/30] Make spras.sh run on as many cores as it can --- docker-wrappers/SPRAS/spras.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 docker-wrappers/SPRAS/spras.sh diff --git a/docker-wrappers/SPRAS/spras.sh b/docker-wrappers/SPRAS/spras.sh old mode 100644 new mode 100755 index 45e1f858..dad2d26e --- a/docker-wrappers/SPRAS/spras.sh +++ b/docker-wrappers/SPRAS/spras.sh @@ -1,2 +1,2 @@ #!/bin/bash -snakemake --cores 4 --configfile example_config.yaml +snakemake --cores `nproc` --configfile example_config.yaml From 
ddfa66c0f22c610968ec177b0e37e07d57afc69d Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 16:22:29 -0600 Subject: [PATCH 06/30] Add check for already-unpacked sing images --- spras/containers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index a51fde31..516b6b12 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -199,11 +199,16 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ # Get the last element, which will indicate the base container name base_cont = path_elements[-1] base_cont = base_cont.replace(":", "_").split(":")[0] + sif_file = base_cont + ".sif" - # Pull the container to a local .sif # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert - image_path = Client.pull('docker://' + container, name=base_cont+'.sif') - Client.build(recipe=image_path, image=base_cont, sandbox=True, sudo=False) + image_path = Client.pull('docker://' + container, name=sif_file) + + # Check if the directory for base_cont already exists. When running concurrent jobs, it's possible + # something else has already pulled/unpacked the container. + # Here, we expand the sif image from `image_path` to a directory indicated by `base_cont` + if not os.path.exists(base_cont): + Client.build(recipe=image_path, image=base_cont, sandbox=True, sudo=False) # Execute the locally unpacked container. 
return Client.execute(base_cont, From 85d5e9028bb196ab55695e3580dc4805d435975d Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 12 Feb 2024 16:23:12 -0600 Subject: [PATCH 07/30] Update htcondor memory based on observed usage --- docker-wrappers/SPRAS/spras.sub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 09695d9a..0a89e160 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -18,7 +18,7 @@ transfer_output_files = output # System specifications. request_cpus = 4 -request_memory = 8GB +request_memory = 4GB request_disk = 16GB # Only run on nodes with Singularity installed From 2a64269bbd1187c4663ada068cd1fd6b09447348 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Tue, 13 Feb 2024 14:53:45 +0000 Subject: [PATCH 08/30] Update testing documentation --- docker-wrappers/SPRAS/README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index 9436a9e5..ac54aca1 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -32,4 +32,23 @@ export DOCKER_DEFAULT_PLATFORM=linux/amd64 ## Testing -The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. +The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. To test the `spras` container +in this environment, first login to an HTCondor Access Point (AP). Then, from the AP clone this repo: +``` +git clone git@github.com:Reed-CompBio/spras.git +``` + +When you're ready to run SPRAS as an HTCondor workflow, navigate to the `spras/docker-wrappers/SPRAS` directory and run `condor_submit spras.sub`. 
This will +submit SPRAS to HTCondor as a single job with as many cores as indicated by the `request_cpus` line in `spras.sub`, using `example_config.yaml` as the +SPRAS configuration file. Note that you can alter the configuration file to test various workflows, but you should leave `unpack_singularity = true`, +or it is likely the job will be unsuccessful. By default, the `example_config.yaml` runs everything except for `cytoscape`, which appears to fail periodically +in HTCondor. + +To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates (you +should never run something like `watch condor_q`, because this places extraneous strain on the AP). Upon completion, the `output` directory from the workflow +should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the workflows logging information (anything that matches `spras_*` +and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain useful debugging clues about what may have gone wrong. + +**Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild this image against +the version of SPRAS you want to test, and push the image to your image repository. To use that container in the workflow, change the `container_image` line of +`spras.sub` to point to the new image. 
\ No newline at end of file From 0f775e7cd58c3657c780a4720a2e6ef52b80d11a Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Tue, 13 Feb 2024 15:15:11 +0000 Subject: [PATCH 09/30] Update more 'zip' funcs to make linter happy --- Snakefile | 2 +- spras/analysis/ml.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index 6a9b513d..09e1e4c6 100644 --- a/Snakefile +++ b/Snakefile @@ -219,7 +219,7 @@ rule reconstruct: # Create a copy so that the updates are not written to the parameters logfile params = reconstruction_params(wildcards.algorithm, wildcards.params).copy() # Add the input files - params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}))) + params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=False))) # Add the output file # All run functions can accept a relative path to the output file that should be written that is called 'output_file' params['output_file'] = output.pathway_file diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py index a1571988..a76ab97a 100644 --- a/spras/analysis/ml.py +++ b/spras/analysis/ml.py @@ -100,7 +100,7 @@ def create_palette(column_names): """ # TODO: could add a way for the user to customize the color palette? custom_palette = sns.color_palette("husl", len(column_names)) - label_color_map = {label: color for label, color in zip(column_names, custom_palette)} + label_color_map = {label: color for label, color in zip(column_names, custom_palette, strict=False)} return label_color_map From a21cfab4e8710740f92c38fbd7468490a066f2be Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Wed, 28 Feb 2024 17:29:39 +0000 Subject: [PATCH 10/30] Update package versions I've tested, and the conda/pip packages seem to resolve/build for both x86/arm on Linux and MacOS. 
--- environment.yml | 14 +++++++------- pyproject.toml | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/environment.yml b/environment.yml index 7f352f25..ad06d899 100644 --- a/environment.yml +++ b/environment.yml @@ -3,13 +3,13 @@ channels: - conda-forge dependencies: - adjusttext=0.7.3.1 - - bioconda::snakemake-minimal=8.4.4 + - bioconda::snakemake-minimal=8.5.3 - docker-py=5.0 - - matplotlib=3.5 + - matplotlib=3.6 - networkx=2.8 - - pandas=1.4 + - pandas=1.5 - pre-commit=2.20 # Only required for development - - pytest=7.1 # Only required for development + - pytest=8.0 # Only required for development - python=3.11 - pip=22.1 - requests=2.28 @@ -18,11 +18,11 @@ dependencies: - spython=0.2 # Only required for GraphSpace - commonmark=0.9 - - docutils=0.18 + - docutils=0.19 - jinja2=3.1 - mock=4.0 - recommonmark=0.7 - - sphinx=5.0 + - sphinx=6.0 - pip: - graphspace_python==1.3.1 - - sphinx-rtd-theme==1.2.0 + - sphinx-rtd-theme==2.0.0 diff --git a/pyproject.toml b/pyproject.toml index f6ee3a53..ee4b6300 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,11 +20,11 @@ requires-python = ">=3.11" dependencies = [ "adjusttext==0.7.3", # A bug was introduced in older versions of snakemake that prevent it from running. Update to fix - "snakemake==8.4.4", + "snakemake==8.5.3", "docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. 
This appears to have no effect - "matplotlib==3.5", + "matplotlib==3.6", "networkx==2.8", - "pandas==1.4", + "pandas==1.5", "pip==22.1", "requests==2.28", "scikit-learn==1.2", @@ -32,20 +32,20 @@ dependencies = [ "spython==0.2", # Only required for GraphSpace "commonmark==0.9", - "docutils==0.18", + "docutils==0.19", "jinja2==3.1", "mock==4.0", "recommonmark==0.7", - "sphinx==5.0", + "sphinx==6.0", "graphspace_python==1.3.1", - "sphinx-rtd-theme==1.2.0", + "sphinx-rtd-theme==2.0.0", ] [project.optional-dependencies] dev = [ # Only required for development "pre-commit==2.20", - "pytest==7.1", + "pytest==8.0", ] [project.urls] From fee6b6cf2f7b77297a3bfdaa670d5d6353f6377c Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Wed, 28 Feb 2024 18:14:05 +0000 Subject: [PATCH 11/30] Add __init__.py to test dirs --- test/AllPairs/__init__.py | 0 test/DOMINO/__init__.py | 0 test/LocalNeighborhood/__init__.py | 0 test/MEO/__init__.py | 0 test/MinCostFlow/__init__.py | 0 test/OmicsIntegrator1/__init__.py | 0 test/OmicsIntegrator2/__init__.py | 0 test/PathLinker/__init__.py | 0 test/analysis/__init__.py | 0 test/generate-inputs/__init__.py | 0 test/interactome/__init__.py | 0 test/ml/__init__.py | 0 test/parse-outputs/__init__.py | 0 13 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/AllPairs/__init__.py create mode 100644 test/DOMINO/__init__.py create mode 100644 test/LocalNeighborhood/__init__.py create mode 100644 test/MEO/__init__.py create mode 100644 test/MinCostFlow/__init__.py create mode 100644 test/OmicsIntegrator1/__init__.py create mode 100644 test/OmicsIntegrator2/__init__.py create mode 100644 test/PathLinker/__init__.py create mode 100644 test/analysis/__init__.py create mode 100644 test/generate-inputs/__init__.py create mode 100644 test/interactome/__init__.py create mode 100644 test/ml/__init__.py create mode 100644 test/parse-outputs/__init__.py diff --git a/test/AllPairs/__init__.py b/test/AllPairs/__init__.py new file mode 100644 
index 00000000..e69de29b diff --git a/test/DOMINO/__init__.py b/test/DOMINO/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/LocalNeighborhood/__init__.py b/test/LocalNeighborhood/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/MEO/__init__.py b/test/MEO/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/MinCostFlow/__init__.py b/test/MinCostFlow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/OmicsIntegrator1/__init__.py b/test/OmicsIntegrator1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/OmicsIntegrator2/__init__.py b/test/OmicsIntegrator2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/PathLinker/__init__.py b/test/PathLinker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/analysis/__init__.py b/test/analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/generate-inputs/__init__.py b/test/generate-inputs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/interactome/__init__.py b/test/interactome/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/ml/__init__.py b/test/ml/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/parse-outputs/__init__.py b/test/parse-outputs/__init__.py new file mode 100644 index 00000000..e69de29b From 3b17802196a0dd72d23ef37e0e45d9d8df7843be Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Wed, 28 Feb 2024 16:16:41 -0600 Subject: [PATCH 12/30] Add unpack sing test --- test/AllPairs/test_ap.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index b6aab9aa..902007c4 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -54,6 +54,20 @@ def test_allpairs_singularity(self): container_framework="singularity") assert out_path.exists() + 
@pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') + def test_allpairs_singularity_unpacked(self): + out_path = Path(OUT_DIR+'sample-out.txt') + out_path.unlink(missing_ok=True) + # Indicate via config mechanism that we want to unpack the Singularity container + config.config.unpack_singularity = True + AllPairs.run( + nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', + network=TEST_DIR+'input/sample-in-net.txt', + output_file=str(out_path), + container_framework="singularity") + config.config.unpack_singularity = False + assert out_path.exists() + def test_allpairs_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run From 30613244998d36128d06501f5e2da661fd662114 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Wed, 28 Feb 2024 16:23:04 -0600 Subject: [PATCH 13/30] Update unpack test outfile to make unique --- test/AllPairs/test_ap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 902007c4..442b26a7 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -56,7 +56,7 @@ def test_allpairs_singularity(self): @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_allpairs_singularity_unpacked(self): - out_path = Path(OUT_DIR+'sample-out.txt') + out_path = Path(OUT_DIR+'sample-out-unpack.txt') out_path.unlink(missing_ok=True) # Indicate via config mechanism that we want to unpack the Singularity container config.config.unpack_singularity = True From 67878d1c4914d6bc4e2e853254fdb8c6889c85f2 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 15 Mar 2024 19:40:07 +0000 Subject: [PATCH 14/30] Add warning about cleaning up sing images to config.yaml --- config/config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/config.yaml b/config/config.yaml index 41d923b1..741d8ca9 100644 --- a/config/config.yaml +++ 
b/config/config.yaml @@ -10,6 +10,9 @@ container_framework: docker # Only used if container_framework is set to singularity, this will unpack the singularity containers # to the local filesystem. This is useful when PRM containers need to run inside another container, # such as would be the case in an HTCondor/OSPool environment. +# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way +# that persists after the workflow is complete. To clean up the unpacked containers, the user must +# manually delete them. unpack_singularity: false # Allow the user to configure which container registry containers should be pulled from From 10e2a5fb445442d58e7b781950357b555803f2e8 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 18 Mar 2024 14:54:07 +0000 Subject: [PATCH 15/30] Update a few items based on review feedback --- docker-wrappers/SPRAS/README.md | 18 +++++++++++------- docker-wrappers/SPRAS/spras.sub | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index ac54aca1..354deb5b 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -1,7 +1,7 @@ # SPRAS Docker image ## Building -A Docker image for SPRAS that is available on [Dockerhub]() +A Docker image for SPRAS that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/spras) This image comes bundled with all of the necessary software packages to run SPRAS, and can be used for execution in distributed environments (like HTCondor). To create the Docker image, make sure you are in this repository's root directory, and from your terminal run: @@ -35,7 +35,7 @@ export DOCKER_DEFAULT_PLATFORM=linux/amd64 The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. To test the `spras` container in this environment, first login to an HTCondor Access Point (AP). 
Then, from the AP clone this repo: ``` -git clone git@github.com:Reed-CompBio/spras.git +git clone https://github.com/Reed-CompBio/spras.git ``` When you're ready to run SPRAS as an HTCondor workflow, navigate to the `spras/docker-wrappers/SPRAS` directory and run `condor_submit spras.sub`. This will @@ -44,11 +44,15 @@ SPRAS configuration file. Note that you can alter the configuration file to test or it is likely the job will be unsuccessful. By default, the `example_config.yaml` runs everything except for `cytoscape`, which appears to fail periodically in HTCondor. -To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates (you -should never run something like `watch condor_q`, because this places extraneous strain on the AP). Upon completion, the `output` directory from the workflow -should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the workflows logging information (anything that matches `spras_*` -and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain useful debugging clues about what may have gone wrong. +To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates. +Upon completion, the `output` directory from the workflow should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the +workflow's logging information (anything that matches `spras_*` and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain +useful debugging clues about what may have gone wrong. **Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild this image against the version of SPRAS you want to test, and push the image to your image repository. 
To use that container in the workflow, change the `container_image` line of -`spras.sub` to point to the new image. \ No newline at end of file +`spras.sub` to point to the new image. + +## Versions: +- v0.0.1: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. Note that the Snakefile should be + runnable from any directory within the container. diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 0a89e160..8c17b8ab 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -1,5 +1,5 @@ # A SPRAS submit file to demonstrate running SPRAS on HTCondor -container_image = docker://reedcompbio/spras:v1 +container_image = docker://reedcompbio/spras:v0.0.1 universe = container # Specify names for log/stdout/stderr files generated by HTCondor From d7c1536e5d8117bf1d54f8a56d1b82106bca4ea4 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Thu, 21 Mar 2024 21:34:33 +0000 Subject: [PATCH 16/30] Update SPRAS version to 0.1.0 --- docker-wrappers/SPRAS/README.md | 14 +++++++++++--- docker-wrappers/SPRAS/spras.sub | 2 +- pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index 354deb5b..eee30ddf 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -1,10 +1,12 @@ # SPRAS Docker image ## Building + A Docker image for SPRAS that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/spras) This image comes bundled with all of the necessary software packages to run SPRAS, and can be used for execution in distributed environments (like HTCondor). To create the Docker image, make sure you are in this repository's root directory, and from your terminal run: + ``` docker build -t reedcompbio/spras -f docker-wrappers/SPRAS/Dockerfile . 
``` @@ -13,27 +15,32 @@ This will copy the entire SPRAS repository into the container and install SPRAS is being installed with `pip`, it's also possible to specify that you want development modules installed as well. If you're using the container for development and you want the optional `pre-commit` and `pytest` packages as well as a spras package that receives changes without re-installation, change the `pip` installation line to: + ``` pip install -e .[dev] ``` + This will cause changes to spras source code to update the intsalled package. **Note:** This image will build for the same platform that is native to your system (ie amd64 or arm64). If you need to run this in a remote environment like HTCondor that is almost certainly `amd64` but you're building from Apple Silicon, it is recommended to either modify the Dockerfile to pin the platform: + ``` FROM --platform=linux/amd64 almalinux:9 ``` Or to temporarily override your system's default by exporting the environment variable: + ``` export DOCKER_DEFAULT_PLATFORM=linux/amd64 ``` -(This environment variable can then be cleared by running `unset DOCKER_DEFAULT_PLATFORM` to return your system to its default) +(This environment variable can then be cleared by running `unset DOCKER_DEFAULT_PLATFORM` to return your system to its default) ## Testing The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. To test the `spras` container in this environment, first login to an HTCondor Access Point (AP). Then, from the AP clone this repo: + ``` git clone https://github.com/Reed-CompBio/spras.git ``` @@ -54,5 +61,6 @@ the version of SPRAS you want to test, and push the image to your image reposito `spras.sub` to point to the new image. ## Versions: -- v0.0.1: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. Note that the Snakefile should be - runnable from any directory within the container. 
+ +- v0.1.0: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. Note that the Snakefile should be + runnable from any directory within the container. diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 8c17b8ab..322fb5b9 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -1,5 +1,5 @@ # A SPRAS submit file to demonstrate running SPRAS on HTCondor -container_image = docker://reedcompbio/spras:v0.0.1 +container_image = docker://reedcompbio/spras:v0.1.0 universe = container # Specify names for log/stdout/stderr files generated by HTCondor diff --git a/pyproject.toml b/pyproject.toml index ee4b6300..e726bb24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "spras" -version = "0.0.1" +version = "0.1.0" description = "Signaling Pathway Reconstruction Analysis Streamliner" authors = [ { name = "Anthony Gitter", email = "gitter@biostat.wisc.edu" }, From 165710e6b0389fdc0944c3183e75580f92c71056 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 10:52:05 -0500 Subject: [PATCH 17/30] Update submit file and job wrapper --- docker-wrappers/SPRAS/spras.sh | 3 +- docker-wrappers/SPRAS/spras.sub | 83 +++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/docker-wrappers/SPRAS/spras.sh b/docker-wrappers/SPRAS/spras.sh index dad2d26e..46b5dd5e 100755 --- a/docker-wrappers/SPRAS/spras.sh +++ b/docker-wrappers/SPRAS/spras.sh @@ -1,2 +1,3 @@ #!/bin/bash -snakemake --cores `nproc` --configfile example_config.yaml + +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin snakemake "$@" diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 322fb5b9..e86ef1ea 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -1,27 +1,82 @@ -# A SPRAS submit file to demonstrate running SPRAS on HTCondor 
-container_image = docker://reedcompbio/spras:v0.1.0 +############################################################ +# A submit file to demonstrate running SPRAS in the OSPool # +############################################################ + +############################################################ +# Define a few macros we use throughout the submit file # +############################################################ +CONFIG_FILE = example_config.yaml +NUM_PROCS = 4 + +############################################################ +# Specify that the workflow should run in the SPRAS # +# container. In the OSPool, this image is usually # +# converted automatically to an Apptainer/Singularity # +# image, which is why the example config has # +# `unpack_singularity = true`. # +############################################################ universe = container +#container_image = docker://reedcompbio/spras:v0.1.0 +container_image = docker://jhiemstra/spras:with-uid-v4 -# Specify names for log/stdout/stderr files generated by HTCondor -log = spras_$(Cluster).log -output = spras_$(Cluster).out -error = spras_$(Cluster).err +############################################################ +# Specify names for log/stdout/stderr files generated by # +# HTCondor. # +# NOTE: You should `mkdir logs/` before running, or the # +# spras_$(Cluster).log file won't be available. # +############################################################ +log = logs/spras_$(Cluster).log +output = logs/spras_$(Cluster)_$(Process).out +error = logs/spras_$(Cluster)_$(Process).err -# Specify the script to run inside the container. This is simply a wrapper on the Snakefile +############################################################ +# Specify the script to run inside the container. This is # +# simply a wrapper on the Snakefile. 
# +############################################################ executable = spras.sh +arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE)" -# Handle transferring required inputs/outputs +############################################################ +# Handle transferring required inputs/outputs # +############################################################ should_transfer_files = YES when_to_transfer_output = ON_EXIT -transfer_input_files = example_config.yaml, ../../input, ../../Snakefile, spras.sh +transfer_input_files = $(CONFIG_FILE), ../../input, ../../Snakefile +# The output directory should match whatever you configure in your configfile. transfer_output_files = output -# System specifications. -request_cpus = 4 -request_memory = 4GB +############################################################ +# System specifications. Be sure to request enough disk to # +# hold any additional containers that might be downloaded # +# and unpacked as part of the workflow. # +############################################################ +request_cpus = $(NUM_PROCS) +request_memory = 8GB request_disk = 16GB -# Only run on nodes with Singularity installed -requirements = (HAS_SINGULARITY == True) +############################################################ +# Specify a batch name that we can use to identify the # +# workflow via `condor_q`. # +############################################################ +JobBatchName = "SPRAS-workflow-OSPool-2" + +############################################################ +# Indicate that we want to run in the OSPool. This is only # +# needed if running from CHTC. If running from an OSPool # +# AP, omit this line. 
# +############################################################ ++WantGlideIn = true + +############################################################ +# Not all Execution Points in the OSPool will have # +# Apptainer (formerly Singularity) installed, but this is # +# a requirement to run SPRAS (since the OSPool is not # +# Docker friendly). To make sure we land somewhere with # +# Apptainer, we add it as a job requirement. If running # +# this submit file from CHTC, we also need a requirement # +# to prevent landing on a CHTC Execution Point. # +############################################################ +requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC") +# Queue the job queue 1 \ No newline at end of file From 4d2a1ca39355a5d5d76d13343697600d2a3b15f5 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 10:54:09 -0500 Subject: [PATCH 18/30] Revert submit file container --- docker-wrappers/SPRAS/spras.sub | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index e86ef1ea..49bf5e02 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -16,8 +16,7 @@ NUM_PROCS = 4 # `unpack_singularity = true`. 
# ############################################################ universe = container -#container_image = docker://reedcompbio/spras:v0.1.0 -container_image = docker://jhiemstra/spras:with-uid-v4 +container_image = docker://reedcompbio/spras:v0.1.0 ############################################################ # Specify names for log/stdout/stderr files generated by # From 30c8d4a18f825093d49534807c20620db812ecf6 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 12:52:37 -0500 Subject: [PATCH 19/30] Update Dockerfile and installation versions --- docker-wrappers/SPRAS/Dockerfile | 2 +- environment.yml | 3 ++- pyproject.toml | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docker-wrappers/SPRAS/Dockerfile b/docker-wrappers/SPRAS/Dockerfile index 647d9006..5a721a07 100644 --- a/docker-wrappers/SPRAS/Dockerfile +++ b/docker-wrappers/SPRAS/Dockerfile @@ -9,7 +9,7 @@ RUN dnf update -y && \ docker apptainer COPY / /spras/ - +RUN chmod -R 777 /spras WORKDIR /spras # Install spras into the container diff --git a/environment.yml b/environment.yml index ad06d899..bcbb69c0 100644 --- a/environment.yml +++ b/environment.yml @@ -3,11 +3,12 @@ channels: - conda-forge dependencies: - adjusttext=0.7.3.1 - - bioconda::snakemake-minimal=8.5.3 + - bioconda::snakemake-minimal=8.11.6 - docker-py=5.0 - matplotlib=3.6 - networkx=2.8 - pandas=1.5 + - numpy=1.26.4 - pre-commit=2.20 # Only required for development - pytest=8.0 # Only required for development - python=3.11 diff --git a/pyproject.toml b/pyproject.toml index e726bb24..a6705ece 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,11 +20,12 @@ requires-python = ">=3.11" dependencies = [ "adjusttext==0.7.3", # A bug was introduced in older versions of snakemake that prevent it from running. Update to fix - "snakemake==8.5.3", + "snakemake==8.11.6", "docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. 
This appears to have no effect "matplotlib==3.6", "networkx==2.8", - "pandas==1.5", + "pandas==2.0", + "numpy==1.26.4", "pip==22.1", "requests==2.28", "scikit-learn==1.2", From e3f981c893f85895ef77402eec3b3f0dd4b150de Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 13:59:40 -0500 Subject: [PATCH 20/30] Update spras.sh to work with directories --- docker-wrappers/SPRAS/spras.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/spras.sh b/docker-wrappers/SPRAS/spras.sh index 46b5dd5e..cdfb924c 100755 --- a/docker-wrappers/SPRAS/spras.sh +++ b/docker-wrappers/SPRAS/spras.sh @@ -1,3 +1,10 @@ #!/bin/bash -PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin snakemake "$@" +# Fail early if there's an issue +set -e + +# When .cache files are created, they need to know where HOME is to write there. +# In this case, that should be the HTCondor scratch dir the job is executing in. +export HOME=$(pwd) + +snakemake "$@" From 8265de2e3e7c587ba0f4b389da1ac1dd89742cce Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 14:07:31 -0500 Subject: [PATCH 21/30] Update SPRAS container README --- docker-wrappers/SPRAS/README.md | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index eee30ddf..13474b72 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -8,7 +8,12 @@ This image comes bundled with all of the necessary software packages to run SPRA To create the Docker image, make sure you are in this repository's root directory, and from your terminal run: ``` -docker build -t reedcompbio/spras -f docker-wrappers/SPRAS/Dockerfile . +docker build -t <project>/<image>:<tag> -f docker-wrappers/SPRAS/Dockerfile .
+``` + +For example, to build this image with the intent of pushing it to DockerHub as reedcompbio/spras:v0.1.0, you'd run: +``` +docker build -t reedcompbio/spras:v0.1.0 -f docker-wrappers/SPRAS/Dockerfile . ``` This will copy the entire SPRAS repository into the container and install SPRAS with `pip`. As such, any changes you've made to the current SPRAS repository will be reflected in version of SPRAS installed in the container. Since SPRAS @@ -28,13 +33,15 @@ This will cause changes to spras source code to update the intsalled package. FROM --platform=linux/amd64 almalinux:9 ``` -Or to temporarily override your system's default by exporting the environment variable: - +Or to temporarily override your system's default during the build, prepend your build command with: ``` -export DOCKER_DEFAULT_PLATFORM=linux/amd64 +DOCKER_DEFAULT_PLATFORM=linux/amd64 ``` -(This environment variable can then be cleared by running `unset DOCKER_DEFAULT_PLATFORM` to return your system to its default) +For example, to build reedcompbio/spras:v0.1.0 on Apple Silicon as a linux/amd64 container, you'd run: +``` +DOCKER_DEFAULT_PLATFORM=linux/amd64 docker build -t reedcompbio/spras:v0.1.0 -f docker-wrappers/SPRAS/Dockerfile . +``` ## Testing @@ -45,16 +52,16 @@ in this environment, first login to an HTCondor Access Point (AP). Then, from th git clone https://github.com/Reed-CompBio/spras.git ``` -When you're ready to run SPRAS as an HTCondor workflow, navigate to the `spras/docker-wrappers/SPRAS` directory and run `condor_submit spras.sub`. This will -submit SPRAS to HTCondor as a single job with as many cores as indicated by the `request_cpus` line in `spras.sub`, using `example_config.yaml` as the -SPRAS configuration file. Note that you can alter the configuration file to test various workflows, but you should leave `unpack_singularity = true`, -or it is likely the job will be unsuccessful. 
By default, the `example_config.yaml` runs everything except for `cytoscape`, which appears to fail periodically -in HTCondor. +When you're ready to run SPRAS as an HTCondor workflow, navigate to the `spras/docker-wrappers/SPRAS` directory and create the `logs/` directory. Then run +`condor_submit spras.sub`, which will submit SPRAS to HTCondor as a single job with as many cores as indicated by the `NUM_PROCS` line in `spras.sub`, using +the value of `CONFIG_FILE` as the SPRAS configuration file. Note that you can alter the configuration file to test various workflows, but you should leave +`unpack_singularity = true`, or it is likely the job will be unsuccessful. By default, the `example_config.yaml` runs everything except for `cytoscape`, which +appears to fail periodically in HTCondor. To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates. Upon completion, the `output` directory from the workflow should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the -workflow's logging information (anything that matches `spras_*` and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain -useful debugging clues about what may have gone wrong. +workflow's logging information (anything that matches `logs/spras_*` and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should +contain useful debugging clues about what may have gone wrong. **Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild this image against the version of SPRAS you want to test, and push the image to your image repository.
To use that container in the workflow, change the `container_image` line of From e017273b395c0910b2396355f598648181afdb51 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 15:04:32 -0500 Subject: [PATCH 22/30] More SPRAS image README updates --- docker-wrappers/SPRAS/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index 13474b72..164ef6f9 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -67,6 +67,12 @@ contain useful debugging clues about what may have gone wrong. the version of SPRAS you want to test, and push the image to your image repository. To use that container in the workflow, change the `container_image` line of `spras.sub` to point to the new image. +**Note**: In some cases, it may be necessary to convert the SPRAS image to a `.sif` container image before running someplace like the OSPool. To do this, run +``` +apptainer build spras.sif docker://reedcompbio/spras:v0.1.0 +``` +to produce the file `spras.sif`. Then, substitute this value as the `container_image` in the submit file. + ## Versions: - v0.1.0: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. 
Note that the Snakefile should be From de8a8fc1006d94b7ec437d28e5854c23f203bc7b Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 15:05:30 -0500 Subject: [PATCH 23/30] Update spras.sub to include .sif replacement as comment --- docker-wrappers/SPRAS/spras.sub | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 49bf5e02..2c1f1cb3 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -17,6 +17,8 @@ NUM_PROCS = 4 ############################################################ universe = container container_image = docker://reedcompbio/spras:v0.1.0 +# container_image = spras.sif + ############################################################ # Specify names for log/stdout/stderr files generated by # @@ -57,7 +59,7 @@ request_disk = 16GB # Specify a batch name that we can use to identify the # # workflow via `condor_q`. # ############################################################ -JobBatchName = "SPRAS-workflow-OSPool-2" +JobBatchName = "SPRAS-workflow-OSPool" ############################################################ # Indicate that we want to run in the OSPool. This is only # From b4c9748950ba4076dabdc5f7784c4c5d7af43f96 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 15:08:17 -0500 Subject: [PATCH 24/30] Update SPRAS cont README with when to use .sif in submit file --- docker-wrappers/SPRAS/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index 164ef6f9..995eb970 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -67,7 +67,8 @@ contain useful debugging clues about what may have gone wrong. the version of SPRAS you want to test, and push the image to your image repository. 
To use that container in the workflow, change the `container_image` line of `spras.sub` to point to the new image. -**Note**: In some cases, it may be necessary to convert the SPRAS image to a `.sif` container image before running someplace like the OSPool. To do this, run +**Note**: In some cases, especially if you're encountering an error like `/srv//spras.sh: line 10: snakemake: command not found`, it may be necessary to convert +the SPRAS image to a `.sif` container image before running someplace like the OSPool. To do this, run: ``` apptainer build spras.sif docker://reedcompbio/spras:v0.1.0 ``` From ef1e5263c4b4891888d1f4c32c1095ccc6f35747 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 28 Jun 2024 15:21:26 -0500 Subject: [PATCH 25/30] Update spras.sub to include snakemake job retries --- docker-wrappers/SPRAS/spras.sub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 2c1f1cb3..5f659c32 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -35,7 +35,7 @@ error = logs/spras_$(Cluster)_$(Process).err # simply a wrapper on the Snakefile. 
# ############################################################ executable = spras.sh -arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE)" +arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE) --retries 3" ############################################################ # Handle transferring required inputs/outputs # From 606de0194c84757150d9505ea56881145fb18542 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 1 Jul 2024 10:56:23 -0500 Subject: [PATCH 26/30] Fix typos and add clarity to READMEs based on feedback --- docker-wrappers/SPRAS/README.md | 5 +++-- docker-wrappers/SPRAS/example_config.yaml | 2 +- spras/containers.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md index 995eb970..6187a553 100644 --- a/docker-wrappers/SPRAS/README.md +++ b/docker-wrappers/SPRAS/README.md @@ -25,9 +25,9 @@ spras package that receives changes without re-installation, change the pip install -e .[dev] ``` -This will cause changes to spras source code to update the intsalled package. +This will cause changes to spras source code to update the installed package. -**Note:** This image will build for the same platform that is native to your system (ie amd64 or arm64). If you need to run this in a remote environment like HTCondor that is almost certainly `amd64` but you're building from Apple Silicon, it is recommended to either modify the Dockerfile to pin the platform: +**Note:** This image will build for the same platform that is native to your system (i.e. amd64 or arm64). If you need to run this in a remote environment like HTCondor that is almost certainly `amd64` but you're building from Apple Silicon, it is recommended to either modify the Dockerfile to pin the platform: ``` FROM --platform=linux/amd64 almalinux:9 @@ -76,5 +76,6 @@ to produce the file `spras.sif`. 
Then, substitute this value as the `container_i ## Versions: +The versions of this image match the version of the spras package within it. - v0.1.0: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. Note that the Snakefile should be runnable from any directory within the container. diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index 909bcfea..8b9c1edb 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -7,7 +7,7 @@ hash_length: 7 # 'singularity'. If container_framework is not specified, SPRAS will default to docker. container_framework: singularity -# Unpack singularity +# Unpack singularity. See config/config.yaml for details. unpack_singularity: true # Allow the user to configure which container registry containers should be pulled from diff --git a/spras/containers.py b/spras/containers.py index 516b6b12..c9523129 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -191,7 +191,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ else: singularity_options.extend(['--env', environment]) - # Handle unpacking singularity image if needed. Potentially needed for running nested unprivileged containeres + # Handle unpacking singularity image if needed. 
Potentially needed for running nested unprivileged containers if config.config.unpack_singularity: # Split the string by "/" path_elements = container.split("/") From 0184ceac6e825e9cf93b4a27d30afef127900463 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 1 Jul 2024 12:06:35 -0500 Subject: [PATCH 27/30] Update spras.sub according to feedback --- docker-wrappers/SPRAS/spras.sub | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker-wrappers/SPRAS/spras.sub b/docker-wrappers/SPRAS/spras.sub index 5f659c32..b89945e7 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/docker-wrappers/SPRAS/spras.sub @@ -7,6 +7,9 @@ ############################################################ CONFIG_FILE = example_config.yaml NUM_PROCS = 4 +# Paths to input data and Snakefile. +INPUT_DIR = ../../input +SNAKEFILE = ../../Snakefile ############################################################ # Specify that the workflow should run in the SPRAS # @@ -26,7 +29,7 @@ container_image = docker://reedcompbio/spras:v0.1.0 # NOTE: You should `mkdir logs/` before running, or the # # spras_$(Cluster).log file won't be available. # ############################################################ -log = logs/spras_$(Cluster).log +log = logs/spras_$(Cluster)_$(Process).log output = logs/spras_$(Cluster)_$(Process).out error = logs/spras_$(Cluster)_$(Process).err @@ -42,7 +45,7 @@ arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE) --retries 3" ############################################################ should_transfer_files = YES when_to_transfer_output = ON_EXIT -transfer_input_files = $(CONFIG_FILE), ../../input, ../../Snakefile +transfer_input_files = $(CONFIG_FILE), $(INPUT_DIR), $(SNAKEFILE) # The output directory should match whatever you configure in your configfile. 
transfer_output_files = output From dd444da08487b2592355f8b74166d038d95bf506 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 1 Jul 2024 12:13:27 -0500 Subject: [PATCH 28/30] Revert Pandas version in pyproject.toml after finding NumPy version bug --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a6705ece..68d10f5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. This appears to have no effect "matplotlib==3.6", "networkx==2.8", - "pandas==2.0", + "pandas==1.5", "numpy==1.26.4", "pip==22.1", "requests==2.28", From f78bf713bc9964a725a22914561927b9109cdc8a Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 1 Jul 2024 12:16:17 -0500 Subject: [PATCH 29/30] Update various zip functions' flag to True --- Snakefile | 2 +- spras/analysis/ml.py | 2 +- spras/config.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index 09e1e4c6..71a8a6ed 100644 --- a/Snakefile +++ b/Snakefile @@ -219,7 +219,7 @@ rule reconstruct: # Create a copy so that the updates are not written to the parameters logfile params = reconstruction_params(wildcards.algorithm, wildcards.params).copy() # Add the input files - params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=False))) + params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=True))) # Add the output file # All run functions can accept a relative path to the output file that should be written that is called 'output_file' params['output_file'] = output.pathway_file diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py index a76ab97a..a637d7fe 100644 --- a/spras/analysis/ml.py +++ b/spras/analysis/ml.py @@ -100,7 +100,7 @@ def create_palette(column_names): """ # TODO: could add a way for the user to customize the 
color palette? custom_palette = sns.color_palette("husl", len(column_names)) - label_color_map = {label: color for label, color in zip(column_names, custom_palette, strict=False)} + label_color_map = {label: color for label, color in zip(column_names, custom_palette, strict=True)} return label_color_map diff --git a/spras/config.py b/spras/config.py index 22220d51..91676ca5 100644 --- a/spras/config.py +++ b/spras/config.py @@ -189,7 +189,7 @@ def process_config(self, raw_config): run_list_tuples = list(it.product(*all_runs)) param_name_tuple = tuple(param_name_list) for r in run_list_tuples: - run_dict = dict(zip(param_name_tuple, r, strict=False)) + run_dict = dict(zip(param_name_tuple, r, strict=True)) # TODO temporary workaround for yaml.safe_dump in Snakefile write_parameter_log for param, value in run_dict.copy().items(): if isinstance(value, np.float64): From 6cee79f510377ab24e40097ec1647e21af1f86ee Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 1 Jul 2024 15:19:44 -0500 Subject: [PATCH 30/30] Add SPRAS docker build to CI workflows --- .github/workflows/test-spras.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index d5dcf590..0f68d83a 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -84,6 +84,7 @@ jobs: docker pull reedcompbio/allpairs:v2 docker pull reedcompbio/domino:latest docker pull reedcompbio/py4cytoscape:v2 + docker pull reedcompbio/spras:v0.1.0 - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: @@ -156,6 +157,15 @@ jobs: tags: v2 cache_froms: reedcompbio/py4cytoscape:latest push: false + - name: Build SPRAS Docker image + uses: docker/build-push-action@v1 + with: + path: . + dockerfile: docker-wrappers/SPRAS/Dockerfile + repository: reedcompbio/spras + tags: v0.1.0 + cache_froms: reedcompbio/spras:v0.1.0 + push: false # Run pre-commit checks on source files pre-commit: