Skip to content

Commit

Permalink
Merge pull request #66 from Reed-CompBio/cytoscape
Browse files Browse the repository at this point in the history
Cytoscape
  • Loading branch information
agitter authored Sep 22, 2023
2 parents 17af0e9 + 80e70df commit e67167f
Show file tree
Hide file tree
Showing 24 changed files with 386 additions and 43 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/test-spras.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
uses: eWaterCycle/setup-apptainer@v2
with:
# Choose version from https://github.com/apptainer/apptainer/releases
apptainer-version: 1.1.3
apptainer-version: 1.2.2
- name: Run tests
shell: bash --login {0}
# Verbose output and disable stdout and stderr capturing
Expand Down Expand Up @@ -146,6 +146,15 @@ jobs:
tags: latest
cache_froms: reedcompbio/domino:latest
push: false
- name: Build Cytoscape Docker image
uses: docker/build-push-action@v1
with:
path: docker-wrappers/Cytoscape/.
dockerfile: docker-wrappers/Cytoscape/Dockerfile
repository: reedcompbio/py4cytoscape
tags: v1
cache_froms: reedcompbio/py4cytoscape:v1
push: false

# Run pre-commit checks on source files
pre-commit:
Expand Down
19 changes: 16 additions & 3 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import shutil
import yaml
from src.dataset import Dataset
from src.util import process_config
from src.analysis import ml, summary, graphspace
from src.analysis import ml, summary, graphspace, cytoscape

# Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
# and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
Expand Down Expand Up @@ -68,7 +68,10 @@ def make_final_input(wildcards):
# add graph and style JSON files.
final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}gs.json',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}gsstyle.json',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))


if config["analysis"]["cytoscape"]["include"]:
final_input.extend(expand('{out_dir}{sep}{dataset}-cytoscape.cys',out_dir=out_dir,sep=SEP,dataset=dataset_labels))

if config["analysis"]["ml"]["include"]:
final_input.extend(expand('{out_dir}{sep}{dataset}-pca.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
final_input.extend(expand('{out_dir}{sep}{dataset}-pca-variance.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
Expand Down Expand Up @@ -225,7 +228,7 @@ rule summarize_pathway:
run:
summary.run(input.standardized_file,output.summary_file,directed=algorithm_directed[wildcards.algorithm])

# Write GraphSpace JSON Graphs
# Write GraphSpace JSON graphs
rule viz_graphspace:
input: standardized_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt'])
output:
Expand All @@ -234,6 +237,16 @@ rule viz_graphspace:
run:
graphspace.write_json(input.standardized_file,output.graph_json,output.style_json,directed=algorithm_directed[wildcards.algorithm])


# Write a Cytoscape session file with all pathways for each dataset
rule viz_cytoscape:
input: pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
output:
session = SEP.join([out_dir, '{dataset}-cytoscape.cys'])
run:
cytoscape.run_cytoscape(input.pathways, output.session, SINGULARITY)


# Write a single summary table for all pathways for each dataset
rule summary_table:
input:
Expand Down
3 changes: 3 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@
# Create output files for each pathway that can be visualized with GraphSpace
graphspace:
include: true
# Create a Cytoscape session file with all pathway graphs for each dataset
cytoscape:
include: true
# Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
ml:
include: true
Expand Down
2 changes: 2 additions & 0 deletions config/egfr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ reconstruction_settings:
analysis:
graphspace:
include: false
cytoscape:
include: true
summary:
include: true
ml:
Expand Down
4 changes: 4 additions & 0 deletions docker-wrappers/Cytoscape/Cytoscape.vmoptions
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-Xms256m
-Xmx2g
-Xss5m
-Duser.home=/spras
31 changes: 31 additions & 0 deletions docker-wrappers/Cytoscape/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dockerfile derived from
# https://github.com/cytoscape/docker-cytoscape-desktop/blob/173ab46b4b5e5c148113ad0c9960a6af3fc50432/py4cytoscape/Dockerfile
# by Kozo Nishida
FROM python:3.9.13

# Versions
ENV CYTOSCAPE_VERSION=3.9.1
ENV PY4CYTOSCAPE_VERSION=1.3.0
ENV JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
# Workaround for java.util.zip.ZipException: Invalid CEN header https://cytoscape.org/common_issues.html#zipvalidation
ENV EXTRA_JAVA_OPTS="-Djdk.util.zip.disableZip64ExtraFieldValidation=true"

WORKDIR /py4cytoscape

# Install py4cytoscape
RUN pip install py4cytoscape==${PY4CYTOSCAPE_VERSION}

# Install Java and Cytoscape
RUN apt-get update && apt-get -y install default-jdk xvfb supervisor wget
RUN wget https://github.com/cytoscape/cytoscape/releases/download/${CYTOSCAPE_VERSION}/cytoscape-unix-${CYTOSCAPE_VERSION}.tar.gz \
&& tar xf cytoscape-unix-${CYTOSCAPE_VERSION}.tar.gz && rm cytoscape-unix-${CYTOSCAPE_VERSION}.tar.gz
RUN cd cytoscape-unix-${CYTOSCAPE_VERSION}/framework/system/org/cytoscape/property-impl/${CYTOSCAPE_VERSION} \
&& jar -xf property-impl-${CYTOSCAPE_VERSION}.jar cytoscape3.props \
&& cat cytoscape3.props | sed "s/^cyrest.version.*/cyrest.version=3.12.3/g" > cytoscape3.props.tmp \
&& mv cytoscape3.props.tmp cytoscape3.props \
&& jar -uf property-impl-${CYTOSCAPE_VERSION}.jar cytoscape3.props \
&& rm cytoscape3.props

COPY Cytoscape.vmoptions ./cytoscape-unix-${CYTOSCAPE_VERSION}/Cytoscape.vmoptions
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY cytoscape_util.py .
25 changes: 25 additions & 0 deletions docker-wrappers/Cytoscape/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Cytoscape image

A Docker image for [Cytoscape](https://cytoscape.org/) that is available on [DockerHub](https://hub.docker.com/r/reedcompbio/py4cytoscape).
It was originally derived from the [`docker-cytoscape-desktop/py4cytoscape`](https://github.com/cytoscape/docker-cytoscape-desktop/blob/173ab46b4b5e5c148113ad0c9960a6af3fc50432/py4cytoscape/Dockerfile) image.

Thank you to Scooter Morris for help debugging problems running Cytoscape in Singularity.

## Building the Docker image

To create the Docker image run:
```
docker build -t reedcompbio/py4cytoscape -f Dockerfile .
```
from this directory.

## Testing
Test code is located in `test/analysis/test_cytoscape.py`.
The Docker wrapper can be tested with `pytest`.

## Versions:
- v1: Use supervisord to launch Cytoscape from a Python subprocess, then connect to Cytoscape with py4cytoscape. Only loads undirected pathways. Compatible with Singularity in local testing (Apptainer version 1.2.2-1.el7) but fails in GitHub Actions.

## TODO
- Add an auth file for `xvfb-run`
- Java initial heap size, maximum Java heap size, and thread stack size are hard-coded in `Cytoscape.vmoptions` file
135 changes: 135 additions & 0 deletions docker-wrappers/Cytoscape/cytoscape_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import argparse
import subprocess
import time
from typing import List

import py4cytoscape as p4c
from requests.exceptions import RequestException

# Number of seconds to sleep before the first ping and between attempts to ping Cytoscape
SLEEP_INTERVAL = 10
# Maximum number of times to ping Cytoscape before giving up on the connection
MAX_CONNECTION_ATTEMPTS = 20


def get_parser() -> argparse.ArgumentParser:
    """
    Build the command line interface of the Cytoscape visualization script
    :return: an argparse ArgumentParser that accepts a required, repeatable --pathway argument
    and an optional --output argument
    """
    pathway_help = (
        'The path to a pathway file. Add the argument multiple times to visualize multiple pathways. '
        'Optionally use a | to append a label for the pathway such as path/to/file.txt|pathway_label'
    )
    output_help = (
        'The output filename of the Cytoscape session file, which will have the extension .cys added if it is not '
        'already provided. Default: cytoscape-session.cys'
    )

    cli = argparse.ArgumentParser(description='Visualize pathway files from SPRAS.')
    # Every occurrence of --pathway is appended to the same list
    cli.add_argument('--pathway', dest='pathways', type=str, action='append', required=True, help=pathway_help)
    cli.add_argument('--output', dest='output', type=str, default='cytoscape-session.cys', help=output_help)
    return cli


def parse_arguments() -> argparse.Namespace:
    """
    Create the argument parser and apply it to the command line arguments of the current process
    :return: an argparse Namespace holding the parsed command line arguments
    """
    return get_parser().parse_args()


def start_remote_cytoscape() -> None:
    """
    Use supervisord to start the Cytoscape process, then ping Cytoscape until a connection is established.
    Sleeps SLEEP_INTERVAL seconds before the first ping and between failed pings, and tries at most
    MAX_CONNECTION_ATTEMPTS pings.
    :raises RuntimeError: if the supervisord command exits with a non-zero status
    :raises ConnectionError: if Cytoscape cannot be reached within the maximum number of attempts
    """
    try:
        subprocess.run(
            ['/usr/bin/supervisord', '-c', '/etc/supervisor/conf.d/supervisord.conf'],
            check=True
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError('An error has occurred while trying to run Cytoscape') from e

    # Allow initial time to start up before trying to connect
    time.sleep(SLEEP_INTERVAL)
    for attempt in range(1, MAX_CONNECTION_ATTEMPTS + 1):
        try:
            p4c.cytoscape_ping()
        except (RequestException, p4c.exceptions.CyError):
            # Expected failures while Cytoscape is still starting, retry quietly
            pass
        except Exception as e:
            # Unexpected failure: report it but keep retrying, which is a deliberate best effort
            print(e, flush=True)
        else:
            print('Connected to Cytoscape', flush=True)
            return

        print('Pinging Cytoscape, waiting for connection... ', flush=True)
        # Do not waste a sleep interval after the final failed attempt
        if attempt < MAX_CONNECTION_ATTEMPTS:
            time.sleep(SLEEP_INTERVAL)

    raise ConnectionError('Could not connect to Cytoscape')


def parse_name(pathway: str) -> (str, str):
    """
    Extract the optional label from the pathway argument
    Only the first | separates the file path from the label, so a label that itself contains | is kept intact
    instead of being truncated at its first |.
    @param pathway: the command line pathway argument, which may contain a | separated label
    @return: a tuple with the file path and the label; the file path is reused as the label when no label or an
    empty label is provided
    """
    path, _, label = pathway.partition('|')
    # No label provided or empty label provided so the file path is the label
    if not label:
        return path, path
    # A valid label was provided
    return path, label


def load_pathways(pathways: List[str], output: str) -> None:
    """
    Start Cytoscape, import every pathway as its own network, and write all networks to one session file
    @param pathways: the pathway arguments to import, each a file path with an optional | separated label
    @param output: the name of the Cytoscape session file to save
    """
    if len(pathways) == 0:
        raise ValueError('One or more pathway files are required')

    # Cytoscape must be running and reachable before any network can be imported
    start_remote_cytoscape()

    for entry in pathways:
        file_path, label = parse_name(entry)
        # Tab-delimited file; the three columns are given the py4cytoscape column types s, t, and x
        network_suid = p4c.networks.import_network_from_tabular_file(
            file=file_path,
            column_type_list='s,t,x',
            delimiters='\t'
        )
        # Name the imported network after the label
        p4c.networks.rename_network(label, network=network_suid)

    p4c.session.save_session(output)


def main():
    """
    Script entry point: parse the command line arguments and build the Cytoscape session file
    """
    args = parse_arguments()
    load_pathways(pathways=args.pathways, output=args.output)


if __name__ == '__main__':
    main()
13 changes: 13 additions & 0 deletions docker-wrappers/Cytoscape/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[supervisord]
nodaemon=false

[program:cytoscape]
# supervisord syntax for environment variable substitution http://supervisord.org/configuration.html#environment-variables
# The variable is CYTOSCAPE_VERSION
# Removed auth file from the original example because the file used was not present anyway
# Should consider adding back an auth file
# --auth-file /root/.Xauth
command=/bin/bash -c 'xvfb-run -s "-screen 0 1920x1080x24" /py4cytoscape/cytoscape-unix-%(ENV_CYTOSCAPE_VERSION)s/cytoscape.sh'
priority=10
stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0
60 changes: 60 additions & 0 deletions src/analysis/cytoscape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from pathlib import Path, PurePath
from shutil import rmtree
from typing import List, Union

from src.util import prepare_volume, run_container


def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str, singularity: bool = False) -> None:
    """
    Create a Cytoscape session file with visualizations of each of the provided pathways
    Runs cytoscape_util.py inside the reedcompbio/py4cytoscape:v1 container. A temporary directory named after
    the output file without its .cys extension is created next to the output file, mounted into the container,
    and removed after the container finishes.
    @param pathways: a list of pathways to visualize
    @param output_file: the output Cytoscape session file
    @param singularity: whether to run in a Singularity container
    """
    work_dir = '/spras'

    # To work with Singularity, /spras must be mapped to a writeable location because that directory is fixed as
    # the home directory inside the container and Cytoscape writes configuration files there
    # $HOME cannot be set in the Dockerfile because Singularity overwrites home at launch
    env = f'HOME={work_dir}'

    # Each volume is a tuple (src, dest)
    volumes = []

    # A temporary directory for Cytoscape output files
    # Strip only a trailing .cys extension so that a '.cys' substring elsewhere in the path, such as in a parent
    # directory name, is left untouched
    output_path = Path(output_file)
    if output_path.suffix == '.cys':
        output_path = output_path.with_suffix('')
    cytoscape_output_dir = output_path.absolute()
    cytoscape_output_dir.mkdir(parents=True, exist_ok=True)

    # TODO update to the latest py4cytoscape and use env variable to control the log directory instead
    # Requires generalizing the run_container function to support multiple environment variables
    volumes.append((cytoscape_output_dir, PurePath(work_dir, 'logs')))
    # Only needed when running in Singularity
    volumes.append((cytoscape_output_dir, PurePath(work_dir, 'CytoscapeConfiguration')))

    # Map the output file
    bind_path, mapped_output = prepare_volume(output_file, work_dir)
    volumes.append(bind_path)

    # Create the initial Python command to run inside the container
    command = ['python', '/py4cytoscape/cytoscape_util.py', '--output', mapped_output]

    # Map the pathway filenames and add them to the Python command
    for pathway in pathways:
        bind_path, mapped_pathway = prepare_volume(pathway, work_dir)
        volumes.append(bind_path)
        # Provide the mapped pathway file path and the original file path as the label in Cytoscape
        command.extend(['--pathway', f'{mapped_pathway}|{pathway}'])

    print(f"Running Cytoscape with arguments: {' '.join(command)}", flush=True)

    # TODO consider making this a string in the config file instead of a Boolean
    container_framework = 'singularity' if singularity else 'docker'
    out = run_container(container_framework,
                        'reedcompbio/py4cytoscape:v1',
                        command,
                        volumes,
                        work_dir,
                        env)
    print(out)
    # NOTE(review): the temporary directory is only removed when run_container returns; if it raises, the
    # directory is left behind - confirm whether that is intended
    rmtree(cytoscape_output_dir)
Loading

0 comments on commit e67167f

Please sign in to comment.