Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diffdock #129

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Version-bump configuration for the diffdock tool (read by bumpversion).
[bumpversion]
current_version = 0.1.0
# Bumping the version neither commits nor tags.
commit = False
tag = False
# Accepted version strings: major.minor.patch with an optional
# "-<release><dev>" suffix, e.g. 0.1.0 or 0.1.0-dev3.
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
# Output formats: the suffixed form first, the plain three-part form second.
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

# The "release" part starts at "dev"; "_" is its optional value.
[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

# The "dev" part has no overrides.
[bumpversion:part:dev]

# pyproject.toml is the only file with an explicit search/replace pattern.
[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

# The remaining files are listed without search/replace overrides.
[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/diffdock/__init__.py]
4 changes: 4 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Paths left out of the Docker build context.
.venv
out
tests
__pycache__
5 changes: 5 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Route these file types through Git LFS (filter/diff/merge) and unset the
# text attribute so they are not treated as text.
*.pdb filter=lfs diff=lfs merge=lfs -text
*.pdbqt filter=lfs diff=lfs merge=lfs -text
*.mol2 filter=lfs diff=lfs merge=lfs -text
*.xlsx filter=lfs diff=lfs merge=lfs -text
*.sdf filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/docking/diffdock/diffdock-tool/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# The Poetry lock file is kept out of version control.
poetry.lock
5 changes: 5 additions & 0 deletions utils/docking/diffdock/diffdock-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
78 changes: 78 additions & 0 deletions utils/docking/diffdock/diffdock-tool/Dockerfile_diffdock_gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# docker build -f Dockerfile_diffdock_gpu -t mrbrandonwalker/diffdock-gpu-tool .
# Stage 1: Build Environment Setup
# Fetches micromamba and the DiffDock sources and creates the Conda
# environment; the runtime stage copies $HOME/micromamba, $HOME/bin and the
# DiffDock checkout out of this stage.
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS builder

# Tools used below to download and unpack micromamba and to clone DiffDock
RUN apt-get update -y && apt-get install -y wget curl git tar bzip2 unzip && rm -rf /var/lib/apt/lists/*

# Create a user (uid 1000) and work from its home directory
ENV APPUSER="appuser"
ENV HOME=/home/$APPUSER
RUN useradd -m -u 1000 $APPUSER
USER $APPUSER
WORKDIR $HOME

ENV ENV_NAME="diffdock"
ENV DIR_NAME="DiffDock"

# Install micromamba: only bin/micromamba is extracted from the archive,
# so the binary ends up at $HOME/bin/micromamba
RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj bin/micromamba
ENV PATH=$HOME/bin:$HOME/.local/bin:$PATH

# Clone DiffDock and create the Conda environment from its environment file.
# A shallow clone is enough: only the current tree is used, and the whole
# checkout (including .git) is copied into the runtime image.
ENV ENV_FILE_NAME=environment.yml
RUN git clone --depth 1 https://github.com/gcorso/DiffDock.git $DIR_NAME
WORKDIR $HOME/$DIR_NAME
RUN ~/bin/micromamba env create --file $ENV_FILE_NAME && ~/bin/micromamba clean -afy --quiet

# Download models
# These should download automatically on first inference
# RUN curl -L -o diffdock_models_v1.1.zip "https://www.dropbox.com/scl/fi/drg90rst8uhd2633tyou0/diffdock_models.zip?rlkey=afzq4kuqor2jb8adah41ro2lz&dl=1" \
# && mkdir -p $HOME/$DIR_NAME/workdir \
# && unzip diffdock_models_v1.1.zip -d $HOME/$DIR_NAME/workdir


# Stage 2: Runtime Environment
# Slim CUDA runtime image that receives the prebuilt Conda environment and
# the DiffDock checkout from the builder stage.
FROM nvidia/cuda:11.7.1-runtime-ubuntu22.04

# Create user and setup environment (same name, uid and home as in the builder
# stage so the copied paths stay valid)
ENV APPUSER="appuser"
ENV HOME=/home/$APPUSER
RUN useradd -m -u 1000 $APPUSER
USER $APPUSER
WORKDIR $HOME

ENV ENV_NAME="diffdock"
ENV DIR_NAME="DiffDock"

# Copy the Conda environment, the micromamba binary and the application code
# from the builder stage (each path is copied exactly once)
COPY --from=builder --chown=$APPUSER:$APPUSER $HOME/micromamba $HOME/micromamba
COPY --from=builder --chown=$APPUSER:$APPUSER $HOME/bin $HOME/bin
COPY --from=builder --chown=$APPUSER:$APPUSER $HOME/$DIR_NAME $HOME/$DIR_NAME
WORKDIR $HOME/$DIR_NAME

# Set the environment variables
ENV MAMBA_ROOT_PREFIX=$HOME/micromamba
ENV PATH=$HOME/bin:$HOME/.local/bin:$PATH
RUN micromamba shell init -s bash --root-prefix $MAMBA_ROOT_PREFIX

# Precompute series for SO(2) and SO(3) groups
RUN micromamba run -n ${ENV_NAME} python utils/precompute_series.py

# Expose ports for streamlit and gradio
EXPOSE 7860 8501

# shell file to copy cached files, run diffdock and remove large cached files after execution
# Copied with appuser ownership directly, so no switch to root is needed.
COPY --chown=$APPUSER:$APPUSER diffdock_cmds.sh $HOME/$DIR_NAME/

# Readable and executable by everyone, writable only by the owner
# (the script does not need to be world-writable).
RUN chmod 755 $HOME/$DIR_NAME/diffdock_cmds.sh

# Default command
CMD ["sh", "-c", "micromamba run -n ${ENV_NAME} python utils/print_device.py"]
ENTRYPOINT []
22 changes: 22 additions & 0 deletions utils/docking/diffdock/diffdock-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# diffdock (0.1.0)

DiffDock: diffusion-based protein-ligand docking.

## Options

This plugin takes 11 input arguments and 4 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| protein_path | Protein input file | Input | File | (required) |
| ligand_path | Ligand input file | Input | File | (required) |
| inference_steps | Number of inference steps for diffusion | Input | int | 20 |
| samples_per_complex | Number of pose samples to generate per complex | Input | int | 40 |
| batch_size | Batch size | Input | int | 1 |
| out_dir | Output directory to save poses | Input | string | results/ |
| model_dir | Input model directory to use | Input | string | /DiffDock/workdir/paper_score_model/ |
| confidence_model_dir | Input confidence model directory | Input | string | /DiffDock/workdir/paper_confidence_model |
| complex_name | Name of complex | Input | string | outputs |
| max_confident_pose | Highest confident pose | Output | File | n/a |
| output_files | The output poses | Output | File[] | n/a |
| execution_time | Time to run DiffDock | Output | float | n/a |
1 change: 1 addition & 0 deletions utils/docking/diffdock/diffdock-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
140 changes: 140 additions & 0 deletions utils/docking/diffdock/diffdock-tool/diffdock_0@[email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: DiffDock Diffusion based protein ligand docking

doc: DiffDock Diffusion based protein ligand docking

# Wrapper script inside the container image; the bound inputs below are
# appended to this command line.
baseCommand:
  - bash
  - /home/appuser/DiffDock/diffdock_cmds.sh


hints:
  # cwltool extension: request exactly one CUDA device.
  cwltool:CUDARequirement:
    cudaVersionMin: "11.7"
    cudaComputeCapability: "3.0"
    cudaDeviceCountMin: 1
    cudaDeviceCountMax: 1
  # Container image, pinned by digest.
  DockerRequirement:
    dockerPull: mrbrandonwalker/diffdock-gpu-tool@sha256:ae2a6de920e8d250cabcf86a20f9e7be28503c0d70483635a7639392bed7f4c2

requirements:
  # Required by the ${...} expression used for execution_time.
  InlineJavascriptRequirement: {}

inputs:

  protein_path:
    type: File
    format: edam:format_1476
    inputBinding:
      prefix: --protein_path

  # Passed to the script as --ligand_description.
  ligand_path:
    type: File
    format: edam:format_3814
    inputBinding:
      prefix: --ligand_description

  inference_steps:
    label: number of reverse diffusion steps
    type: int?
    default: 20
    inputBinding:
      prefix: --inference_steps

  samples_per_complex:
    label: Number of sample poses to generate per complex
    type: int?
    default: 40
    inputBinding:
      prefix: --samples_per_complex

  batch_size:
    label: input batch size for neural net
    type: int?
    default: 1
    inputBinding:
      prefix: --batch_size

  out_dir:
    label: where output from diffdock is saved
    type: string?
    default: results/
    inputBinding:
      prefix: --out_dir

  model_dir:
    label: directory of DiffDock score model from paper
    type: string?
    default: /DiffDock/workdir/paper_score_model/
    inputBinding:
      prefix: --model_dir

  confidence_model_dir:
    label: directory of DiffDock confidence model from paper
    type: string?
    default: /DiffDock/workdir/paper_confidence_model
    inputBinding:
      prefix: --confidence_model_dir

  complex_name:
    label: name of folder with pose outputs that will be saved under out_dir folder
    type: string?
    default: outputs
    inputBinding:
      prefix: --complex_name

  # The two string inputs below have no inputBinding, so they add nothing to
  # the command line; they share their names with the outputs further down.
  output_files:
    type: string?

  max_confident_pose:
    type: string?

outputs:

  max_confident_pose:
    type: File
    format: edam:format_3814
    outputBinding:
      # the diffdock developers copy only the top ranked pose to a new file rank1.sdf
      glob: $(inputs.out_dir)/$(inputs.complex_name)/rank1.sdf

  output_files:
    type: File[]
    format: edam:format_3814
    outputBinding:
      # all other output files besides rank1.sdf have confidence information in them rank*_confidence*.sdf
      glob: $(inputs.out_dir)/$(inputs.complex_name)/rank*_confidence*.sdf

  # The captured standard-error stream (see the top-level `stderr` key).
  stderr:
    type: File
    outputBinding:
      glob: stderr

  # Parsed from the same stderr file: the last line that parses as a float.
  execution_time:
    label: Time to run DiffDock
    doc: Time to run DiffDock
    type: float
    outputBinding:
      glob: stderr
      loadContents: true
      outputEval: |
        ${
          // the time command outputs to stderr and not to stdout
          // example output below, parse the float value of seconds (first item in line)
          // 1it [00:41, 41.03s/it]
          // 78.909
          return self[0].contents.split("\n").map(str => parseFloat(str)).reverse().find(num => !isNaN(num));
        }

# Redirect the tool's standard error into a file named "stderr".
stderr: stderr

$namespaces:
  edam: https://edamontology.org/
  cwltool: http://commonwl.org/cwltool#

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
7 changes: 7 additions & 0 deletions utils/docking/diffdock/diffdock-tool/diffdock_cmds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash -e
# Run DiffDock inference in the current directory and clean up afterwards.
#
# Usage: diffdock_cmds.sh [inference.py arguments...]
#   All arguments are forwarded unchanged to inference.py.
#   The elapsed wall-clock time in seconds is printed to stderr by `time`.
#
# Environment:
#   DIFFDOCK_DIR  directory holding inference.py and the cached .npy files
#                 (default: /DiffDock)
#   NOTE(review): the accompanying Dockerfile installs DiffDock under
#   /home/appuser/DiffDock - confirm the default matches the published image.

# Enable exit-on-error explicitly: when the script is started as
# `bash diffdock_cmds.sh`, the options on the shebang line are not applied,
# and a failed inference would otherwise be hidden by the exit status of the
# last cleanup command.
set -e

DIFFDOCK_DIR="${DIFFDOCK_DIR:-/DiffDock}"

# need to remove large files otherwise cachedir folder will be 3GB each!!
# Runs on every exit path (success or failure); -f keeps the cleanup itself
# from failing when there is nothing to remove.
cleanup() {
    rm -f .*.npy
    rm -rf .cache/
}
trap cleanup EXIT

# copy cached files to current directory
cp "${DIFFDOCK_DIR}"/.*.npy .

# TIMEFORMAT=%R makes `time` print only the elapsed seconds
TIMEFORMAT=%R && time python "${DIFFDOCK_DIR}/inference.py" "$@"
Loading
Loading