Skip to content

Commit

Permalink
diffdock
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and Brandon Duane Walker committed May 30, 2024
1 parent 6d5007e commit ee6db7a
Show file tree
Hide file tree
Showing 16 changed files with 472 additions and 0 deletions.
29 changes: 29 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# bump2version configuration for the diffdock tool.
# Global options: the current version is 0.1.0, and a bump neither creates a
# commit nor a tag (commit = False, tag = False).
# `parse` accepts MAJOR.MINOR.PATCH with an optional "-<release><dev>" suffix
# (lowercase letters followed by digits, e.g. 0.1.0-dev1).
# `serialize` lists two output templates: one with the suffix, one without.
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

# The `release` part cycles through the values "dev" and "_", starting at "dev".
# "_" is declared as the optional value.
# NOTE(review): presumably "_" is what lets the suffix-free serialize template be
# chosen for a final release - confirm against the bump2version documentation.
[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

# The `dev` part has no options set here.
[bumpversion:part:dev]

# Files whose version string is rewritten on a bump.
# pyproject.toml is the only one with an explicit search/replace pattern, so that
# only the `version = "..."` line is targeted.
[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

# The remaining file sections set no options.
# NOTE(review): verify that each listed path exists relative to this config file.
[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/diffdock/__init__.py]
4 changes: 4 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
5 changes: 5 additions & 0 deletions utils/docking/diffdock/diffdock-tool/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.pdb filter=lfs diff=lfs merge=lfs -text
*.pdbqt filter=lfs diff=lfs merge=lfs -text
*.mol2 filter=lfs diff=lfs merge=lfs -text
*.xlsx filter=lfs diff=lfs merge=lfs -text
*.sdf filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/docking/diffdock/diffdock-tool/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/docking/diffdock/diffdock-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
58 changes: 58 additions & 0 deletions utils/docking/diffdock/diffdock-tool/Dockerfile_diffdock_gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# docker build -f Dockerfile_diffdock_gpu -t mrbrandonwalker/diffdock_gpu .
#
# Two-stage build:
#   1. "devel"   - CUDA devel image in which conda, the Python dependencies and DiffDock are installed
#                  and the DiffDock look-up tables are pre-computed.
#   2. "runtime" - CUDA runtime image that only receives /DiffDock, /mambaforge and the wrapper script.

FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 AS devel

# Install the tools needed to download and build the dependencies.
# DEBIAN_FRONTEND=noninteractive keeps apt from stopping the build on an interactive prompt.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y wget git build-essential

# Install conda / mamba
# The Mambaforge installers have been retired upstream; Miniforge3 ships both conda and mamba.
# The install prefix is kept at /mambaforge so every path used below is unchanged.
RUN CONDA="Miniforge3-Linux-x86_64.sh" && \
    wget --quiet https://github.com/conda-forge/miniforge/releases/latest/download/$CONDA && \
    chmod +x $CONDA && \
    ./$CONDA -b -p /mambaforge && \
    rm -f $CONDA
ENV PATH=/mambaforge/bin:$PATH

# -y: never wait for a confirmation prompt during a non-interactive build
RUN conda install -y pytorch==1.13.0 pytorch-cuda=11.7 pytorch-cluster -c pytorch -c nvidia -c pyg

# The DiffDock website suggests torch 1.12, but torch 1.12 does not exist for CUDA 11.7, so torch 1.13.0 is used
# https://github.com/gcorso/DiffDock https://data.pyg.org/whl/torch-1.13.0+cu117.html
# Need to install pytorch first before other torch packages
# Cannot use conda for these packages otherwise will install them with a default CUDA (not same as cuda 11.7)
# so need to specify which package versions (CUDA and Torch)
# If pytorch-cluster is installed below with the rest of the torch packages, you will get the error "Not compiled with CUDA support"

RUN pip install torch-scatter torch-sparse torch-spline-conv torch-geometric==2.0.4 -f https://data.pyg.org/whl/torch-1.13.0+cu117.html

RUN conda install -y PyYAML scipy "networkx[default]" biopython rdkit e3nn spyrmsd pandas biopandas

# See install for GPU https://github.com/gcorso/DiffDock, some packages are only in pip

RUN pip install 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' "fair-esm[esmfold]"

# NOTE(review): this clone is not pinned to a tag or commit, so a rebuild picks up whatever upstream
# currently is. The inference command below and the model directories referenced by diffdock.cwl
# depend on the upstream layout - consider pinning to the commit the image was validated against.
RUN git clone https://github.com/gcorso/DiffDock.git

WORKDIR /DiffDock

RUN conda init bash

# generate the pre-computed cached files for speeding up the inference
# See https://github.com/gcorso/DiffDock#running-diffdock-on-your-own-complexes
# Note that the first time you run DiffDock on a device the program will precompute and store in cache look-up tables for SO(2) and SO(3) distributions (typically takes a couple of minutes), this won't be repeated in following runs.
# output pre-computed cached files are of the format .*.npy such as .so3_omegas_array2.npy, .so3_cdf_vals2.npy, .so3_score_norms2.npy, .so3_exp_score_norms2.npy
RUN python -m inference --protein_ligand_csv data/protein_ligand_example_csv.csv --out_dir results/user_predictions_small --inference_steps 1 --samples_per_complex 1 --batch_size 1 --actual_steps 1

# Delete output results so not in same output folder as future runs
RUN rm -r results/user_predictions_small

# Clean up temp files
RUN mamba clean --all --yes

# Now copy everything into a minimal cuda runtime base image.
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 AS runtime

# Absolute paths so the copies do not depend on the WORKDIR of either stage
COPY --from=devel /DiffDock/ /DiffDock/
COPY --from=devel /mambaforge/ /mambaforge/

# shell file to copy cached files, run diffdock and remove large cached files after execution
# (COPY rather than ADD: this is a plain local file, no archive extraction or URL fetch is wanted)
COPY diffdock_cmds.sh /DiffDock/

ENV PATH=/mambaforge/bin:$PATH
22 changes: 22 additions & 0 deletions utils/docking/diffdock/diffdock-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# diffdock (0.1.0)

DiffDock Diffusion based protein ligand docking

## Options

This plugin takes 11 input arguments and 4 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| protein_path | Protein input file | Input | File | File |
| ligand_path | Ligand input file | Input | File | File |
| inference_steps | Number of inference steps for diffusion | Input | int | 20 |
| samples_per_complex | Number of pose samples to generate per complex | Input | int | 40 |
| batch_size | Batch size | Input | int | 1 |
| out_dir | Output directory to save poses | Input | string | results/ |
| model_dir | Input model directory to use | Input | string | /DiffDock/workdir/paper_score_model/ |
| confidence_model_dir | Input confidence model directory | Input | string | /DiffDock/workdir/paper_confidence_model |
| complex_name | Name of complex | Input | string | outputs |
| max_confident_pose | Highest confident pose | Output | File | File |
| output_files | The output poses | Output | File[] | File[] |
| execution_time | Time to run DiffDock | Output | float | float |
1 change: 1 addition & 0 deletions utils/docking/diffdock/diffdock-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
138 changes: 138 additions & 0 deletions utils/docking/diffdock/diffdock-tool/diffdock.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/env cwl-runner
# CWL CommandLineTool that runs DiffDock through the wrapper script
# /DiffDock/diffdock_cmds.sh (invoked with bash) and collects the ranked poses
# plus the elapsed time that the wrapper reports on stderr.
cwlVersion: v1.0

class: CommandLineTool

label: DiffDock Diffusion based protein ligand docking

doc: |-
DiffDock Diffusion based protein ligand docking

# The script is started as `bash <script>`, so the script's own shebang line is not used.
baseCommand: ["bash", "/DiffDock/diffdock_cmds.sh"]

# Hints: exactly one CUDA device is requested (device count min = max = 1) with
# CUDA version >= 11.7, and the tool image is mrbrandonwalker/diffdock_gpu.
# NOTE(review): the image is referenced without a tag - confirm this is intended.
hints:
cwltool:CUDARequirement:
cudaVersionMin: "11.7"
cudaComputeCapability: "3.0"
cudaDeviceCountMin: 1
cudaDeviceCountMax: 1
DockerRequirement:
dockerPull: mrbrandonwalker/diffdock_gpu

# InlineJavascriptRequirement is needed for the ${...} expression in execution_time.
requirements:
InlineJavascriptRequirement: {}
# Inputs that declare an inputBinding are passed on the command line with the given prefix.
inputs:

protein_path:
type: File
format: edam:format_1476
inputBinding:
prefix: --protein_path

# Note: the ligand file is passed with the prefix --ligand_description, not --ligand_path.
ligand_path:
type: File
format: edam:format_3814
inputBinding:
prefix: --ligand_description

inference_steps:
label: number of reverse diffusion steps
type: int?
inputBinding:
prefix: --inference_steps
default: 20

samples_per_complex:
label: Number of sample poses to generate per complex
type: int?
inputBinding:
prefix: --samples_per_complex
default: 40

batch_size:
label: input batch size for neural net
type: int?
inputBinding:
prefix: --batch_size
default: 1

# out_dir and complex_name together determine where the output globs below look:
# <out_dir>/<complex_name>/
out_dir:
label: where output from diffdock is saved
type: string?
inputBinding:
prefix: --out_dir
default: results/

model_dir:
label: directory of DiffDock score model from paper
type: string?
inputBinding:
prefix: --model_dir
default: /DiffDock/workdir/paper_score_model/

confidence_model_dir:
label: directory of DiffDock confidence model from paper
type: string?
inputBinding:
prefix: --confidence_model_dir
default: /DiffDock/workdir/paper_confidence_model

complex_name:
label: name of folder with pose outputs that will be saved under out_dir folder
type: string?
inputBinding:
prefix: --complex_name
default: outputs

# The next two inputs are optional strings without an inputBinding, so they add
# nothing to the command line.
# NOTE(review): presumably placeholders that name the corresponding outputs for
# the surrounding workflow tooling - confirm.
output_files:
type: string?

max_confident_pose:
type: string?

outputs:

max_confident_pose:
type: File
outputBinding:
# the diffdock developers copy only the top ranked pose to a new file rank1.sdf
glob: $(inputs.out_dir)/$(inputs.complex_name)/rank1.sdf
format: edam:format_3814

output_files:
type: File[]
outputBinding:
# all other output files besides rank1.sdf have confidence information in them rank*_confidence*.sdf
glob: $(inputs.out_dir)/$(inputs.complex_name)/rank*_confidence*.sdf
format: edam:format_3814

# The captured stderr file (see `stderr: stderr` below) is also exposed as an output.
stderr:
type: File
outputBinding:
glob: stderr

# The expression splits the stderr contents into lines, applies parseFloat to each,
# and returns the last line that yields a number. parseFloat accepts a leading
# numeric prefix, so a progress line such as "1it [00:41, ...]" also parses (to 1);
# the result is only the timing if the timing line comes after every such line.
# NOTE(review): loadContents reads only a limited amount of the file (64 KiB per the
# CWL specification); if stderr grows beyond that the final timing line may not be
# seen - confirm for long runs.
execution_time:
label: Time to run DiffDock
doc: |-
Time to run DiffDock
type: float
outputBinding:
glob: stderr
loadContents: true
outputEval: |
${
// the time command outputs to stderr and not to stdout
// example output below, parse the float value of seconds (first item in line)
// 1it [00:41, 41.03s/it]
// 78.909
return self[0].contents.split("\n").map(str => parseFloat(str)).reverse().find(num => !isNaN(num));
}

# Capture the tool's standard error in a file named "stderr" in the output directory.
stderr: stderr

$namespaces:
edam: https://edamontology.org/
cwltool: http://commonwl.org/cwltool#

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
7 changes: 7 additions & 0 deletions utils/docking/diffdock/diffdock-tool/diffdock_cmds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Wrapper around DiffDock inference.
#
# Usage: diffdock_cmds.sh [arguments forwarded unchanged to /DiffDock/inference.py]
#
# Steps:
#   1. copy the pre-computed .*.npy look-up tables from /DiffDock into the current directory,
#   2. run inference under `time`, which reports the elapsed seconds on stderr,
#   3. always remove the copied tables and the .cache/ directory again.
#
# Exit status: non-zero if the copy or the inference fails; the cleanup never changes it.

# errexit is enabled here instead of on the shebang line: shebang options are ignored
# when the script is started as `bash diffdock_cmds.sh`.
set -e

cleanup() {
    # need to remove large files otherwise cachedir folder will be 3GB each!!
    # -f: do not fail if a file or the directory was never created (e.g. the run aborted early)
    rm -f .*.npy
    rm -rf .cache/
}
# Run the cleanup on every exit path, including a failed inference.
trap cleanup EXIT

# copy cached files to current directory
cp /DiffDock/.*.npy .

# %R limits the `time` report to the elapsed real time in seconds.
TIMEFORMAT=%R
time python /DiffDock/inference.py "$@"
Loading

0 comments on commit ee6db7a

Please sign in to comment.