-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Brandon Duane Walker
authored and
Brandon Duane Walker
committed
May 30, 2024
1 parent
6d5007e
commit ee6db7a
Showing
16 changed files
with
472 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# bumpversion configuration. Creating a commit and creating a tag are both switched off. | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
# Accepted version strings: MAJOR.MINOR.PATCH, optionally followed by -<release><dev> | ||
# where <release> is lowercase letters and <dev> is digits (e.g. 0.1.0-dev1). | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
# Two output formats are listed: with and without the -<release><dev> suffix. | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
# The release part starts at "dev"; "_" is declared as its optional value. | ||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
# The dev part has no custom settings. | ||
[bumpversion:part:dev] | ||
|
||
# Files listed below are registered with bumpversion. Only pyproject.toml | ||
# declares an explicit search/replace pattern: version = "<version>". | ||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/diffdock/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
*.pdb filter=lfs diff=lfs merge=lfs -text | ||
*.pdbqt filter=lfs diff=lfs merge=lfs -text | ||
*.mol2 filter=lfs diff=lfs merge=lfs -text | ||
*.xlsx filter=lfs diff=lfs merge=lfs -text | ||
*.sdf filter=lfs diff=lfs merge=lfs -text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
58 changes: 58 additions & 0 deletions
58
utils/docking/diffdock/diffdock-tool/Dockerfile_diffdock_gpu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# docker build -f Dockerfile_diffdock_gpu -t mrbrandonwalker/diffdock_gpu . | ||
|
||
# Build stage: installs mamba, the CUDA 11.7 PyTorch / PyG stack and DiffDock, | ||
# then runs one tiny inference so DiffDock's cached look-up tables already exist. | ||
FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 AS devel | ||
|
||
# Install conda / mamba | ||
# The apt package index is removed in the same layer that downloads it. | ||
RUN apt-get update && apt-get install -y wget git build-essential && rm -rf /var/lib/apt/lists/* | ||
|
||
# NOTE(review): this always downloads the latest installer, so builds are not reproducible. | ||
# Upstream has announced retiring the Mambaforge installers in favour of Miniforge3 - | ||
# confirm this URL still resolves and consider pinning a specific release. | ||
RUN CONDA="Mambaforge-Linux-x86_64.sh" && \ | ||
wget --quiet https://github.com/conda-forge/miniforge/releases/latest/download/$CONDA && \ | ||
chmod +x $CONDA && \ | ||
./$CONDA -b -p /mambaforge && \ | ||
rm -f $CONDA | ||
ENV PATH=/mambaforge/bin:$PATH | ||
|
||
RUN conda install pytorch==1.13.0 pytorch-cuda=11.7 pytorch-cluster -c pytorch -c nvidia -c pyg | ||
|
||
# The DiffDock site suggests torch 1.12, but torch 1.12 is not available with CUDA 11.7, so torch 1.13.0 is used instead. | ||
# See https://github.com/gcorso/DiffDock and https://data.pyg.org/whl/torch-1.13.0+cu117.html | ||
# PyTorch must be installed before the other torch packages. | ||
# conda cannot be used for the packages below: it would install builds for a default CUDA version (not CUDA 11.7), | ||
# so the wheel index for the exact Torch and CUDA versions is given explicitly. | ||
# pytorch-cluster is installed with conda above on purpose: installing it below with the rest of the | ||
# torch packages fails with the error "Not compiled with CUDA support". | ||
|
||
RUN pip install torch-scatter torch-sparse torch-spline-conv torch-geometric==2.0.4 -f https://data.pyg.org/whl/torch-1.13.0+cu117.html | ||
|
||
RUN conda install PyYAML scipy "networkx[default]" biopython rdkit e3nn spyrmsd pandas biopandas | ||
|
||
# See install for GPU https://github.com/gcorso/DiffDock, some packages are only in pip | ||
|
||
RUN pip install 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' "fair-esm[esmfold]" | ||
|
||
# NOTE(review): the clone is not pinned to a tag or commit, so the image follows the default branch - consider pinning. | ||
RUN git clone https://github.com/gcorso/DiffDock.git | ||
|
||
WORKDIR /DiffDock | ||
|
||
RUN conda init bash | ||
|
||
# generate the pre-computed cached files for speeding up the inference | ||
# See https://github.com/gcorso/DiffDock#running-diffdock-on-your-own-complexes | ||
# Note that the first time you run DiffDock on a device the program will precompute and store in cache look-up tables for SO(2) and SO(3) distributions (typically takes a couple of minutes), this won't be repeated in following runs. | ||
# output pre-computed cached files are of the format .*.npy such as .so3_omegas_array2.npy, .so3_cdf_vals2.npy, .so3_score_norms2.npy, .so3_exp_score_norms2.npy | ||
RUN python -m inference --protein_ligand_csv data/protein_ligand_example_csv.csv --out_dir results/user_predictions_small --inference_steps 1 --samples_per_complex 1 --batch_size 1 --actual_steps 1 | ||
|
||
# Delete output results so not in same output folder as future runs | ||
RUN rm -r results/user_predictions_small | ||
|
||
# Clean up temp files (the /mambaforge tree is what the runtime stage copies) | ||
RUN mamba clean --all --yes | ||
|
||
# Now copy everything into a minimal cuda runtime base image. | ||
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 AS runtime | ||
|
||
# Absolute destinations so the copies do not depend on the stage's working directory. | ||
COPY --from=devel /DiffDock/ /DiffDock/ | ||
COPY --from=devel /mambaforge/ /mambaforge/ | ||
|
||
# shell file to copy cached files, run diffdock and remove large cached files after execution | ||
# COPY instead of ADD: this is a plain local file, none of ADD's extra behaviour is needed. | ||
COPY diffdock_cmds.sh /DiffDock/ | ||
|
||
# Put the copied conda environment's python and tools on PATH. | ||
ENV PATH=/mambaforge/bin:$PATH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# diffdock (0.1.0) | ||
|
||
DiffDock: diffusion-based protein-ligand docking. | ||
|
||
## Options | ||
|
||
This plugin takes 11 input arguments and 4 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| protein_path | Protein input file | Input | File | - | | ||
| ligand_path | Ligand input file | Input | File | - | | ||
| inference_steps | Number of inference steps for diffusion | Input | int | 20 | | ||
| samples_per_complex | Number of pose samples to generate per complex | Input | int | 40 | | ||
| batch_size | Batch size | Input | int | 1 | | ||
| out_dir | Output directory to save poses | Input | string | results/ | | ||
| model_dir | Input model directory to use | Input | string | /DiffDock/workdir/paper_score_model/ | | ||
| confidence_model_dir | Input confidence model directory | Input | string | /DiffDock/workdir/paper_confidence_model | | ||
| complex_name | Name of complex | Input | string | outputs | | ||
| max_confident_pose | Highest confident pose | Output | File | - | | ||
| output_files | The output poses | Output | File[] | - | | ||
| execution_time | Time to run DiffDock | Output | float | - | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#!/usr/bin/env cwl-runner | ||
# CWL description of the DiffDock docking tool. | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: DiffDock Diffusion based protein ligand docking | ||
|
||
doc: |- | ||
DiffDock Diffusion based protein ligand docking | ||
|
||
# The tool is started through the wrapper script with bash; bound inputs are appended as arguments. | ||
baseCommand: ["bash", "/DiffDock/diffdock_cmds.sh"] | ||
|
||
# Hints: exactly one CUDA device (min = max = 1), minimum CUDA version 11.7, | ||
# and the mrbrandonwalker/diffdock_gpu Docker image. | ||
hints: | ||
cwltool:CUDARequirement: | ||
cudaVersionMin: "11.7" | ||
cudaComputeCapability: "3.0" | ||
cudaDeviceCountMin: 1 | ||
cudaDeviceCountMax: 1 | ||
DockerRequirement: | ||
dockerPull: mrbrandonwalker/diffdock_gpu | ||
|
||
# JavaScript is needed for the outputEval expression of the execution_time output. | ||
requirements: | ||
InlineJavascriptRequirement: {} | ||
# Inputs that declare an inputBinding are passed on the command line using the given prefix. | ||
inputs: | ||
|
||
# Required protein file (format edam:format_1476). | ||
protein_path: | ||
type: File | ||
format: edam:format_1476 | ||
inputBinding: | ||
prefix: --protein_path | ||
|
||
# Required ligand file (format edam:format_3814); note the flag is --ligand_description. | ||
ligand_path: | ||
type: File | ||
format: edam:format_3814 | ||
inputBinding: | ||
prefix: --ligand_description | ||
|
||
# Optional, defaults to 20. | ||
inference_steps: | ||
label: number of reverse diffusion steps | ||
type: int? | ||
inputBinding: | ||
prefix: --inference_steps | ||
default: 20 | ||
|
||
# Optional, defaults to 40. | ||
samples_per_complex: | ||
label: Number of sample poses to generate per complex | ||
type: int? | ||
inputBinding: | ||
prefix: --samples_per_complex | ||
default: 40 | ||
|
||
# Optional, defaults to 1. | ||
batch_size: | ||
label: input batch size for neural net | ||
type: int? | ||
inputBinding: | ||
prefix: --batch_size | ||
default: 1 | ||
|
||
# Optional, defaults to results/ ; also used by the output globs. | ||
out_dir: | ||
label: where output from diffdock is saved | ||
type: string? | ||
inputBinding: | ||
prefix: --out_dir | ||
default: results/ | ||
|
||
# Optional; the default is a path under /DiffDock/workdir. | ||
model_dir: | ||
label: directory of DiffDock score model from paper | ||
type: string? | ||
inputBinding: | ||
prefix: --model_dir | ||
default: /DiffDock/workdir/paper_score_model/ | ||
|
||
# Optional; the default is a path under /DiffDock/workdir. | ||
confidence_model_dir: | ||
label: directory of DiffDock confidence model from paper | ||
type: string? | ||
inputBinding: | ||
prefix: --confidence_model_dir | ||
default: /DiffDock/workdir/paper_confidence_model | ||
|
||
# Optional, defaults to outputs ; also used by the output globs. | ||
complex_name: | ||
label: name of folder with pose outputs that will be saved under out_dir folder | ||
type: string? | ||
inputBinding: | ||
prefix: --complex_name | ||
default: outputs | ||
|
||
# The two inputs below have no inputBinding, so they are not placed on the command line. | ||
# NOTE(review): presumably placeholders consumed by workflow tooling - confirm. | ||
output_files: | ||
type: string? | ||
|
||
max_confident_pose: | ||
type: string? | ||
|
||
# Pose outputs are collected from <out_dir>/<complex_name>/; timing is parsed from the captured stderr file. | ||
outputs: | ||
|
||
# Single file rank1.sdf (format edam:format_3814). | ||
max_confident_pose: | ||
type: File | ||
outputBinding: | ||
# the diffdock developers copy only the top ranked pose to a new file rank1.sdf | ||
glob: $(inputs.out_dir)/$(inputs.complex_name)/rank1.sdf | ||
format: edam:format_3814 | ||
|
||
# Every file matching rank*_confidence*.sdf (format edam:format_3814). | ||
output_files: | ||
type: File[] | ||
outputBinding: | ||
# all other output files besides rank1.sdf have confidence information in them rank*_confidence*.sdf | ||
glob: $(inputs.out_dir)/$(inputs.complex_name)/rank*_confidence*.sdf | ||
format: edam:format_3814 | ||
|
||
# The file named "stderr" in the output directory. | ||
stderr: | ||
type: File | ||
outputBinding: | ||
glob: stderr | ||
|
||
# Float read from the stderr file: the expression splits the contents into lines, applies parseFloat | ||
# to each line and returns the last line that yields a number. | ||
execution_time: | ||
label: Time to run DiffDock | ||
doc: |- | ||
Time to run DiffDock | ||
type: float | ||
outputBinding: | ||
glob: stderr | ||
# NOTE(review): the CWL spec limits how much of a file loadContents reads (64 KiB); the timing line | ||
# is at the end of stderr, so confirm stderr stays below that limit. | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
// the time command outputs to stderr and not to stdout | ||
// example output below, parse the float value of seconds (first item in line) | ||
// 1it [00:41, 41.03s/it] | ||
// 78.909 | ||
return self[0].contents.split("\n").map(str => parseFloat(str)).reverse().find(num => !isNaN(num)); | ||
} | ||
|
||
# Capture the tool's standard error in a file named "stderr" (globbed by the stderr and execution_time outputs). | ||
stderr: stderr | ||
|
||
# Prefixes used above: edam: for the file formats, cwltool: for the CUDARequirement hint. | ||
$namespaces: | ||
edam: https://edamontology.org/ | ||
cwltool: http://commonwl.org/cwltool# | ||
|
||
# Ontology file referenced for the edam: format identifiers. | ||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash -e | ||
# copy cached files to current directory | ||
cp /DiffDock/.*.npy . | ||
TIMEFORMAT=%R && time python /DiffDock/inference.py "$@" | ||
# need to remove large files otherwise cachedir folder will be 3GB each!! | ||
rm .*.npy | ||
rm -r .cache/ |
Oops, something went wrong.