-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Brandon Duane Walker
authored and
Brandon Duane Walker
committed
May 28, 2024
1 parent
5965ec4
commit 07e3016
Showing
15 changed files
with
370 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/extract_ligand_protein/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
|
||
# Work directory defined in the base container | ||
# WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
|
||
# Install needed packages here | ||
|
||
RUN pip install filepattern | ||
RUN conda config --add channels conda-forge | ||
RUN conda install mdanalysis | ||
|
||
|
||
COPY src ${EXEC_DIR}/src | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir | ||
|
||
CMD ["--help"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# extract_ligand_protein (0.1.0) | ||
|
||
A tool that employs MDAnalysis to extract the ligand and the protein from a PDB file. | ||
## Options | ||
|
||
This plugin takes 3 input arguments and 0 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| input_pdb_path | Input pdb file path, Type: string, File type: input, Accepted formats: pdb, Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb | Input | File | File | | ||
| output_pdb_path | Output pdb file path, Type: string, File type: output, Accepted formats: pdb, Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb | Input | string | string | | ||
| output_pdb_ligand_path | Output pdb ligand file path, Type: string, File type: output, Accepted formats: sdf | Input | string | string | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
version=$(<VERSION) | ||
docker build . -t polusai/extract-ligand-protein-plugin:${version} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
specVersion: "0.1.0" | ||
name: extract_ligand_protein | ||
version: 0.1.0 | ||
container: extract-ligand-protein-plugin | ||
entrypoint: | ||
title: extract_ligand_protein | ||
description: A tool that employs OpenMM to extract ligands and protein from a PDB file | ||
author: Data Scientist | ||
contact: [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: input_pdb_path | ||
required: true | ||
description: Input pdb file path, Type string, File type input, Accepted formats pdb, Example file https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb | ||
type: File | ||
format: | ||
uri: edam:format_1476 | ||
- name: output_pdb_path | ||
required: true | ||
description: Output pdb file path, Type string, File type output, Accepted formats pdb, Example file https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb | ||
type: string | ||
defaultValue: system.pdb | ||
format: | ||
uri: edam:format_1476 | ||
- name: output_pdb_ligand_path | ||
required: true | ||
description: Output pdb ligand file path, Type string, File type output, Accepted formats sdf | ||
type: string | ||
defaultValue: ligand_system.pdb | ||
format: | ||
uri: edam:format_1476 | ||
outputs: | ||
ui: | ||
- key: inputs.input_pdb_path | ||
title: "input_pdb_path: " | ||
description: "Input pdb file path, Type: string, File type: input, Accepted formats: pdb, Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb" | ||
type: File | ||
- key: inputs.output_pdb_path | ||
title: "output_pdb_path: " | ||
description: "Output pdb file path, Type: string, File type: output, Accepted formats: pdb, Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb" | ||
type: string | ||
- key: inputs.output_pdb_ligand_path | ||
title: "output_pdb_ligand_path: " | ||
description: "Output pdb ligand file path, Type: string, File type: output, Accepted formats: sdf" | ||
type: string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[tool.poetry] | ||
name = "polus-mm-utils-extract-ligand-protein" | ||
version = "0.1.0" | ||
description = "An awesome function." | ||
authors = ["Data Scientist <[email protected]>"] | ||
readme = "README.md" | ||
packages = [{include = "polus", from = "src"}] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.9,<3.12" | ||
typer = "^0.7.0" | ||
mdanalysis = "2.7.0" | ||
cwl-utils = "0.33" | ||
cwltool = "3.1.20240404144621" | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
bump2version = "^1.0.1" | ||
pytest = "^7.4" | ||
pytest-sugar = "^0.9.6" | ||
pre-commit = "^3.2.1" | ||
black = "^23.3.0" | ||
mypy = "^1.1.1" | ||
ruff = "^0.0.270" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = [ | ||
"." | ||
] |
7 changes: 7 additions & 0 deletions
7
utils/extract-ligand-protein-plugin/src/polus/mm/utils/extract_ligand_protein/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""extract_ligand_protein.""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
from src.polus.mm.utils.extract_ligand_protein import ( # noqa # pylint: disable=unused-import | ||
extract_ligand_protein, | ||
) |
55 changes: 55 additions & 0 deletions
55
utils/extract-ligand-protein-plugin/src/polus/mm/utils/extract_ligand_protein/__main__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
"""Package entrypoint for the extract_ligand_protein package.""" | ||
|
||
# Base packages | ||
import logging | ||
from os import environ | ||
from pathlib import Path | ||
|
||
import typer | ||
from polus.mm.utils.extract_ligand_protein.extract_ligand_protein import ( | ||
extract_all_ligand_protein, | ||
) | ||
|
||
logging.basicConfig( | ||
format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", | ||
datefmt="%d-%b-%y %H:%M:%S", | ||
) | ||
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO")) | ||
logger = logging.getLogger("polus.mm.utils.extract_ligand_protein.") | ||
logger.setLevel(POLUS_LOG) | ||
|
||
app = typer.Typer(help="extract_ligand_protein.") | ||
|
||
|
||
@app.command()
def main(
    input_pdb_path: Path = typer.Option(
        ...,
        "--input_pdb_path",
        help="Input pdb file path, Type: string",
    ),
    output_pdb_path: str = typer.Option(
        ...,
        "--output_pdb_path",
        help="Output pdb file path, Type: string",
    ),
    output_pdb_ligand_path: str = typer.Option(
        ...,
        "--output_pdb_ligand_path",
        help="Output pdb ligand file path, Type: string",
    ),
) -> None:
    """extract_ligand_protein."""
    # NOTE: the docstring above is deliberately left untouched because Typer
    # uses a command's docstring as its help text.
    #
    # CLI entry point: logs the three paths it received and extracts the
    # protein and the ligand of ONE input PDB file into the two requested
    # output files.
    #
    # Args:
    #     input_pdb_path: PDB file to read.
    #     output_pdb_path: Destination of the protein-only PDB file.
    #     output_pdb_ligand_path: Destination of the ligand file.

    # The single-file extractor is the function whose signature matches the
    # three CLI options. `extract_all_ligand_protein` takes
    # (list of input paths, output directory) and raises TypeError when it is
    # given `output_pdb_path` / `output_pdb_ligand_path` keywords.
    # Imported locally so this function does not rely on the module-level
    # import list.
    from polus.mm.utils.extract_ligand_protein.extract_ligand_protein import (
        extract_single_ligand_protein,
    )

    # Lazy %-style arguments: the emitted messages are unchanged.
    logger.info("input_pdb_path: %s", input_pdb_path)
    logger.info("output_pdb_path: %s", output_pdb_path)
    logger.info("output_pdb_ligand_path: %s", output_pdb_ligand_path)

    extract_single_ligand_protein(
        input_pdb_path=input_pdb_path,
        # The CLI receives the output locations as plain strings; the
        # extractor is annotated with Path.
        output_pdb_path=Path(output_pdb_path),
        output_pdb_ligand_path=Path(output_pdb_ligand_path),
    )
|
||
|
||
if __name__ == "__main__": | ||
app() |
115 changes: 115 additions & 0 deletions
115
...ligand-protein-plugin/src/polus/mm/utils/extract_ligand_protein/extract_ligand_protein.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
"""Extract ligand and protein from the PDB file.""" | ||
|
||
from pathlib import Path | ||
|
||
import MDAnalysis | ||
|
||
|
||
def extract_single_ligand_protein(  # noqa: PLR0912
    input_pdb_path: Path,
    output_pdb_path: Path,
    output_pdb_ligand_path: Path,
) -> None:
    """Extract ligand & protein from the PDB file.

    The atoms matched by the ``protein`` selection are written to
    ``output_pdb_path``. Of the remaining atoms, water molecules (an atom
    named ``O`` with exactly two bonds whose bonded atoms are all named
    ``H``) and atoms without any bond are dropped; whatever is left is
    written to ``output_pdb_ligand_path``. When nothing is left, no ligand
    file is written.

    Water and non-bonded atoms can only be identified when bond information
    is available (guessed, or already present in the input). Without it the
    ligand file contains every non-protein atom.

    Args:
        input_pdb_path (Path): The path to the input pdb file
        output_pdb_path (Path): The path to the output pdb file
        output_pdb_ligand_path (Path): The path to the output pdb ligand file
    """
    # Number of bonds an oxygen must have to be considered a water oxygen.
    water_oxygen_bonds = 2

    # Universe the output selections are taken from.
    u = MDAnalysis.Universe(input_pdb_path)
    protein_atoms = u.select_atoms("protein")
    ligand_atoms = u.select_atoms("not protein")

    # A second, independent copy of the structure is used for the bond
    # analysis so that guessed bonds are never attached to the universe that
    # is written out. Atom indices are identical in both copies.
    dup_u = MDAnalysis.Universe(input_pdb_path)
    try:
        # guess the bonds, since input PDB may not have bonds
        dup_u.atoms.guess_bonds()
    except ValueError:
        # ValueError: vdw radii for types: AS.
        # These can be defined manually using the keyword 'vdwradii'
        print("Error: Could not guess bonds. Check the input PDB file.")  # noqa: T201

    try:
        len(dup_u.atoms.bonds)
    except MDAnalysis.exceptions.NoDataError:
        has_bonds = False
        print("No bonds found in the PDB file.")  # noqa: T201
    else:
        has_bonds = True

    # Always bound, so the filtering below is well defined even when no bond
    # information could be obtained.
    water_indices: set[int] = set()
    non_bonded: set[int] = set()

    if has_bonds:
        for atom in dup_u.atoms:
            n_bonds = len(atom.bonds)
            # Waters are recognised by connectivity instead of residue name,
            # because PDB files use different water residue names.
            if atom.name == "O" and n_bonds == water_oxygen_bonds:
                neighbours = atom.bonded_atoms
                if {a.name for a in neighbours} == {"H"}:
                    water_indices.add(atom.index)
                    water_indices.update(a.index for a in neighbours)
            # Salts and waters without hydrogens show up as isolated atoms.
            if n_bonds == 0:
                non_bonded.add(atom.index)

    # "not (A)" followed by "not (B)" equals "not (A or B)", so both
    # exclusions are applied with a single selection.
    excluded = sorted(water_indices | non_bonded)
    if excluded:
        excluded_string = " ".join(str(i) for i in excluded)
        ligand_atoms = ligand_atoms.select_atoms(f"not index {excluded_string}")

    # An AtomGroup is written with its own atoms and coordinates, so no
    # intermediate empty Universe is required.
    protein_atoms.write(str(output_pdb_path))
    if len(ligand_atoms) > 0:  # writing an empty AtomGroup would crash
        ligand_atoms.write(str(output_pdb_ligand_path))
|
||
|
||
def extract_all_ligand_protein(input_pdb_path: list[Path], outdir: Path) -> None:
    """Run the protein/ligand extraction on every PDB file of a list.

    For each input file ``<stem>.pdb`` the results are requested as
    ``<stem>_protein.pdb`` and ``<stem>_ligand.pdb`` inside ``outdir``.

    Args:
        input_pdb_path: The input pdb files to process, in order.
        outdir: Directory the output file paths are built in.

    Returns:
        None
    """
    for source_pdb in input_pdb_path:
        stem = source_pdb.stem
        protein_target = outdir / f"{stem}_protein.pdb"
        ligand_target = outdir / f"{stem}_ligand.pdb"
        extract_single_ligand_protein(source_pdb, protein_target, ligand_target)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Tests for extract_ligand_protein.""" |
29 changes: 29 additions & 0 deletions
29
utils/extract-ligand-protein-plugin/tests/test_extract_ligand_protein.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
"""Tests for extract_ligand_protein.""" | ||
from pathlib import Path | ||
|
||
from polus.mm.utils.extract_ligand_protein.extract_ligand_protein import ( | ||
extract_single_ligand_protein, | ||
) | ||
|
||
|
||
def test_extract_single_ligand_protein() -> None:
    """Test extract_single_ligand_protein.

    Every ``*.pdb`` file located next to this test module is split, then the
    protein output is checked to contain no ``HETATM`` record and the ligand
    output (when one was produced) to contain no ``ATOM`` record.

    NOTE(review): when no PDB fixture sits next to this module the loop body
    never runs and the test passes without checking anything.
    """
    import tempfile

    # Get the parent directory of the current file
    test_dir = Path(__file__).resolve().parent

    # Snapshot the inputs up front, in a stable order, so files created while
    # the test runs can never be picked up as inputs.
    pdb_files = sorted(test_dir.glob("*.pdb"))

    # Outputs go to a throw-away directory: nothing is left behind in the
    # working directory, and a later run cannot mistake old outputs for
    # fixtures.
    with tempfile.TemporaryDirectory() as tmp:
        out_dir = Path(tmp)
        for pdb in pdb_files:
            output_pdb_path = out_dir / f"{pdb.stem}_protein.pdb"
            output_pdb_ligand_path = out_dir / f"{pdb.stem}_ligand.pdb"
            extract_single_ligand_protein(pdb, output_pdb_path, output_pdb_ligand_path)

            with output_pdb_path.open() as f:
                for line in f:
                    assert not line.startswith("HETATM")

            # The ligand file is only written when ligand atoms remain, so a
            # missing file is not an error here.
            if output_pdb_ligand_path.exists():
                with output_pdb_ligand_path.open() as f:
                    for line in f:
                        assert not line.startswith("ATOM")