From 2c2b43e1f7978a40e2764255b45c4293a4ade266 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Tue, 14 May 2024 12:06:05 -0400 Subject: [PATCH] pymol_align_protein_ca --- .../.bumpversion.cfg | 29 ++++ .../.dockerignore | 4 + .../.gitattributes | 3 + .../pymol-align-protein-ca-plugin/.gitignore | 1 + .../CHANGELOG.md | 5 + .../pymol-align-protein-ca-plugin/Dockerfile | 30 ++++ utils/pymol-align-protein-ca-plugin/README.md | 16 ++ utils/pymol-align-protein-ca-plugin/VERSION | 1 + .../build-docker.sh | 4 + .../environment.yml | 6 + utils/pymol-align-protein-ca-plugin/ict.yml | 73 ++++++++ .../pymol_align_protein_ca_0@1@0.cwl | 83 +++++++++ .../pyproject.toml | 31 ++++ .../utils/pymol_align_protein_ca/__init__.py | 7 + .../utils/pymol_align_protein_ca/__main__.py | 68 ++++++++ .../pymol_align_protein_ca.py | 161 ++++++++++++++++++ .../tests/__init__.py | 1 + .../tests/npt.gro | 3 + .../tests/pose_ligand.xyz | 3 + .../tests/prod.trr | 3 + .../tests/receptor.xyz | 3 + .../tests/test_pymol_align_protein_ca.py | 38 +++++ 22 files changed, 573 insertions(+) create mode 100644 utils/pymol-align-protein-ca-plugin/.bumpversion.cfg create mode 100644 utils/pymol-align-protein-ca-plugin/.dockerignore create mode 100644 utils/pymol-align-protein-ca-plugin/.gitattributes create mode 100644 utils/pymol-align-protein-ca-plugin/.gitignore create mode 100644 utils/pymol-align-protein-ca-plugin/CHANGELOG.md create mode 100644 utils/pymol-align-protein-ca-plugin/Dockerfile create mode 100644 utils/pymol-align-protein-ca-plugin/README.md create mode 100644 utils/pymol-align-protein-ca-plugin/VERSION create mode 100755 utils/pymol-align-protein-ca-plugin/build-docker.sh create mode 100644 utils/pymol-align-protein-ca-plugin/environment.yml create mode 100644 utils/pymol-align-protein-ca-plugin/ict.yml create mode 100644 utils/pymol-align-protein-ca-plugin/pymol_align_protein_ca_0@1@0.cwl create mode 100644 utils/pymol-align-protein-ca-plugin/pyproject.toml create mode 100644 utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__init__.py create mode 100644 utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__main__.py create mode 100644 utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/pymol_align_protein_ca.py create mode 100644 utils/pymol-align-protein-ca-plugin/tests/__init__.py create mode 100644 utils/pymol-align-protein-ca-plugin/tests/npt.gro create mode 100644 utils/pymol-align-protein-ca-plugin/tests/pose_ligand.xyz create mode 100644 utils/pymol-align-protein-ca-plugin/tests/prod.trr create mode 100644 utils/pymol-align-protein-ca-plugin/tests/receptor.xyz create mode 100644 utils/pymol-align-protein-ca-plugin/tests/test_pymol_align_protein_ca.py diff --git a/utils/pymol-align-protein-ca-plugin/.bumpversion.cfg b/utils/pymol-align-protein-ca-plugin/.bumpversion.cfg new file mode 100644 index 00000000..d8364c96 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.1.0 +commit = False +tag = False +parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/mm/utils/pymol_align_protein_ca/__init__.py] diff --git a/utils/pymol-align-protein-ca-plugin/.dockerignore b/utils/pymol-align-protein-ca-plugin/.dockerignore new file mode 100644 index 00000000..7c603f81 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/utils/pymol-align-protein-ca-plugin/.gitattributes b/utils/pymol-align-protein-ca-plugin/.gitattributes new file mode 100644 index 00000000..86d4b9c5 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/.gitattributes @@ -0,0 +1,3 @@ +*.gro filter=lfs diff=lfs merge=lfs -text +*.xyz filter=lfs diff=lfs merge=lfs -text +*.trr filter=lfs diff=lfs merge=lfs -text diff --git a/utils/pymol-align-protein-ca-plugin/.gitignore b/utils/pymol-align-protein-ca-plugin/.gitignore new file mode 100644 index 00000000..c04bc49f --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/.gitignore @@ -0,0 +1 @@ +poetry.lock diff --git a/utils/pymol-align-protein-ca-plugin/CHANGELOG.md b/utils/pymol-align-protein-ca-plugin/CHANGELOG.md new file mode 100644 index 00000000..b67793f7 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/utils/pymol-align-protein-ca-plugin/Dockerfile b/utils/pymol-align-protein-ca-plugin/Dockerfile new file mode 100644 index 00000000..b36bb1a6 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/Dockerfile @@ -0,0 +1,30 @@ +FROM condaforge/mambaforge + +RUN apt-get update && apt-get install libgl1-mesa-glx -y + +ENV EXEC_DIR="/opt/executables" +ENV POLUS_LOG="INFO" +RUN mkdir -p ${EXEC_DIR} + +# Work directory defined in the base container +# WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY CHANGELOG.md ${EXEC_DIR} + +# Install needed packages here +# errors installing pymol-open-source from poetry so using conda +COPY environment.yml ${EXEC_DIR} +RUN mamba env create -f ${EXEC_DIR}/environment.yml +RUN echo "source activate project_env" > ~/.bashrc +ENV PATH /opt/conda/envs/env/bin:$PATH + +COPY src ${EXEC_DIR}/src + +ADD Dockerfile . + +RUN conda run -n project_env pip install ${EXEC_DIR} --no-cache-dir + +CMD ["--help"] diff --git a/utils/pymol-align-protein-ca-plugin/README.md b/utils/pymol-align-protein-ca-plugin/README.md new file mode 100644 index 00000000..d7fe385a --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/README.md @@ -0,0 +1,16 @@ +# pymol_align_protein_ca (0.1.0) + +Align Protein and CA atoms for a trajectory using Pymol + +## Options + +This plugin takes 6 input arguments and 1 output argument: + +| Name | Description | I/O | Type | Default | +|---------------|-------------------------|--------|--------|---------| +| input_1_path | Input receptor file path | Input | File | File | +| input_2_path | Input ligand file path | Input | File | File | +| input_3_path | Input structure file path | Input | File | File | +| input_4_path | Input trajectory file path | Input | File | File | +| output_file_path | Path to the output file | Input | string | string | +| output_file_path | Path to the output file | Output | File | File | diff --git a/utils/pymol-align-protein-ca-plugin/VERSION b/utils/pymol-align-protein-ca-plugin/VERSION new file mode 100644 index 00000000..6e8bf73a --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/utils/pymol-align-protein-ca-plugin/build-docker.sh b/utils/pymol-align-protein-ca-plugin/build-docker.sh new file mode 100755 index 00000000..40122e8e --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$("] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.13" +typer = "^0.7.0" +sophios = "0.1.4" +mdanalysis = "2.7.0" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pytest = "^7.4" +pytest-sugar = "^0.9.6" +pre-commit = "^3.2.1" +black = "^23.3.0" +mypy = "^1.1.1" +ruff = "^0.0.270" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." +] diff --git a/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__init__.py b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__init__.py new file mode 100644 index 00000000..3f351a70 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__init__.py @@ -0,0 +1,7 @@ +"""pymol_align_protein_ca.""" + +__version__ = "0.1.0" + +from polus.mm.utils.pymol_align_protein_ca.pymol_align_protein_ca import ( # noqa # pylint: disable=unused-import + pymol_align_protein_ca, +) diff --git a/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__main__.py b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__main__.py new file mode 100644 index 00000000..e57b475d --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/__main__.py @@ -0,0 +1,68 @@ +"""Package entrypoint for the pymol_align_protein_ca package.""" + +# Base packages +import logging +from os import environ + +import typer +from polus.mm.utils.pymol_align_protein_ca.pymol_align_protein_ca import ( + pymol_align_protein_ca, +) + +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO")) +logger = logging.getLogger("polus.mm.utils.pymol_align_protein_ca.") +logger.setLevel(POLUS_LOG) + +app = typer.Typer(help="pymol_align_protein_ca.") + + +@app.command() +def main( + input_1_path: str = typer.Option( + ..., + "--input_1_path", + help="Input receptor file path", + ), + input_2_path: str = typer.Option( + ..., + "--input_2_path", + help="Input ligand file path", + ), + input_3_path: str = typer.Option( + ..., + "--input_3_path", + help="Input structure file path", + ), + input_4_path: str = typer.Option( + ..., + "--input_4_path", + help="Input trajectory file path", + ), + output_file_path: str = typer.Option( + ..., + "--output_file_path", + help="Path to the output file", + ), +) -> None: + """pymol_align_protein_ca.""" + logger.info(f"input_1_path: {input_1_path}") + logger.info(f"input_2_path: {input_2_path}") + logger.info(f"input_3_path: {input_3_path}") + logger.info(f"input_4_path: {input_4_path}") + logger.info(f"output_file_path: {output_file_path}") + + pymol_align_protein_ca( + input_1_path=input_1_path, + input_2_path=input_2_path, + input_3_path=input_3_path, + input_4_path=input_4_path, + output_file_path=output_file_path, + ) + + +if __name__ == "__main__": + app() diff --git a/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/pymol_align_protein_ca.py b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/pymol_align_protein_ca.py new file mode 100644 index 00000000..c992059a --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/src/polus/mm/utils/pymol_align_protein_ca/pymol_align_protein_ca.py @@ -0,0 +1,161 @@ +"""Align Protein and CA atoms for a trajectory using Pymol.""" +import subprocess as sub + +import pymol + + +def pymol_align_protein_ca( + input_1_path: str, + input_2_path: str, + input_3_path: str, + input_4_path: str, + output_file_path: str, +) -> None: + """pymol_align_protein_ca. + + Args: + script: + input_1_path: Input receptor file path + input_2_path: Input ligand file path + input_3_path: Input structure file path + input_4_path: Input trajectory file path + output_file_path: Path to the output file + Returns: + None + """ + pymol.cmd.load(input_1_path, "receptor") + pymol.cmd.load(input_2_path, "ligand") + + receptor = pymol.cmd.select("R", "receptor") + # This assumes the receptor atoms are first in the file. + indices_receptor = " or ".join([f"index {i}" for i in range(1, 1 + int(receptor))]) + + ligand = pymol.cmd.select("L", "ligand") + # This assumes the ligand atoms are appended to the file after the receptor. + indices_ligand = " or ".join( + [ + f"index {i}" + for i in range(1 + int(receptor), 1 + int(receptor) + int(ligand)) + ], + ) + + pymol.cmd.load(input_3_path, "genion") + pymol.cmd.select("G", f"genion and name CA and ({indices_receptor})") + + # NOTE: We do NOT want to load prod.gro here because the coordinates only get + # written after the last timestep, so we can't use it for realtime analysis. + # However, pymol just needs to extract the topology information, so we can use + # genion.gro and overwrite the coordinates from the trajectory file prod.trr + pymol.cmd.load(input_3_path, "prod") + interval = 1 # Only load every nth frame. + + # NOTE: Pymol performs an alignment using an average across all + # frames in the trajectory; + # See https://sourceforge.net/p/pymol/mailman/message/29514454/ + # It does NOT perform a separate alignment for each frame! + # (That's what we really want because e.g. gromacs cannot remove + # the angular drift when using a periodic simulation box. It can + # still remove the linear drift, however. This is probably fine + # for very short simulations, but eventually the angular drift may + # drift may inflate the rmsd of the ligand w.r.t. the docked conformation. + # This is bad because we really want to avoid false negatives.) + + align_separately = False + if align_separately: + # We can process each frame individually and concatenate the pdb + # files together. Unfortunately, re-seeking into the trajectory + # file causes this loop to be O(n^2) + sub.run( + ["rm", output_file_path], # noqa: S603, S607 + check=False, + ) # Delete output_file_path (if it exists) due to >> below. + + # First load all of the states so we can call count_states. + pymol.cmd.load(input_3_path, "num_states") + pymol.cmd.load_traj(input_4_path, "num_states") + num_states = pymol.cmd.count_states("num_states") + + states = [interval * i for i in range(1, 1 + int(num_states / interval))] + + pymol.cmd.load(input_3_path, "aligned_sep") + for idx, state in enumerate(states): + # Now re-load each state individually. + pymol.cmd.load_traj( + input_4_path, + "prod", + state=1, + start=state, + stop=state, + max=1, + ) + + pymol.cmd.select("P", f"prod and name CA and ({indices_receptor})") + pymol.cmd.pair_fit("P", "G") + + pymol.cmd.select("N", f"prod and ({indices_receptor} or {indices_ligand})") + pymol.cmd.save( + f"{state}.pdb", + "N", + -1, + ) # 0 == all states (default -1 == current state only) + + # For interactive visualization / comparison, load the aligned results. + pymol.cmd.save( + "temp.pdb", + "prod", + -1, + ) # 0 == all states (default -1 == current state only) + pymol.cmd.load_traj("temp.pdb", "aligned_sep", state=1 + idx) # 0 = append + sub.run(["rm", "temp.pdb"], check=False) # noqa: S603, S607 + + # Now concatenate the individual pdb files into output_file_path + # TODO: For security, avoid using shell=True + sub.run( + f"cat {state}.pdb >> {output_file_path}", + shell=False, # noqa: S603 + check=True, + ) + sub.run(["rm", f"{state}.pdb"], check=False) # noqa: S603, S607 + + # For interactive visualization / comparison, load the aligned results. + + align_averaged = True + if align_averaged: + # Load all of the states. + pymol.cmd.load_traj(input_4_path, "prod", interval=interval) + + pymol.cmd.select("P", f"prod and name CA and ({indices_receptor})") + pymol.cmd.pair_fit("P", "G") + + pymol.cmd.select("N", f"prod and ({indices_receptor} or {indices_ligand})") + pymol.cmd.save( + output_file_path, + "N", + 0, + ) # 0 == all states (default -1 == current state only) + + mdtraj = True + if mdtraj: + import MDAnalysis + from MDAnalysis.analysis import align + from MDAnalysis.coordinates import TRR + + gro = MDAnalysis.Universe(topology=input_3_path, coordinates=input_3_path) + print(gro) # noqa: T201 + # NOTE: Setting coordinates= in the constructor only loads the first frame! + trr = MDAnalysis.Universe(topology=input_3_path) + trr.trajectory = TRR.TRRReader(input_4_path) # This loads all frames. + print(trr) # noqa: T201 + + # Don't forget to call .run()! Otherwise, it will silently do nothing + # (except write an empty file). + aligntraj = align.AlignTraj( + trr, + gro, + select="protein and name CA", + filename="aligned.trr", + ).run() + print(aligntraj.frames) # noqa: T201 + + pymol.cmd.load(input_3_path, "aligned_mda") + pymol.cmd.load_traj("aligned.trr", "aligned_mda") diff --git a/utils/pymol-align-protein-ca-plugin/tests/__init__.py b/utils/pymol-align-protein-ca-plugin/tests/__init__.py new file mode 100644 index 00000000..147bd30e --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for pymol_align_protein_ca.""" diff --git a/utils/pymol-align-protein-ca-plugin/tests/npt.gro b/utils/pymol-align-protein-ca-plugin/tests/npt.gro new file mode 100644 index 00000000..2d3f3eaf --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/npt.gro @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5894510bf27f7957adb92379ee79807b61d588d53ab39a43cf8908ad16da9b23 +size 4345743 diff --git a/utils/pymol-align-protein-ca-plugin/tests/pose_ligand.xyz b/utils/pymol-align-protein-ca-plugin/tests/pose_ligand.xyz new file mode 100644 index 00000000..99208249 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/pose_ligand.xyz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d19bd5cb09b60c179f0f67d26574bbff5af5b1131414682ac07c878c8273d41 +size 2276 diff --git a/utils/pymol-align-protein-ca-plugin/tests/prod.trr b/utils/pymol-align-protein-ca-plugin/tests/prod.trr new file mode 100644 index 00000000..286c48a1 --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/prod.trr @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ecff906cb2435fc3254c7e89666c8556b7b91dcfc05546ce1915d2a76898d4 +size 7558920 diff --git a/utils/pymol-align-protein-ca-plugin/tests/receptor.xyz b/utils/pymol-align-protein-ca-plugin/tests/receptor.xyz new file mode 100644 index 00000000..1e8f3faa --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/receptor.xyz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1222a39319a3f541dbd2b56840b796ee8483c05fe167c2c32acd6660b0f25902 +size 201331 diff --git a/utils/pymol-align-protein-ca-plugin/tests/test_pymol_align_protein_ca.py b/utils/pymol-align-protein-ca-plugin/tests/test_pymol_align_protein_ca.py new file mode 100644 index 00000000..8bb306db --- /dev/null +++ b/utils/pymol-align-protein-ca-plugin/tests/test_pymol_align_protein_ca.py @@ -0,0 +1,38 @@ +"""Tests for pymol_align_protein_ca.""" +from pathlib import Path + +from sophios.api.pythonapi import Step +from sophios.api.pythonapi import Workflow + + +def test_pymol_align_protein_ca_cwl() -> None: + """Test pymol_align_protein_ca CWL.""" + cwl_file = Path("pymol_align_protein_ca_0@1@0.cwl") + + # Create the step for the CWL file + load_pymol_align_protein_ca_step = Step(clt_path=cwl_file) + + input_1_path = str(Path(__file__).resolve().parent / Path("receptor.xyz")) + input_2_path = str(Path(__file__).resolve().parent / Path("pose_ligand.xyz")) + input_3_path = str(Path(__file__).resolve().parent / Path("npt.gro")) + input_4_path = str(Path(__file__).resolve().parent / Path("prod.trr")) + + load_pymol_align_protein_ca_step.input_1_path = input_1_path + load_pymol_align_protein_ca_step.input_2_path = input_2_path + load_pymol_align_protein_ca_step.input_3_path = input_3_path + load_pymol_align_protein_ca_step.input_4_path = input_4_path + load_pymol_align_protein_ca_step.output_file_path = "prod_align_protein_ca.pdb" + + # Define the workflow with the step + steps = [load_pymol_align_protein_ca_step] + filename = "pymol_align_protein_ca" + workflow = Workflow(steps, filename) + + # Run the workflow + workflow.run() + + # Check for the existence of the output file + outdir = Path("outdir") + assert any( + file.name == "prod_align_protein_ca.pdb" for file in outdir.rglob("*") + ), "The file prod_align_protein_ca.pdb was not found."