From 38a1e303ca644201098cfdb87700a7f14af7c335 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Thu, 6 Jun 2024 21:48:21 +0000 Subject: [PATCH] extract_pdbids_drugbank --- .../.bumpversion.cfg | 29 + .../.dockerignore | 4 + .../extract-pdbids-drugbank-plugin/.gitignore | 1 + .../CHANGELOG.md | 5 + .../extract-pdbids-drugbank-plugin/Dockerfile | 27 + .../extract-pdbids-drugbank-plugin/README.md | 19 + utils/extract-pdbids-drugbank-plugin/VERSION | 1 + .../build-docker.sh | 4 + .../environment.yml | 10 + .../extract_pdbids_drugbank.cwl | 188 ++++++ utils/extract-pdbids-drugbank-plugin/ict.yml | 86 +++ .../pyproject.toml | 30 + .../utils/extract_pdbids_drugbank/__init__.py | 7 + .../utils/extract_pdbids_drugbank/__main__.py | 69 +++ .../extract_pdbids_drugbank.py | 129 ++++ .../tests/__init__.py | 1 + .../tests/drugbank_10_fake_records_5.1.10.xml | 573 ++++++++++++++++++ .../tests/test_extract_pdbids_drugbank.py | 19 + 18 files changed, 1202 insertions(+) create mode 100644 utils/extract-pdbids-drugbank-plugin/.bumpversion.cfg create mode 100644 utils/extract-pdbids-drugbank-plugin/.dockerignore create mode 100644 utils/extract-pdbids-drugbank-plugin/.gitignore create mode 100644 utils/extract-pdbids-drugbank-plugin/CHANGELOG.md create mode 100644 utils/extract-pdbids-drugbank-plugin/Dockerfile create mode 100644 utils/extract-pdbids-drugbank-plugin/README.md create mode 100644 utils/extract-pdbids-drugbank-plugin/VERSION create mode 100755 utils/extract-pdbids-drugbank-plugin/build-docker.sh create mode 100644 utils/extract-pdbids-drugbank-plugin/environment.yml create mode 100644 utils/extract-pdbids-drugbank-plugin/extract_pdbids_drugbank.cwl create mode 100644 utils/extract-pdbids-drugbank-plugin/ict.yml create mode 100644 utils/extract-pdbids-drugbank-plugin/pyproject.toml create mode 100644 utils/extract-pdbids-drugbank-plugin/src/polus/mm/utils/extract_pdbids_drugbank/__init__.py create mode 100644 
utils/extract-pdbids-drugbank-plugin/src/polus/mm/utils/extract_pdbids_drugbank/__main__.py create mode 100644 utils/extract-pdbids-drugbank-plugin/src/polus/mm/utils/extract_pdbids_drugbank/extract_pdbids_drugbank.py create mode 100644 utils/extract-pdbids-drugbank-plugin/tests/__init__.py create mode 100644 utils/extract-pdbids-drugbank-plugin/tests/drugbank_10_fake_records_5.1.10.xml create mode 100644 utils/extract-pdbids-drugbank-plugin/tests/test_extract_pdbids_drugbank.py diff --git a/utils/extract-pdbids-drugbank-plugin/.bumpversion.cfg b/utils/extract-pdbids-drugbank-plugin/.bumpversion.cfg new file mode 100644 index 00000000..b3643d30 --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.1.0 +commit = False +tag = False +parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank/__init__.py] diff --git a/utils/extract-pdbids-drugbank-plugin/.dockerignore b/utils/extract-pdbids-drugbank-plugin/.dockerignore new file mode 100644 index 00000000..7c603f81 --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/utils/extract-pdbids-drugbank-plugin/.gitignore b/utils/extract-pdbids-drugbank-plugin/.gitignore new file mode 100644 index 00000000..c04bc49f --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/.gitignore @@ -0,0 +1 @@ +poetry.lock diff --git a/utils/extract-pdbids-drugbank-plugin/CHANGELOG.md 
b/utils/extract-pdbids-drugbank-plugin/CHANGELOG.md new file mode 100644 index 00000000..b67793f7 --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/utils/extract-pdbids-drugbank-plugin/Dockerfile b/utils/extract-pdbids-drugbank-plugin/Dockerfile new file mode 100644 index 00000000..b463afab --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/Dockerfile @@ -0,0 +1,27 @@ +FROM condaforge/mambaforge + +ENV EXEC_DIR="/opt/executables" +ENV POLUS_LOG="INFO" +RUN mkdir -p ${EXEC_DIR} + + +# Work directory defined in the base container +# WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY CHANGELOG.md ${EXEC_DIR} + +# Install needed packages here +# errors installing rdkit from poetry so using conda +COPY environment.yml ${EXEC_DIR} +RUN mamba env create -f ${EXEC_DIR}/environment.yml +RUN echo "source activate project_env" > ~/.bashrc +ENV PATH /opt/conda/envs/env/bin:$PATH + +COPY src ${EXEC_DIR}/src + +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +CMD ["--help"] diff --git a/utils/extract-pdbids-drugbank-plugin/README.md b/utils/extract-pdbids-drugbank-plugin/README.md new file mode 100644 index 00000000..ebf11b09 --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/README.md @@ -0,0 +1,19 @@ +# extract_pdbids_drugbank (0.1.0) + +Filter the Drugbank database + +## Options + +This plugin takes 5 input arguments and 4 output arguments: + +| Name | Description | I/O | Type | Default | +|---------------|-------------------------|--------|--------|---------| +| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File | +| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | +| inchi | List of input InChI strings, Type: string[], File type: input, 
Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | +| inchi_keys | List of input InChIKeys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | +| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string | +| output_txt_path | Path to the txt file | Output | File | File | +| output_smiles | The SMILES of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | +| output_pdbids_1D | The PDB IDs of target structures in 1D array | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | +| output_pdbids_2D | The PDB IDs of target structures in 2D array | Output | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | diff --git a/utils/extract-pdbids-drugbank-plugin/VERSION b/utils/extract-pdbids-drugbank-plugin/VERSION new file mode 100644 index 00000000..6e8bf73a --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/utils/extract-pdbids-drugbank-plugin/build-docker.sh b/utils/extract-pdbids-drugbank-plugin/build-docker.sh new file mode 100755 index 00000000..00cb8628 --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$(", "Brandon Walker "] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.12" +typer = "^0.7.0" +cwltool = "3.1.20240404144621" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pytest = "^7.4" +pytest-sugar = "^0.9.6" +pre-commit = "^3.2.1" +black = "^23.3.0" +mypy = "^1.1.1" +ruff = "^0.0.270" + +[build-system] +requires = 
@app.command()
def main(
    drugbank_xml_file_path: str = typer.Option(
        ...,
        "--drugbank_xml_file_path",
        help="Path to the Drugbank xml file",
    ),
    smiles: list[str] = typer.Option(
        ...,
        "--smiles",
        # Trailing space added: the two literals are concatenated, and the
        # original rendered as "File type input,Accepted formats ...".
        help="List of input SMILES, Type string[], File type input, "
        "Accepted formats list[string]",
    ),
    inchi: list[str] = typer.Option(
        ...,
        "--inchi",
        # The original help said "List of input SMILES" (copy-paste slip);
        # these values are matched against the DrugBank InChI column.
        help="List of input InChI strings, Type string[], File type input",
    ),
    inchi_keys: list[str] = typer.Option(
        ...,
        "--inchi_keys",
        # Same copy-paste slip: these values are matched against InChIKeys.
        help="List of input InChIKeys, Type string[], File type input",
    ),
    output_txt_path: str = typer.Option(
        ...,
        "--output_txt_path",
        help="Path to the text dataset file, Type string, File type output",
    ),
) -> None:
    """Extract the PDB IDs of DrugBank small molecules matching the query."""
    # NOTE: Typer uses this docstring as the command's help text, so it is
    # deliberately kept to one line; parameter docs live in the `help=`
    # strings above. Every option is declared required (`...`); the values
    # are forwarded unchanged to `extract_pdbids_drugbank`.

    # Lazy %-formatting: same log output as before, but the message is only
    # built when the INFO level is actually enabled.
    logger.info("drugbank_xml_file_path: %s", drugbank_xml_file_path)
    logger.info("smiles: %s", smiles)
    logger.info("inchi: %s", inchi)
    logger.info("inchi_keys: %s", inchi_keys)
    logger.info("output_txt_path: %s", output_txt_path)

    extract_pdbids_drugbank(
        drugbank_xml_file_path=drugbank_xml_file_path,
        smiles=smiles,
        inchi=inchi,
        inchi_keys=inchi_keys,
        output_txt_path=output_txt_path,
    )
# The code is adapted from https://github.com/dhimmel/drugbank/blob/gh-pages/parse.ipynb
def parse_drugbank_xml(drugbank_xml_path: str) -> pd.DataFrame:
    """Parse the DrugBank XML file into a data frame.

    Args:
        drugbank_xml_path (str): The path to the DrugBank XML file

    Returns:
        pd.DataFrame: One row per drug whose ``type`` attribute is
        "small molecule" and that has a calculated SMILES. Columns are
        ``drugbank_id``, ``name``, ``type``, ``groups``, ``inchi``,
        ``inchikey``, ``smiles`` and ``pdb_entries``. ``pdb_entries`` is
        always a string: the comma-joined PDB IDs, or ``""`` when the drug
        lists none.
    """
    ns = "{http://www.drugbank.ca}"
    inchikey_template = (
        f"{ns}calculated-properties/{ns}property[{ns}kind='InChIKey']/{ns}value"
    )
    inchi_template = (
        f"{ns}calculated-properties/{ns}property[{ns}kind='InChI']/{ns}value"
    )
    smiles_template = (
        f"{ns}calculated-properties/{ns}property[{ns}kind='SMILES']/{ns}value"
    )
    columns = [
        "drugbank_id",
        "name",
        "type",
        "groups",
        "inchi",
        "inchikey",
        "smiles",
        "pdb_entries",
    ]

    root = parse(drugbank_xml_path).getroot()
    rows = []
    for drug in root:
        pdb_entries = drug.find(f"{ns}pdb-entries")
        # Skip empty <pdb-entry/> elements instead of recording the string
        # "None", and fall back to an empty list when the drug has no
        # <pdb-entries> element at all.
        pdb_ids = (
            []
            if pdb_entries is None
            else [
                entry.text.strip()
                for entry in pdb_entries
                if entry.text and entry.text.strip()
            ]
        )
        rows.append(
            {
                "drugbank_id": drug.findtext(f"{ns}drugbank-id[@primary='true']"),
                "name": drug.findtext(f"{ns}name"),
                "type": drug.get("type"),
                "groups": [
                    group.text for group in drug.findall(f"{ns}groups/{ns}group")
                ],
                "inchi": drug.findtext(inchi_template),
                "inchikey": drug.findtext(inchikey_template),
                "smiles": drug.findtext(smiles_template),
                # Always present, so the column never contains NaN (which is
                # truthy and used to leak into the output as "nan").
                "pdb_entries": ",".join(pdb_ids),
            },
        )

    # Passing `columns` to the constructor keeps the frame well-formed even
    # when there are no rows; selecting with `[columns]` afterwards raised
    # KeyError in that case.
    drugbank_df = pd.DataFrame(rows, columns=columns)

    keep = drugbank_df["smiles"].notna() & (drugbank_df["type"] == "small molecule")
    return drugbank_df[keep]


def smiles_to_inchi(smiles: str) -> Optional[str]:
    """Convert a SMILES string to an InChI string.

    Args:
        smiles (str): The SMILES of a small molecule

    Returns:
        Optional[str]: The InChI string (not the InChIKey), or None when
        RDKit cannot parse the SMILES.
    """
    # Convert SMILES to RDKit molecule object
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Error: Invalid SMILES string: {smiles}")  # noqa: T201
        return None

    # Convert molecule to InChI
    return Chem.MolToInchi(mol)


def extract_pdbids_drugbank(
    drugbank_xml_file_path: str,
    smiles: list[str],
    inchi: list[str],
    inchi_keys: list[str],
    output_txt_path: str,
) -> None:
    """Filter DrugBank based on a list of small molecules.

    Exactly one query list is used, in priority order: ``smiles``, then
    ``inchi``, then ``inchi_keys``. Each matching drug that has at least one
    PDB entry is written as a ``<smiles>,<pdb_id>[,<pdb_id>...]`` line.

    Args:
        drugbank_xml_file_path: Path to the Drugbank xml file
        smiles: List of input SMILES; converted to InChI before matching
        inchi: List of input InChI strings
        inchi_keys: List of input InChIKeys
        output_txt_path: Path of the text file to write

    Returns:
        None.

    Raises:
        ValueError: If ``smiles``, ``inchi`` and ``inchi_keys`` are all empty.
    """
    # Validate first so a bad call fails before the (large) XML is parsed.
    # Previously this case crashed later with UnboundLocalError.
    if not (smiles or inchi or inchi_keys):
        msg = "At least one of smiles, inchi or inchi_keys must be non-empty."
        raise ValueError(msg)

    drugbank = parse_drugbank_xml(drugbank_xml_file_path)

    if smiles:
        # SMILES can be written in different (non-canonical) forms, so the
        # comparison is done on the InChI derived from each of them.
        inchi_ids = [smiles_to_inchi(sm) for sm in smiles]
        # Drop failed conversions (None or an empty string).
        inchi_ids = [inchi_id for inchi_id in inchi_ids if inchi_id]
        filtered_df = drugbank[drugbank["inchi"].isin(inchi_ids)]
    elif inchi:
        filtered_df = drugbank[drugbank["inchi"].isin(inchi)]
    else:
        filtered_df = drugbank[drugbank["inchikey"].isin(inchi_keys)]

    with Path(output_txt_path).open(mode="w", encoding="utf-8") as f:
        for smi, pdb_entries in zip(filtered_df["smiles"], filtered_df["pdb_entries"]):
            # Only non-empty strings count as PDB entries; this also rejects
            # NaN, which is truthy.
            if isinstance(pdb_entries, str) and pdb_entries:
                f.write(f"{smi},{pdb_entries}\n")
b/utils/extract-pdbids-drugbank-plugin/tests/drugbank_10_fake_records_5.1.10.xml new file mode 100644 index 00000000..0c92672d --- /dev/null +++ b/utils/extract-pdbids-drugbank-plugin/tests/drugbank_10_fake_records_5.1.10.xml @@ -0,0 +1,573 @@ + + + + DB1193 + Drug9180 + This is a fake drug used for testing purposes. + 3263-01-2 + UNII4440 + 396.50 + 439.70 + solid + + approved + + + + + 7700475 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey8840 + + + InChI + InChI3491 + + + SMILES + SMILES5443 + + + + PDB8720 + + + + DB9051 + Drug7020 + This is a fake drug used for testing purposes. + 4836-01-2 + UNII4896 + 448.00 + 339.12 + solid + + approved + + + + + 3255888 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey6081 + + + InChI + InChI4865 + + + SMILES + SMILES1834 + + + + PDB6952 + + + + DB3763 + Drug9033 + This is a fake drug used for testing purposes. + 4243-01-2 + UNII9552 + 410.91 + 276.24 + solid + + approved + + + + + 1340178 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. 
+ + + + Salt name + + + + + InChIKey + InChIKey8854 + + + InChI + InChI6632 + + + SMILES + SMILES8698 + + + + PDB7692 + + + + DB3437 + Drug7099 + This is a fake drug used for testing purposes. + 3972-01-2 + UNII6829 + 367.17 + 111.38 + solid + + approved + + + + + 5652181 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey9523 + + + InChI + InChI8564 + + + SMILES + SMILES3441 + + + + PDB5533 + + + + DB7158 + Drug3491 + This is a fake drug used for testing purposes. + 7602-01-2 + UNII1079 + 169.07 + 454.30 + solid + + approved + + + + + 7275507 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey4848 + + + InChI + InChI5085 + + + SMILES + SMILES5755 + + + + PDB2260 + + + + DB3593 + Drug8863 + This is a fake drug used for testing purposes. + 4103-01-2 + UNII1309 + 314.69 + 345.05 + solid + + approved + + + + + 2622924 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey8183 + + + InChI + InChI9997 + + + SMILES + SMILES2154 + + + + PDB7554 + + + + DB7335 + Drug4801 + This is a fake drug used for testing purposes. 
+ 8814-01-2 + UNII6987 + 149.96 + 206.80 + solid + + approved + + + + + 4367264 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey1494 + + + InChI + InChI5278 + + + SMILES + SMILES2534 + + + + PDB9333 + + + + DB1046 + Drug8209 + This is a fake drug used for testing purposes. + 2566-01-2 + UNII6800 + 223.91 + 192.80 + solid + + approved + + + + + 1045399 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey6639 + + + InChI + InChI5577 + + + SMILES + SMILES9937 + + + + PDB9547 + + + + DB2929 + Drug2385 + This is a fake drug used for testing purposes. + 4854-01-2 + UNII5494 + 326.39 + 274.70 + solid + + approved + + + + + 4989494 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. + Protein binding text. + Route of elimination text. + Volume of distribution text. + Clearance text. + + Direct parent text. + + + + Salt name + + + + + InChIKey + InChIKey5085 + + + InChI + InChI2296 + + + SMILES + SMILES1291 + + + + PDB6942 + + + + DB6242 + Drug4247 + This is a fake drug used for testing purposes. + 5891-01-2 + UNII2176 + 141.48 + 308.53 + solid + + approved + + + + + 2946505 + + + + Synthesis reference text. + Indication text. + Pharmacodynamics text. + Mechanism of action text. + Toxicity text. + Metabolism text. + Absorption text. + Half-life text. 
def test_extract_pdbids_drugbank() -> None:
    """Check that querying the fake DrugBank records by InChI writes a file."""
    import tempfile  # local import: only this test needs it

    # Fake InChI values (not SMILES): they are passed as the `inchi` argument.
    inchi = ["InChI3491", "InChI8564", "InChI7556"]

    # The fixture lives next to this test module.
    input_xml_path = str(
        Path(__file__).resolve().parent / "drugbank_10_fake_records_5.1.10.xml",
    )

    # Write into a throwaway directory so the test neither depends on nor
    # pollutes the current working directory (it used to leave `out.txt`
    # behind).
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_path = Path(tmp_dir) / "out.txt"
        extract_pdbids_drugbank(input_xml_path, [], inchi, [], str(output_path))

        assert output_path.exists()