Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extract_pdbids_drugbank #171

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# bump2version configuration for the extract_pdbids_drugbank plugin.
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
# A version is MAJOR.MINOR.PATCH with an optional "-<release><dev>" suffix, e.g. 0.1.0-dev1.
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
# Multi-line values: every continuation line must be indented, otherwise the
# INI parser treats it as a new (malformed) key instead of part of the value.
serialize =
    {major}.{minor}.{patch}-{release}{dev}
    {major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
    dev
    _

[bumpversion:part:dev]

# Files whose embedded version string is rewritten on a bump.
[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank/__init__.py]
4 changes: 4 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions utils/extract-pdbids-drugbank-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
21 changes: 21 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Image for the extract_pdbids_drugbank plugin: installs the Python package
# from this directory into the base image's environment.
FROM condaforge/mambaforge

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Trailing slash makes it explicit that the destination is a directory.
COPY pyproject.toml ${EXEC_DIR}/
COPY VERSION ${EXEC_DIR}/
COPY README.md ${EXEC_DIR}/
COPY CHANGELOG.md ${EXEC_DIR}/

# Install needed packages here
COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# No ENTRYPOINT is declared here, so CMD must be a complete command: a bare
# "--help" would be handed to whatever entrypoint the base image defines (or
# executed directly), which is not a runnable program. The module name matches
# the baseCommand used by the CWL tool description.
CMD ["python", "-m", "polus.mm.utils.extract_pdbids_drugbank", "--help"]
19 changes: 19 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# extract_pdbids_drugbank (0.1.0)

Filter the Drugbank database

## Options

This plugin takes 5 input arguments and 4 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File |
| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| inchi | List of input InChI strings, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| inchi_keys | List of input InChIKeys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt file | Output | File | File |
| output_smiles | The Smiles of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
| output_pdbids_1D | The PDB IDs of target structures in 1D array | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
| output_pdbids_2D | The PDB IDs of target structures in 2D array | Output | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} |
1 change: 1 addition & 0 deletions utils/extract-pdbids-drugbank-plugin/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Tag the image with the version stored in the VERSION file of the current
# directory, using the current directory as the build context.
plugin_version=$(<VERSION)
docker build --tag polusai/extract-pdbids-drugbank-tool:$plugin_version .
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Filter the Drugbank database

doc: |-
Filter the Drugbank database

baseCommand: ["python", "-m", "polus.mm.utils.extract_pdbids_drugbank"]

hints:
DockerRequirement:
dockerPull: polusai/extract-pdbids-drugbank-tool@sha256:60097d534aef1ced5e08bc896d541a5364d8452a16d883b845e29552a99027b4

requirements:
InlineJavascriptRequirement: {}
# Enabling InitialWorkDirRequirement will stage the input Drugbank xml file
InitialWorkDirRequirement:
listing:
- $(inputs.drugbank_xml_file_path)

inputs:
drugbank_xml_file_path:
label: Path to the Drugbank xml file
doc: |-
Path to the Drugbank xml file
type: File
format: edam:format_2332
inputBinding:
prefix: --drugbank_xml_file_path

smiles:
label: List of input SMILES # type:
doc: |-
List of input SMILES
Type: string[]
File type: input
Accepted formats: list[string]
type: ["null", {"type": "array", "items": "string"}]
format: edam:format_2330
inputBinding:
prefix: --smiles
default: []

# Optional list of InChI strings, passed to the tool as --inchi.
inchi:
  label: List of input InChI strings
  doc: |-
    List of input InChI strings
    Type: string[]
    File type: input
    Accepted formats: list[string]
  type: ["null", {"type": "array", "items": "string"}]
  format:
  - edam:format_2330
  inputBinding:
    prefix: --inchi
  default: []

# Optional list of InChIKeys, passed to the tool as --inchi_keys.
inchi_keys:
  label: List of input InChIKeys
  doc: |-
    List of input InChIKeys
    Type: string[]
    File type: input
    Accepted formats: list[string]
  type: ["null", {"type": "array", "items": "string"}]
  format:
  - edam:format_2330
  inputBinding:
    prefix: --inchi_keys
  default: []

output_txt_path:
label: Path to the text dataset file
doc: |-
Path to the text dataset file
Type: string
File type: output
Accepted formats: txt
type: string
format:
- edam:format_2330
inputBinding:
prefix: --output_txt_path
default: system.log

outputs:
output_txt_path:
label: Path to the txt file
doc: |-
Path to the txt file
type: File
outputBinding:
glob: $(inputs.output_txt_path)
format: edam:format_2330

output_smiles:
  label: The Smiles of small molecules
  doc: |-
    The Smiles of small molecules
  type:
    type: array
    items: string
  outputBinding:
    glob: $(inputs.output_txt_path)
    loadContents: true
    outputEval: |
      ${
        // Each non-blank row of the text file has the form
        //   NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7
        // i.e. a SMILES string followed by its target PDB IDs.
        // The SMILES is repeated once per PDB ID so that this array lines up
        // element-for-element with the flat PDB ID array.
        var repeated = [];
        self[0].contents.split("\n").forEach(function(row) {
          // skip blank rows
          if (row.trim() === '') {
            return;
          }
          var fields = row.split(",").map(function(field) {return field.trim();});
          var remaining = fields.length - 1;
          while (remaining > 0) {
            repeated.push(fields[0]);
            remaining--;
          }
        });
        return repeated;
      }

output_pdbids_1D:
  label: The PDB IDs of target structures in 1D array
  doc: |-
    The PDB IDs of target structures in 1D array
  type:
    type: array
    items: string
  outputBinding:
    glob: $(inputs.output_txt_path)
    loadContents: true
    outputEval: |
      ${
        // Each non-blank row of the text file has the form
        //   NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7
        // The first field is the SMILES; every following field is a PDB ID.
        // All PDB IDs are collected, in file order, into one flat array.
        var flat = [];
        self[0].contents.split("\n").forEach(function(row) {
          // skip blank rows
          if (row.trim() === '') {
            return;
          }
          var fields = row.split(",").map(function(field) {return field.trim();});
          Array.prototype.push.apply(flat, fields.slice(1));
        });
        return flat;
      }

output_pdbids_2D:
  label: The PDB IDs of target structures in 2D array
  doc: |-
    The PDB IDs of target structures in 2D array
  type: {"type": "array", "items": {"type": "array", "items": "string"}}
  outputBinding:
    glob: $(inputs.output_txt_path)
    loadContents: true
    outputEval: |
      ${
        // Each non-blank row of the text file has the form
        //   NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7
        // The first field is the SMILES; every following field is a PDB ID.
        // The result holds one inner array of PDB IDs per row.
        return self[0].contents.split("\n")
          .filter(function(row) {return row.trim() !== '';})
          .map(function(row) {
            return row.split(",").slice(1).map(function(field) {return field.trim();});
          });
      }

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
86 changes: 86 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
specVersion: "0.1.0"
name: extract_pdbids_drugbank
version: 0.1.0
container: extract-pdbids-drugbank-plugin
entrypoint:
title: extract_pdbids_drugbank
description: Filter the Drugbank database
author: Brandon Walker, Nazanin Donyapour
contact: [email protected], [email protected]
repository:
documentation:
citation:

inputs:
- name: drugbank_xml_file_path
required: true
description: Path to the Drugbank xml file
type: File
defaultValue: {'class': 'File', 'location': '../../../fda_drug_dataset/drugbank/drugbank_5.1.10.xml'}
format:
uri: edam:format_2332
- name: smiles
required: true
description: List of input SMILES, Type string[], File type input, Accepted formats list[string]
type: ['null', {'type': 'array', 'items': 'string'}]
format:
uri: edam:format_2330
# InChI strings input (the description previously repeated the SMILES text).
- name: inchi
  required: true
  description: List of input InChI strings, Type string[], File type input, Accepted formats list[string]
  type: ['null', {'type': 'array', 'items': 'string'}]
  format:
    uri: edam:format_2330
# InChIKeys input (the description previously repeated the SMILES text).
- name: inchi_keys
  required: true
  description: List of input InChIKeys, Type string[], File type input, Accepted formats list[string]
  type: ['null', {'type': 'array', 'items': 'string'}]
  format:
    uri: edam:format_2330
- name: output_txt_path
required: true
description: Path to the text dataset file, Type string, File type output, Accepted formats txt
type: string
defaultValue: system.log
format:
uri: edam:format_2330
outputs:
- name: output_txt_path
required: true
description: Path to the txt file
type: File
format:
uri: edam:format_2330
- name: output_smiles
required: true
description: The Smiles of small molecules
type: {'type': 'array', 'items': 'string'}
- name: output_pdbids_1D
required: true
description: The PDB IDs of target structures in 1D array
type: {'type': 'array', 'items': 'string'}
- name: output_pdbids_2D
required: true
description: The PDB IDs of target structures in 2D array
type: {'type': 'array', 'items': {'type': 'array', 'items': 'string'}}
ui:
- key: inputs.drugbank_xml_file_path
title: "drugbank_xml_file_path: "
description: "Path to the Drugbank xml file"
type: File
- key: inputs.smiles
title: "smiles: "
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]"
type: ['null', {'type': 'array', 'items': 'string'}]
# UI entry for the InChI strings input.
- key: inputs.inchi
  title: "inchi: "
  description: "List of input InChI strings, Type string[], File type input, Accepted formats list[string]"
  type: ['null', {'type': 'array', 'items': 'string'}]
# UI entry for the InChIKeys input.
- key: inputs.inchi_keys
  title: "inchi_keys: "
  description: "List of input InChIKeys, Type string[], File type input, Accepted formats list[string]"
  type: ['null', {'type': 'array', 'items': 'string'}]
- key: inputs.output_txt_path
title: "output_txt_path: "
description: "Path to the text dataset file, Type string, File type output, Accepted formats txt"
type: string
33 changes: 33 additions & 0 deletions utils/extract-pdbids-drugbank-plugin/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[tool.poetry]
name = "polus-mm-utils-extract-pdbids-drugbank"
version = "0.1.0"
description = "Filter the Drugbank database"
authors = ["Nazanin Donyapour <[email protected]>", "Brandon Walker <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
typer = "^0.7.0"
sophios = "0.1.4"
pandas = "2.2.2"
rdkit = "2024.3.5"
defusedxml = "0.7.1"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
Loading
Loading