-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
23fb84d
commit 38a1e30
Showing
18 changed files
with
1,202 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
|
||
# Work directory defined in the base container | ||
# WORKDIR ${EXEC_DIR} | ||
|
||
# Package metadata needed by the pip install step below | ||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
|
||
# Install needed packages here | ||
# errors installing rdkit from poetry so using conda | ||
COPY environment.yml ${EXEC_DIR} | ||
RUN mamba env create -f ${EXEC_DIR}/environment.yml | ||
RUN echo "source activate project_env" > ~/.bashrc | ||
# The environment created above is the one activated on the previous line (project_env). | ||
# Put its bin directory first on PATH so that pip3 below installs into that environment. | ||
ENV PATH /opt/conda/envs/project_env/bin:$PATH | ||
|
||
COPY src ${EXEC_DIR}/src | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir | ||
|
||
CMD ["--help"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# extract_pdbids_drugbank (0.1.0) | ||
|
||
Filter the Drugbank database | ||
|
||
## Options | ||
|
||
This plugin takes 5 input arguments and 4 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File | | ||
| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi | List of input InChI strings, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi_keys | List of input InChI keys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string | | ||
| output_txt_path | Path to the txt file | Output | File | File | | ||
| output_smiles | The Smiles of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | | ||
| output_pdbids_1D | The PDB IDs of target structures in 1D array | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | | ||
| output_pdbids_2D | The PDB IDs of target structures in 2D array | Output | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
# Read the release version from the VERSION file in the current directory. | ||
version=$(<VERSION) | ||
# Build the image from the current directory; the tag is quoted so the expansion is never word-split. | ||
docker build . -t "polusai/extract-pdbids-drugbank-tool:${version}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: project_env | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- python==3.10 | ||
- rdkit==2024.03.1 | ||
- defusedxml==0.7.1 | ||
- pytest==8.1.1 | ||
- cwltool==3.1.20240404144621 | ||
- cwl-utils==0.33 |
188 changes: 188 additions & 0 deletions
188
utils/extract-pdbids-drugbank-plugin/extract_pdbids_drugbank.cwl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
#!/usr/bin/env cwl-runner | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: Filter the Drugbank database | ||
|
||
doc: |- | ||
Filter the Drugbank database | ||
|
||
baseCommand: ["conda", "run", "-n", "project_env", "python", "-m", "polus.mm.utils.extract_pdbids_drugbank"] | ||
|
||
hints: | ||
DockerRequirement: | ||
dockerPull: ndonyapour/extract_pdbids_drugbank | ||
|
||
requirements: | ||
InlineJavascriptRequirement: {} | ||
# Enabling InitialWorkDirRequirement will stage the input Drugbank xml file | ||
InitialWorkDirRequirement: | ||
listing: | ||
- $(inputs.drugbank_xml_file_path) | ||
|
||
inputs: | ||
drugbank_xml_file_path: | ||
label: Path to the Drugbank xml file | ||
doc: |- | ||
Path to the Drugbank xml file | ||
type: File | ||
format: edam:format_2332 | ||
inputBinding: | ||
prefix: --drugbank_xml_file_path | ||
default: | ||
class: File | ||
location: ../../../fda_drug_dataset/drugbank/drugbank_5.1.10.xml | ||
|
||
smiles: | ||
label: List of input SMILES # type: | ||
doc: |- | ||
List of input SMILES | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: edam:format_2330 | ||
inputBinding: | ||
prefix: --smiles | ||
default: [] | ||
|
||
inchi: | ||
label: List of input InChI strings # type: | ||
doc: |- | ||
List of input InChI strings | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi | ||
default: [] | ||
|
||
inchi_keys: | ||
label: List of input InChI keys # type: | ||
doc: |- | ||
List of input InChI keys | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi_keys | ||
default: [] | ||
|
||
output_txt_path: | ||
label: Path to the text dataset file | ||
doc: |- | ||
Path to the text dataset file | ||
Type: string | ||
File type: output | ||
Accepted formats: txt | ||
type: string | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --output_txt_path | ||
default: system.log | ||
|
||
outputs: | ||
output_txt_path: | ||
label: Path to the txt file | ||
doc: |- | ||
Path to the txt file | ||
type: File | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
format: edam:format_2330 | ||
|
||
output_smiles: | ||
label: The Smiles of small molecules | ||
doc: |- | ||
The Smiles of small molecules | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var smiles = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation. We need to duplicate it, so each SMILES string | ||
// corresponds to a PDB ID in the PDB IDs array. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
smiles.push(words[0]); | ||
} | ||
} | ||
return smiles; | ||
} | ||
|
||
output_pdbids_1D: | ||
label: The PDB IDs of target structures in 1D array | ||
doc: |- | ||
The PDB IDs of target structures in 1D array | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var pdbids = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation and the rest are the target structure PDB IDs. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
pdbids.push(words[j]); | ||
} | ||
} | ||
return pdbids; | ||
} | ||
|
||
output_pdbids_2D: | ||
label: The PDB IDs of target structures in 2D array | ||
doc: |- | ||
The PDB IDs of target structures in 2D array | ||
type: {"type": "array", "items": {"type": "array", "items": "string"}} | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var pdbids_2d = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation and the rest are the target structure PDB IDs. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
var pdbids = []; | ||
for (var j = 1; j < words.length; j++) { | ||
pdbids.push(words[j]); | ||
} | ||
pdbids_2d.push(pdbids); | ||
} | ||
return pdbids_2d; | ||
} | ||
|
||
$namespaces: | ||
edam: https://edamontology.org/ | ||
|
||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
specVersion: "0.1.0" | ||
name: extract_pdbids_drugbank | ||
version: 0.1.0 | ||
container: extract-pdbids-drugbank-plugin | ||
entrypoint: | ||
title: extract_pdbids_drugbank | ||
description: Filter the Drugbank database | ||
author: Brandon Walker, Nazanin Donyapour | ||
contact: [email protected], [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: drugbank_xml_file_path | ||
required: true | ||
description: Path to the Drugbank xml file | ||
type: File | ||
defaultValue: {'class': 'File', 'location': '../../../fda_drug_dataset/drugbank/drugbank_5.1.10.xml'} | ||
format: | ||
uri: edam:format_2332 | ||
- name: smiles | ||
required: true | ||
description: List of input SMILES, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi | ||
required: true | ||
description: List of input InChI strings, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi_keys | ||
required: true | ||
description: List of input InChI keys, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the text dataset file, Type string, File type output, Accepted formats txt | ||
type: string | ||
defaultValue: system.log | ||
format: | ||
uri: edam:format_2330 | ||
outputs: | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the txt file | ||
type: File | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_smiles | ||
required: true | ||
description: The Smiles of small molecules | ||
type: {'type': 'array', 'items': 'string'} | ||
- name: output_pdbids_1D | ||
required: true | ||
description: The PDB IDs of target structures in 1D array | ||
type: {'type': 'array', 'items': 'string'} | ||
- name: output_pdbids_2D | ||
required: true | ||
description: The PDB IDs of target structures in 2D array | ||
type: {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | ||
ui: | ||
- key: inputs.drugbank_xml_file_path | ||
title: "drugbank_xml_file_path: " | ||
description: "Path to the Drugbank xml file" | ||
type: File | ||
- key: inputs.smiles | ||
title: "smiles: " | ||
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi | ||
title: "inchi: " | ||
description: "List of input InChI strings, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi_keys | ||
title: "inchi_keys: " | ||
description: "List of input InChI keys, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.output_txt_path | ||
title: "output_txt_path: " | ||
description: "Path to the text dataset file, Type string, File type output, Accepted formats txt" | ||
type: string |
Oops, something went wrong.