Skip to content

Commit

Permalink
feat: generate result in deemian data format
Browse files Browse the repository at this point in the history
  • Loading branch information
radifar committed Jan 11, 2024
1 parent f8dc0a4 commit 6ad5c7b
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 17 deletions.
9 changes: 8 additions & 1 deletion src/deemian/deemian.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import time

import typer
from rich.console import Console

from deemian import __version__ as deemian_version
from deemian.engine.builder import DeemianData
from deemian.engine.processor import parser, InstructionTransformer
from deemian.engine.director import director
Expand All @@ -12,7 +15,9 @@

@app.command(short_help="run deemian script")
def run(script_name: str):
console.print(f"[bold magenta]Running {script_name}[/bold magenta]")
now = time.time()
console.print(f"[bold deep_pink3] Running {script_name}[/bold deep_pink3]")
print(f" on Deemian version: {deemian_version}")
with open(script_name) as f:
text = f.read()

Expand All @@ -21,7 +26,9 @@ def run(script_name: str):
command_tree = transformer.transform(command_tree)

deemian_data = DeemianData()
deemian_data.metadata.start_time = now
director(command_tree, deemian_data)
console.print(f"\nrunning time: {deemian_data.metadata.running_time}\n")


# one command one callback, just a temporary helper command
Expand Down
46 changes: 42 additions & 4 deletions src/deemian/engine/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,24 @@
import pandas as pd
from rdkit.Chem import AllChem as Chem

from deemian import __version__ as deemian_version
from deemian.chem.interactions import InteractionData
from deemian.chem.reader import mol_to_dataframe
from deemian.chem.selection import mol_dataframe_selection
from deemian.chem.utility import dataframe_to_pdb_block
from deemian.writer.readable import generate_report, write_readable
from deemian.writer.bundle import write_metadata, write_corrected_molecule, write_calculation_result, write_bundle


@dataclass
class Metadata:
    """Run-level information that accompanies a Deemian result."""

    # version string of the deemian package that produced the data
    deemian_version: str = deemian_version
    # epoch seconds; both are dropped by the bundle writer's dict_factory
    start_time: float = 0.0
    end_time: float = 0.0
    # human-readable strings, filled in when the metadata file is written
    running_time: str = ""
    creation_time: str = ""
    # keyed by selection name
    selection: dict = field(default_factory=dict)
    measurement: dict = field(default_factory=dict)


@dataclass
Expand All @@ -30,10 +43,12 @@ class Measurement:
ionizable: dict = field(default_factory=lambda: {"positive": False, "negative": False})
interacting_subjects: dict = field(default_factory=lambda: {})
conformation: list = field(default_factory=lambda: [])
conformation_range: list = field(default_factory=lambda: [])
calculation_results: dict = field(default_factory=lambda: {})

def conformation_range(self, start, end):
def set_conformation_range(self, start, end):
self.conformation = list(range(int(start), int(end) + 1))
self.conformation_range = [start, end]

def set_ionizable(self, charge: str, boolean: str):
if boolean == "true":
Expand All @@ -45,6 +60,7 @@ def set_ionizable(self, charge: str, boolean: str):

@dataclass
class DeemianData:
metadata: Metadata = Metadata()
molecule: dict[str, Molecule] = field(default_factory=lambda: {})
selection: dict[str, Selection] = field(default_factory=lambda: {})
measurement: dict[str, Measurement] = field(default_factory=lambda: defaultdict(Measurement))
Expand All @@ -60,6 +76,7 @@ def add_selection(self, name, selection, mol_parent):
selection_df = mol_dataframe_selection(selection, parent_df)

self.selection[name] = Selection(mol_parent, selection_df)
self.metadata.selection[name] = dict(sele_string=selection, parent=mol_parent)

def correct_bond(self, name, template):
selection_df = self.selection[name].mol_dataframe
Expand All @@ -71,7 +88,8 @@ def correct_bond(self, name, template):
corrected_df = mol_to_dataframe(corrected_mol)

self.molecule[name] = Molecule(corrected_mol)
self.selection[name] = Selection(name, corrected_df, selection_pdb_block)
self.selection[name] = Selection(name, corrected_df, Chem.MolToPDBBlock(corrected_mol))
self.metadata.selection[name]["parent"] = name + "_corrected.pdb"

def add_measurement(self, id):
return self.measurement[id]
Expand All @@ -80,6 +98,7 @@ def calculate_interactions(self, id):
measurement = self.measurement[id]

for pair in measurement.interacting_subjects:
print(f" ... start calculating {pair}")
subject_1, subject_2 = measurement.interacting_subjects[pair]
subject_1 = self.selection[subject_1]
subject_2 = self.selection[subject_2]
Expand All @@ -106,5 +125,24 @@ def write_readable_output(self, presentation_id: str, out_file: str, form):
report = generate_report(measurement, form)
write_readable(report, out_file)

def write_deemian_data(self, out_file: str, presentation_id: str):
return (out_file, presentation_id)
def write_deemian_data(self, presentation_id: str, out_file: str):
    """Bundle corrected molecules, results and metadata into *out_file*.

    The manifest is assembled in a fixed order: one entry per selection that
    carries a PDB block, one entry per calculation result of the measurement
    identified by *presentation_id*, and finally the metadata entry.
    """
    measurement_data = self.measurement[presentation_id]

    # only selections that hold a PDB block are exported as molecules
    manifest = [
        write_corrected_molecule(name, selection.mol_pdb_block)
        for name, selection in self.selection.items()
        if selection.mol_pdb_block
    ]
    manifest.extend(
        write_calculation_result(name, interaction_data.dataframe)
        for name, interaction_data in measurement_data.calculation_results.items()
    )
    manifest.append(write_metadata(self.metadata, measurement_data))

    write_bundle(manifest, out_file)
4 changes: 2 additions & 2 deletions src/deemian/engine/director.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def director(steps: Tree, data: DeemianData):
measurement.conformation.extend(inst.number)

elif inst.type == "conformation_range":
measurement.conformation_range(inst.start, inst.end)
measurement.set_conformation_range(inst.start, inst.end)

data.calculate_interactions(measurement_id)

Expand All @@ -49,4 +49,4 @@ def director(steps: Tree, data: DeemianData):
data.write_readable_output(presentation_id, inst.out_file, inst.format)

elif inst.type == "deemian_data":
data.write_deemian_data(inst.out_file, presentation_id)
data.write_deemian_data(presentation_id, inst.out_file)
71 changes: 71 additions & 0 deletions src/deemian/writer/bundle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from copy import deepcopy
from dataclasses import asdict
from datetime import datetime
import io
import json
import tarfile
import time


def dict_factory(x):
    """Build a dict from ``(key, value)`` pairs, for ``dataclasses.asdict``.

    Pairs whose value is ``None`` and the keys ``start_time`` / ``end_time``
    are left out of the result.
    """
    hidden_keys = ("start_time", "end_time")
    kept = {}
    for key, value in x:
        if value is None or key in hidden_keys:
            continue
        kept[key] = value
    return kept


def write_metadata(metadata, measurement_data):
    """Serialise run metadata into an in-memory ``deemian.json`` entry.

    Measurement details are added to a deep copy of *metadata*, so the
    caller's ``measurement`` dict is left untouched; only the timing fields of
    the original object are updated.

    Returns:
        A ``(file_name, file_object, size)`` tuple where *file_object* is a
        ``BytesIO`` holding UTF-8 encoded JSON and *size* is its byte length.
    """
    snapshot = deepcopy(metadata)

    snapshot.measurement["interacting_subjects"] = [
        dict(name=pair_name, subjects=pair, results=pair_name.replace(":", "_") + ".parquet")
        for pair_name, pair in measurement_data.interacting_subjects.items()
    ]

    # an explicit range wins over a conformation list; [1] is the fallback
    if measurement_data.conformation_range:
        snapshot.measurement["conformation_range"] = measurement_data.conformation_range
    elif measurement_data.conformation:
        snapshot.measurement["conformation"] = measurement_data.conformation
    else:
        snapshot.measurement["conformation"] = [1]

    # timing goes onto the original object as well, because the main script
    # reads running_time from it after the run
    metadata.end_time = time.time()
    metadata.running_time = f"{metadata.end_time - metadata.start_time:.3f} seconds"
    snapshot.running_time = metadata.running_time
    snapshot.creation_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

    payload = json.dumps(asdict(snapshot, dict_factory=dict_factory), indent=2).encode("utf-8")

    return ("deemian.json", io.BytesIO(payload), len(payload))


def write_corrected_molecule(name, pdb_block: str):
    """Wrap a PDB block as an in-memory ``<name>_corrected.pdb`` entry.

    Returns:
        A ``(file_name, file_object, size)`` tuple; *size* is the length of
        the UTF-8 encoded block in bytes.
    """
    file_name = name + "_corrected.pdb"
    encoded = pdb_block.encode("utf-8")

    return (file_name, io.BytesIO(encoded), len(encoded))


def write_calculation_result(name, interaction_df):
    """Write *interaction_df* as parquet into an in-memory file entry.

    The entry name is *name* with every ``:`` replaced by ``_`` plus the
    ``.parquet`` suffix.

    Returns:
        A ``(file_name, file_object, size)`` tuple; *size* is the number of
        bytes held by the buffer after writing.
    """
    file_name = name.replace(":", "_") + ".parquet"

    buffer = io.BytesIO()
    interaction_df.to_parquet(buffer)

    return (file_name, buffer, buffer.getbuffer().nbytes)


def write_bundle(manifest, out_file):
    """Pack in-memory files into a gzip-compressed tar archive.

    Args:
        manifest: iterable of ``(file_name, file_object, size)`` tuples, where
            *file_object* is a seekable binary stream and *size* is the number
            of bytes to store for that member.
        out_file: path of the archive to create.
    """
    # TarInfo defaults mtime to 0, which dates every extracted file to
    # 1970-01-01 and makes GNU tar warn about implausibly old time stamps.
    # An integer avoids an extra pax header for sub-second precision.
    created = int(time.time())

    with tarfile.open(out_file, mode="w:gz") as dd_file:
        for file_name, file_object, size in manifest:
            tarinfo = tarfile.TarInfo(file_name)
            tarinfo.size = size
            tarinfo.mtime = created
            # the stream may have been left at its end by whoever filled it
            file_object.seek(0)
            dd_file.addfile(tarinfo, file_object)
    print(f" ... wrote Deemian data: {out_file}")
11 changes: 9 additions & 2 deletions src/deemian/writer/readable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from jinja2 import Environment, PackageLoader

from deemian import __version__ as deemian_version


def generate_report(measurement, template_form):
template_dict = dict(
Expand All @@ -18,7 +20,12 @@ def generate_report(measurement, template_form):
results = measurement.calculation_results
interacting_subjects = measurement.interacting_subjects
report = template.render(
results=results, interacting_subjects=interacting_subjects, zip=zip, len=len, sorted=sorted
results=results,
interacting_subjects=interacting_subjects,
version=deemian_version,
zip=zip,
len=len,
sorted=sorted,
)

return report
Expand All @@ -27,4 +34,4 @@ def generate_report(measurement, template_form):
def write_readable(report, out_file):
with open(out_file, mode="w", encoding="utf-8") as result:
result.write(report)
print(f"... wrote {out_file}")
print(f" ... wrote readable output: {out_file}")
34 changes: 30 additions & 4 deletions tests/test_engine_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_deemian_data_measurement():
measurement.set_ionizable("positive", "true")
measurement.set_ionizable("negative", "false")
measurement.interacting_subjects["oseltamivir:protein_A"] = ("oseltamivir", "protein_A")
measurement.conformation_range("1", "4")
measurement.set_conformation_range("1", "4")
measurement.conformation.extend([5])

with patch.object(data, "calculate_interactions", return_value=1):
Expand All @@ -65,7 +65,7 @@ def test_deemian_data_measurement():
== """Measurement(interactions=['all'], \
ionizable={'positive': True, 'negative': False}, \
interacting_subjects={'oseltamivir:protein_A': ('oseltamivir', 'protein_A')}, \
conformation=[1, 2, 3, 4, 5], \
conformation=[1, 2, 3, 4, 5], conformation_range=['1', '4'], \
calculation_results={})"""
)
assert result == 1
Expand Down Expand Up @@ -144,17 +144,43 @@ def test_deemian_data_calculate_interactions_empty_conformation():
def test_deemian_data_presentation():
data = DeemianData()

protein_A = Mock()
protein_A.mol_parent = "5nzn.pdb"
protein_A.mol_dataframe = "protein_A:pd.DataFrame"
protein_A.mol_pdb_block = None

oseltamivir = Mock()
oseltamivir.mol_parent = "oseltamivir_corrected.pdb"
oseltamivir.mol_dataframe = "oseltamivir:pd.DataFrame"
oseltamivir.mol_pdb_block = "pdb_block:str"

data.selection["protein_A"] = protein_A
data.selection["oseltamivir"] = oseltamivir

measurement = data.add_measurement("protein_ligand")
measurement.interactions.extend(["all"])
measurement.set_ionizable("positive", "true")
measurement.set_ionizable("negative", "true")
measurement.interacting_subjects["oseltamivir:protein_A"] = ("oseltamivir", "protein_A")
measurement.calculation_results["oseltamivir:protein_A"] = "InteractionData"

interaction_data = Mock()
interaction_data.dataframe = "interaction_data:pd.DataFrame"
measurement.calculation_results["oseltamivir:protein_A"] = interaction_data

with patch("deemian.engine.builder.generate_report") as reporter:
with patch("deemian.engine.builder.write_readable") as writer:
data.write_readable_output("protein_ligand", "protein_ligand.txt", "detailed_conf_first")
data.write_deemian_data("protein_ligand.db", "protein_ligand")

reporter.assert_called_once()
writer.assert_called_once()

with patch("deemian.engine.builder.write_corrected_molecule") as writer_1:
with patch("deemian.engine.builder.write_calculation_result") as writer_2:
with patch("deemian.engine.builder.write_metadata") as metadata_writer:
with patch("deemian.engine.builder.write_bundle") as bundler:
data.write_deemian_data("protein_ligand", "protein_ligand.dd")

writer_1.assert_called_once()
writer_2.assert_called_once()
metadata_writer.assert_called_once()
bundler.assert_called_once()
8 changes: 4 additions & 4 deletions tests/test_engine_director.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def test_engine_director_simple(steps_simple):
call.add_measurement().conformation.extend([1]),
call.calculate_interactions("protein_ligand"),
call.write_readable_output("protein_ligand", "protein_ligand.txt", "detailed_conf_first"),
call.write_deemian_data("protein_ligand.dd", "protein_ligand"),
call.write_deemian_data("protein_ligand", "protein_ligand.dd"),
]
)

Expand Down Expand Up @@ -235,7 +235,7 @@ def test_engine_director_multiselect(steps_multiselect):
call.add_measurement().interacting_subjects.__setitem__("internal_rbm", ("spike_rbm", "spike_rbm")),
call.calculate_interactions("ace2_spike_rbd"),
call.write_readable_output("ace2_spike_rbd", "ace2_spike_rbd_detailed.txt", "detailed_conf_first"),
call.write_deemian_data("ace2_spike_rbd_detailed.dd", "ace2_spike_rbd"),
call.write_deemian_data("ace2_spike_rbd", "ace2_spike_rbd_detailed.dd"),
]
)

Expand All @@ -254,9 +254,9 @@ def test_engine_director_multiconf(steps_multiconf):
call.add_measurement().set_ionizable("positive", "true"),
call.add_measurement().set_ionizable("negative", "true"),
call.add_measurement().interacting_subjects.__setitem__("vps4:chmp6", ("vps4", "chmp6")),
call.add_measurement().conformation_range("1", "20"),
call.add_measurement().set_conformation_range("1", "20"),
call.calculate_interactions("vps4_chmp6"),
call.write_readable_output("vps4_chmp6", "vps4_chmp6.txt", "detailed_conf_first"),
call.write_deemian_data("vps4_chmp6.dd", "vps4_chmp6"),
call.write_deemian_data("vps4_chmp6", "vps4_chmp6.dd"),
]
)
Loading

0 comments on commit 6ad5c7b

Please sign in to comment.