Skip to content

Commit

Permalink
Merge pull request #210 from mgcam/towards_pydantic_dataclass
Browse files Browse the repository at this point in the history
Towards pydantic dataclass
  • Loading branch information
nerdstrike authored Mar 7, 2024
2 parents 392bdbc + 4d2efbd commit 732f1ba
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 97 deletions.
21 changes: 4 additions & 17 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,14 +290,7 @@ def _get_wells_for_status(
id_product = qc_state_model.id_product
mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product)
if mlwh_well is not None:
pbw = PacBioWell(
id_product=id_product,
run_name=mlwh_well.pac_bio_run_name,
plate_number=mlwh_well.plate_number,
label=mlwh_well.well_label,
qc_state=qc_state_model,
)
pbw.copy_run_tracking_info(mlwh_well)
pbw = PacBioWell(db_well=mlwh_well, qc_state=qc_state_model)
wells.append(pbw)
else:
"""
Expand Down Expand Up @@ -398,16 +391,10 @@ def _well_models(
pb_wells = []
for db_well in db_wells_list:
id_product = db_well.id_pac_bio_product
attrs = {
"id_product": id_product,
"run_name": db_well.pac_bio_run_name,
"plate_number": db_well.plate_number,
"label": db_well.well_label,
}
qc_state = None
if id_product in qced_products:
attrs["qc_state"] = qced_products[id_product][0]
pb_well = PacBioWell.model_validate(attrs)
pb_well.copy_run_tracking_info(db_well)
qc_state = qced_products[id_product][0]
pb_well = PacBioWell(db_well=db_well, qc_state=qc_state)
pb_wells.append(pb_well)

return pb_wells
Expand Down
5 changes: 4 additions & 1 deletion lang_qc/endpoints/pacbio_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from lang_qc.db.helper.qc import (
assign_qc_state_to_product,
claim_qc_for_product,
get_qc_state_for_product,
product_has_qc_state,
)
from lang_qc.db.helper.well import well_seq_product_find_or_create
Expand Down Expand Up @@ -179,7 +180,9 @@ def get_seq_metrics(

mlwh_well = _find_well_product_or_error(id_product, mlwhdb_session)

return PacBioWellFull.from_orm(mlwh_well, qcdb_session)
qc_state_db = get_qc_state_for_product(session=qcdb_session, id_product=id_product)
qc_state = None if qc_state_db is None else QcState.from_orm(qc_state_db)
return PacBioWellFull(db_well=mlwh_well, qc_state=qc_state)


@router.post(
Expand Down
196 changes: 120 additions & 76 deletions lang_qc/models/pacbio/well.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,52 +21,100 @@
# this program. If not, see <http://www.gnu.org/licenses/>.

from datetime import datetime
from typing import Optional
from typing import Any, Optional

from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy.orm import Session
from pydantic import Field, model_validator
from pydantic.dataclasses import dataclass

from lang_qc.db.helper.qc import get_qc_state_for_product
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.models.pacbio.experiment import PacBioExperiment
from lang_qc.models.pacbio.qc_data import QCDataWell
from lang_qc.models.pager import PagedResponse
from lang_qc.models.qc_state import QcState


class PacBioWell(BaseModel, extra="forbid"):
def get_field_names(cls):
"""Returns a list of field names for a class given as an argument.
The fields that can only be used at the object initialisation step
are excluded. For fields that have a validation_alias defined,
the alias is returned rather than the field name.
"""
A response model for a single PacBio well on a particular PacBio run.
The class contains the attributes that uniquely define this well (`run_name`
and `label`), along with the time line and the current QC state of this well,
if any.

This model does not contain any information about data that was
sequenced or QC metrics or assessment for such data.
field_names = []
for field_name in cls.__dataclass_fields__:
field = cls.__dataclass_fields__[field_name]
if field.default.init_var is True:
continue
name = field.default.validation_alias
if name is None:
name = field.name
field_names.append(name)
return field_names


@dataclass(kw_only=True, frozen=True)
class PacBioWell:
"""A basic response model for a single PacBio well.
`run_name`, `label`, `plate_number`, and `id_product` fields uniquely
identify the well. The model also has fields that reflect the time line
of the run and information about a PacBio instrument. The optional
`qc_state` field might contain the current QC state of the well.
The best way to instantiate the model is via the constructor, supplying
an ORM object representing a database row with information about
the well and, optionally, the model representing the current QC state.
Examples:
well_model = PacBioWell(db_well=well_row)
well_model = PacBioWell(db_well=well_row, qc_state=current_qc_state)
Mapping of the database values to this model's fields is performed by
a pre `__init__` hook. To enable automatic mapping, some fields of this
model have `validation_alias` set.
"""

db_well: PacBioRunWellMetrics = Field(init_var=True)

# Well identifiers.
id_product: str = Field(title="Product identifier")
label: str = Field(title="Well label", description="The label of the PacBio well")
id_product: str = Field(
title="Product identifier", validation_alias="id_pac_bio_product"
)
label: str = Field(
title="Well label",
description="The label of the PacBio well",
validation_alias="well_label",
)
plate_number: Optional[int] = Field(
default=None,
title="Plate number",
description="Plate number, relevant for Revio instruments only",
)
run_name: str = Field(
title="Run name", description="PacBio run name as registered in LIMS"
title="Run name",
description="PacBio run name as registered in LIMS",
validation_alias="pac_bio_run_name",
)
# Run and well tracking information from SMRT Link
run_start_time: datetime = Field(default=None, title="Run start time")
run_complete_time: datetime = Field(default=None, title="Run complete time")
well_start_time: datetime = Field(default=None, title="Well start time")
well_complete_time: datetime = Field(default=None, title="Well complete time")
run_status: str = Field(default=None, title="Current PacBio run status")
well_status: str = Field(default=None, title="Current PacBio well status")
instrument_name: str = Field(default=None, title="Instrument name")
instrument_type: str = Field(default=None, title="Instrument type")

qc_state: QcState = Field(
run_start_time: Optional[datetime] = Field(
default=None, title="Run start time", validation_alias="run_start"
)
run_complete_time: Optional[datetime] = Field(
default=None, title="Run complete time", validation_alias="run_complete"
)
well_start_time: Optional[datetime] = Field(
default=None, title="Well start time", validation_alias="well_start"
)
well_complete_time: Optional[datetime] = Field(
default=None, title="Well complete time", validation_alias="well_complete"
)
run_status: Optional[str] = Field(default=None, title="Current PacBio run status")
well_status: Optional[str] = Field(default=None, title="Current PacBio well status")
instrument_name: Optional[str] = Field(default=None, title="Instrument name")
instrument_type: Optional[str] = Field(default=None, title="Instrument type")

qc_state: Optional[QcState] = Field(
default=None,
title="Current QC state of this well",
description="""
Expand All @@ -76,25 +124,31 @@ class PacBioWell(BaseModel, extra="forbid"):
""",
)

def copy_run_tracking_info(self, db_well: PacBioRunWellMetrics):
@model_validator(mode="before")
def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
"""
Populates this object with the run and well tracking information
from a database row that is passed as an argument.
"""
self.run_start_time = db_well.run_start
self.run_complete_time = db_well.run_complete
self.well_start_time = db_well.well_start
self.well_complete_time = db_well.well_complete
self.run_status = db_well.run_status
self.well_status = db_well.well_status
self.instrument_name = db_well.instrument_name
self.instrument_type = db_well.instrument_type

# https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]

column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns]

assigned = dict()
for field_name in get_field_names(cls):
if field_name in column_names:
assigned[field_name] = getattr(mlwh_db_row, field_name)

if "qc_state" in values.kwargs:
assigned["qc_state"] = values.kwargs["qc_state"]

return assigned


class PacBioPagedWells(PagedResponse, extra="forbid"):
"""
A response model for paged data about PacBio wells.
"""
"""A response model for paged data about PacBio wells."""

wells: list[PacBioWell] = Field(
default=[],
Expand All @@ -106,55 +160,45 @@ class PacBioPagedWells(PagedResponse, extra="forbid"):
)


@dataclass(kw_only=True, frozen=True)
class PacBioWellFull(PacBioWell):
"""
A response model for a single PacBio well on a particular PacBio run.
The class contains the attributes that uniquely define this well (`run_name`
and `label`), along with the laboratory experiment and sequence run tracking
information, current QC state of this well and QC data for this well.
"""A full response model for a single PacBio well.
The model has the fields that uniquely define the well (`run_name`, `label`,
`plate_number`, `id_product`), along with the laboratory experiment and
sequence run tracking information, current QC state of this well and
QC data for this well.
Instance creation is described in the documentation of this class's parent
`PacBioWell`.
"""

metrics: QCDataWell = Field(
title="Currently available QC data for well",
)
experiment_tracking: PacBioExperiment = Field(
experiment_tracking: Optional[PacBioExperiment] = Field(
default=None,
title="Experiment tracking information",
description="""
Laboratory experiment tracking information for this well, if available.
""",
)
model_config = ConfigDict(from_attributes=True, extra="forbid")

@classmethod
def from_orm(cls, mlwh_db_row: PacBioRunWellMetrics, qc_session: Session):

id_product = mlwh_db_row.id_pac_bio_product
obj = cls(
id_product=id_product,
run_name=mlwh_db_row.pac_bio_run_name,
plate_number=mlwh_db_row.plate_number,
label=mlwh_db_row.well_label,
metrics=QCDataWell.from_orm(mlwh_db_row),
)
obj.copy_run_tracking_info(mlwh_db_row)

experiment_info = []
for row in mlwh_db_row.pac_bio_product_metrics:
exp_row = row.pac_bio_run
if exp_row:
experiment_info.append(exp_row)
else:
# Do not supply incomplete data.
experiment_info = []
break
if len(experiment_info):
obj.experiment_tracking = PacBioExperiment.from_orm(experiment_info)

qc_state_db = get_qc_state_for_product(
session=qc_session, id_product=id_product
)
if qc_state_db is not None:
obj.qc_state = QcState.from_orm(qc_state_db)

return obj

@model_validator(mode="before")
def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:

assigned = super().pre_root(values)
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]

assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row)

product_metrics = mlwh_db_row.pac_bio_product_metrics
experiment_info = [
pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None
]
# Occasionally product rows are not linked to LIMS rows.
# Go for all or nothing, do not supply incomplete data.
if len(experiment_info) and (len(experiment_info) == len(product_metrics)):
assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info)

return assigned
12 changes: 9 additions & 3 deletions tests/test_pac_well_full.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from npg_id_generation.pac_bio import PacBioEntity

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.wells import WellWh
from lang_qc.models.pacbio.well import PacBioWellFull
from tests.conftest import compare_dates, insert_from_yaml
Expand All @@ -21,7 +22,7 @@ def test_creating_experiment_object(
).hash_product_id()
well_row = helper.get_mlwh_well_by_product_id(id_product)

pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
pb_well = PacBioWellFull(db_well=well_row)
assert pb_well.id_product == id_product
assert pb_well.run_name == "TRACTION-RUN-92"
assert pb_well.label == "A1"
Expand All @@ -45,7 +46,12 @@ def test_creating_experiment_object(
).hash_product_id()
well_row = helper.get_mlwh_well_by_product_id(id_product)

pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
qc_state = get_qc_states_by_id_product_list(
session=qcdb_test_session,
ids=[id_product],
sequencing_outcomes_only=True,
)
pb_well = PacBioWellFull(db_well=well_row, qc_state=qc_state)
assert pb_well.id_product == id_product
assert pb_well.run_name == "TRACTION_RUN_1"
assert pb_well.label == "B1"
Expand All @@ -65,7 +71,7 @@ def test_creating_experiment_object(
).hash_product_id()
well_row = helper.get_mlwh_well_by_product_id(id_product)

pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
pb_well = PacBioWellFull(db_well=well_row, qc_state=None)
assert pb_well.id_product == id_product
assert pb_well.run_name == "TRACTION_RUN_10"
assert pb_well.label == "C1"
Expand Down

0 comments on commit 732f1ba

Please sign in to comment.