From 249f0316d7372a1945517e5772617d1e59206d96 Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 29 Feb 2024 10:03:28 +0000 Subject: [PATCH] Used an in-built factory method for PacBioWell. Simplified the code that uses this class and extends it. Optimised the use of methods that return QC state(s) for product(s) according to the context. --- lang_qc/db/helper/wells.py | 46 +++++---------- lang_qc/models/pacbio/well.py | 104 +++++++++++++++++----------------- 2 files changed, 67 insertions(+), 83 deletions(-) diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 5ba0589..a9ff525 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -290,14 +290,8 @@ def _get_wells_for_status( id_product = qc_state_model.id_product mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product) if mlwh_well is not None: - pbw = PacBioWell( - id_product=id_product, - run_name=mlwh_well.pac_bio_run_name, - plate_number=mlwh_well.plate_number, - label=mlwh_well.well_label, - qc_state=qc_state_model, - ) - pbw.copy_run_tracking_info(mlwh_well) + pbw = PacBioWell.model_validate(mlwh_well) + pbw.qc_state = qc_state_model wells.append(pbw) else: """ @@ -424,32 +418,22 @@ def _well_models( ): # Normally QC data is not available for the inbox, aborted, etc. - # wells. If some well with a non-inbox status has QC state assigned, - # the same well will also be retrieved by the 'in progress' or - # 'on hold' or 'qc complete' queries. However, it is useful to display - # the QC state if it is available. The `qc_state_applicable` argument - # is a hint to fetch QC state. 
+ qced_products = dict() + if qc_state_applicable: + product_ids = [db_well.id_pac_bio_product for db_well in db_wells_list] + qced_products = get_qc_states_by_id_product_list( + session=self.qcdb_session, + ids=product_ids, + sequencing_outcomes_only=True, + ) pb_wells = [] for db_well in db_wells_list: + pb_well = PacBioWell.model_validate(db_well) id_product = db_well.id_pac_bio_product - attrs = { - "id_product": id_product, - "run_name": db_well.pac_bio_run_name, - "plate_number": db_well.plate_number, - "label": db_well.well_label, - } - if qc_state_applicable: - # TODO: Query by all IDs at once. - qced_products = get_qc_states_by_id_product_list( - session=self.qcdb_session, - ids=[id_product], - sequencing_outcomes_only=True, - ).get(id_product) - # A well can have only one or zero current sequencing outcomes. - if qced_products is not None and (len(qced_products) > 0): - attrs["qc_state"] = qced_products[0] - pb_well = PacBioWell.model_validate(attrs) - pb_well.copy_run_tracking_info(db_well) + if id_product in qced_products: + pb_well.qc_state = qced_products[id_product][0] pb_wells.append(pb_well) return pb_wells diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index d83baa8..22e40d9 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -26,7 +26,7 @@ from pydantic import BaseModel, ConfigDict, Field from sqlalchemy.orm import Session -from lang_qc.db.helper.qc import get_qc_states_by_id_product_list +from lang_qc.db.helper.qc import get_qc_state_for_product from lang_qc.db.mlwh_schema import PacBioRunWellMetrics from lang_qc.models.pacbio.experiment import PacBioExperiment from lang_qc.models.pacbio.qc_data import QCDataWell @@ -45,28 +45,51 @@ class PacBioWell(BaseModel, extra="forbid"): sequenced or QC metrics or assessment for such data. """ + model_config = ConfigDict( + populate_by_name=True, + from_attributes=True, + revalidate_instances="always", + extra="forbid", + ) + # Well identifies. 
- id_product: str = Field(title="Product identifier") - label: str = Field(title="Well label", description="The label of the PacBio well") + id_product: str = Field( + title="Product identifier", validation_alias="id_pac_bio_product" + ) + label: str = Field( + title="Well label", + description="The label of the PacBio well", + validation_alias="well_label", + ) plate_number: Optional[int] = Field( default=None, title="Plate number", description="Plate number, relevant for Revio instruments only", ) run_name: str = Field( - title="Run name", description="PacBio run name as registered in LIMS" + title="Run name", + description="PacBio run name as registered in LIMS", + validation_alias="pac_bio_run_name", ) # Run and well tracking information from SMRT Link - run_start_time: datetime = Field(default=None, title="Run start time") - run_complete_time: datetime = Field(default=None, title="Run complete time") - well_start_time: datetime = Field(default=None, title="Well start time") - well_complete_time: datetime = Field(default=None, title="Well complete time") - run_status: str = Field(default=None, title="Current PacBio run status") - well_status: str = Field(default=None, title="Current PacBio well status") + run_start_time: datetime | None = Field( + default=None, title="Run start time", validation_alias="run_start" + ) + run_complete_time: datetime | None = Field( + default=None, title="Run complete time", validation_alias="run_complete" + ) + well_start_time: datetime | None = Field( + default=None, title="Well start time", validation_alias="well_start" + ) + well_complete_time: datetime | None = Field( + default=None, title="Well complete time", validation_alias="well_complete" + ) + run_status: Optional[str] = Field(default=None, title="Current PacBio run status") + well_status: Optional[str] = Field(default=None, title="Current PacBio well status") instrument_name: str = Field(default=None, title="Instrument name") instrument_type: str = Field(default=None, 
title="Instrument type") - qc_state: QcState = Field( + qc_state: Optional[QcState] = Field( default=None, title="Current QC state of this well", description=""" @@ -76,20 +99,6 @@ class PacBioWell(BaseModel, extra="forbid"): """, ) - def copy_run_tracking_info(self, db_well: PacBioRunWellMetrics): - """ - Populates this object with the run and well tracking information - from a database row that is passed as an argument. - """ - self.run_start_time = db_well.run_start - self.run_complete_time = db_well.run_complete - self.well_start_time = db_well.well_start - self.well_complete_time = db_well.well_complete - self.run_status = db_well.run_status - self.well_status = db_well.well_status - self.instrument_name = db_well.instrument_name - self.instrument_type = db_well.instrument_type - class PacBioPagedWells(PagedResponse, extra="forbid"): """ @@ -114,47 +123,38 @@ class PacBioWellFull(PacBioWell): information, current QC state of this well and QC data for this well. """ - metrics: QCDataWell = Field( + metrics: Optional[QCDataWell] = Field( + default=None, title="Currently available QC data for well", ) - experiment_tracking: PacBioExperiment = Field( + experiment_tracking: Optional[PacBioExperiment] = Field( default=None, title="Experiment tracking information", description=""" Laboratory experiment tracking information for this well, if available. 
""", ) - model_config = ConfigDict(from_attributes=True, extra="forbid") @classmethod def from_orm(cls, mlwh_db_row: PacBioRunWellMetrics, qc_session: Session): id_product = mlwh_db_row.id_pac_bio_product - obj = cls( - id_product=id_product, - run_name=mlwh_db_row.pac_bio_run_name, - plate_number=mlwh_db_row.plate_number, - label=mlwh_db_row.well_label, - metrics=QCDataWell.from_orm(mlwh_db_row), - ) - obj.copy_run_tracking_info(mlwh_db_row) - - experiment_info = [] - for row in mlwh_db_row.pac_bio_product_metrics: - exp_row = row.pac_bio_run - if exp_row: - experiment_info.append(exp_row) - else: - # Do not supply incomplete data. - experiment_info = [] - break - if len(experiment_info): + obj = cls.model_validate(mlwh_db_row) + obj.metrics = QCDataWell.from_orm(mlwh_db_row) + + product_metrics = mlwh_db_row.pac_bio_product_metrics + experiment_info = [ + pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None + ] + # Occasionally product rows are not linked to LIMS rows. + # Go for all or nothing, do not supply incomplete data. + if len(experiment_info) and (len(experiment_info) == len(product_metrics)): obj.experiment_tracking = PacBioExperiment.from_orm(experiment_info) - qced_products = get_qc_states_by_id_product_list( - session=qc_session, ids=[id_product], sequencing_outcomes_only=True - ).get(id_product) - if qced_products is not None: - obj.qc_state = qced_products[0] + qc_state_db = get_qc_state_for_product( + session=qc_session, id_product=id_product + ) + if qc_state_db is not None: + obj.qc_state = QcState.from_orm(qc_state_db) return obj