From c54de496458e1ea8886959223f7790cfc37cc837 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 5 Mar 2024 10:03:38 +0000
Subject: [PATCH 1/5] Use Optional type hint for fields with None default

---
 lang_qc/models/pacbio/well.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index 54adff5..9e42a25 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -57,16 +57,20 @@ class PacBioWell(BaseModel, extra="forbid"):
         title="Run name", description="PacBio run name as registered in LIMS"
     )
     # Run and well tracking information from SMRT Link
-    run_start_time: datetime = Field(default=None, title="Run start time")
-    run_complete_time: datetime = Field(default=None, title="Run complete time")
-    well_start_time: datetime = Field(default=None, title="Well start time")
-    well_complete_time: datetime = Field(default=None, title="Well complete time")
-    run_status: str = Field(default=None, title="Current PacBio run status")
-    well_status: str = Field(default=None, title="Current PacBio well status")
-    instrument_name: str = Field(default=None, title="Instrument name")
-    instrument_type: str = Field(default=None, title="Instrument type")
-
-    qc_state: QcState = Field(
+    run_start_time: Optional[datetime] = Field(default=None, title="Run start time")
+    run_complete_time: Optional[datetime] = Field(
+        default=None, title="Run complete time"
+    )
+    well_start_time: Optional[datetime] = Field(default=None, title="Well start time")
+    well_complete_time: Optional[datetime] = Field(
+        default=None, title="Well complete time"
+    )
+    run_status: Optional[str] = Field(default=None, title="Current PacBio run status")
+    well_status: Optional[str] = Field(default=None, title="Current PacBio well status")
+    instrument_name: Optional[str] = Field(default=None, title="Instrument name")
+    instrument_type: Optional[str] = Field(default=None, title="Instrument type")
+
+    qc_state: Optional[QcState] = Field(
         default=None,
         title="Current QC state of this well",
         description="""
@@ -117,7 +121,7 @@ class PacBioWellFull(PacBioWell):
     metrics: QCDataWell = Field(
         title="Currently available QC data for well",
     )
-    experiment_tracking: PacBioExperiment = Field(
+    experiment_tracking: Optional[PacBioExperiment] = Field(
         default=None,
         title="Experiment tracking information",
         description="""

From 9d1c7ceffb3064b9e066c6bbdb96736b3218f147 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 5 Mar 2024 12:05:46 +0000
Subject: [PATCH 2/5] pydantic BaseModel is replaced by pydantic dataclass

... for some models in order to simplify instantiation
of the objects.
---
 lang_qc/db/helper/wells.py       | 21 ++-----
 lang_qc/endpoints/pacbio_well.py |  5 +-
 lang_qc/models/pacbio/well.py    | 98 ++++++++++++++++----------------
 tests/test_pac_well_full.py      | 12 +++-
 4 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
index 63ab3b7..91c1d9b 100644
--- a/lang_qc/db/helper/wells.py
+++ b/lang_qc/db/helper/wells.py
@@ -290,14 +290,7 @@ def _get_wells_for_status(
             id_product = qc_state_model.id_product
             mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product)
             if mlwh_well is not None:
-                pbw = PacBioWell(
-                    id_product=id_product,
-                    run_name=mlwh_well.pac_bio_run_name,
-                    plate_number=mlwh_well.plate_number,
-                    label=mlwh_well.well_label,
-                    qc_state=qc_state_model,
-                )
-                pbw.copy_run_tracking_info(mlwh_well)
+                pbw = PacBioWell(db_well=mlwh_well, qc_state=qc_state_model)
                 wells.append(pbw)
             else:
                 """
@@ -398,16 +391,10 @@ def _well_models(
         pb_wells = []
         for db_well in db_wells_list:
             id_product = db_well.id_pac_bio_product
-            attrs = {
-                "id_product": id_product,
-                "run_name": db_well.pac_bio_run_name,
-                "plate_number": db_well.plate_number,
-                "label": db_well.well_label,
-            }
+            qc_state = None
             if id_product in qced_products:
-                attrs["qc_state"] = qced_products[id_product][0]
-            pb_well = PacBioWell.model_validate(attrs)
-            pb_well.copy_run_tracking_info(db_well)
+                qc_state = qced_products[id_product][0]
+            pb_well = PacBioWell(db_well=db_well, qc_state=qc_state)
             pb_wells.append(pb_well)
 
         return pb_wells
diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py
index dca152e..f9d4957 100644
--- a/lang_qc/endpoints/pacbio_well.py
+++ b/lang_qc/endpoints/pacbio_well.py
@@ -29,6 +29,7 @@
 from lang_qc.db.helper.qc import (
     assign_qc_state_to_product,
     claim_qc_for_product,
+    get_qc_state_for_product,
     product_has_qc_state,
 )
 from lang_qc.db.helper.well import well_seq_product_find_or_create
@@ -179,7 +180,9 @@ def get_seq_metrics(
 
     mlwh_well = _find_well_product_or_error(id_product, mlwhdb_session)
 
-    return PacBioWellFull.from_orm(mlwh_well, qcdb_session)
+    qc_state_db = get_qc_state_for_product(session=qcdb_session, id_product=id_product)
+    qc_state = None if qc_state_db is None else QcState.from_orm(qc_state_db)
+    return PacBioWellFull(db_well=mlwh_well, qc_state=qc_state)
 
 
 @router.post(
diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index 9e42a25..efd5abc 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -21,12 +21,11 @@
 # this program. If not, see <http://www.gnu.org/licenses/>.
 
 from datetime import datetime
-from typing import Optional
+from typing import Any, Optional
 
-from pydantic import BaseModel, ConfigDict, Field
-from sqlalchemy.orm import Session
+from pydantic import Field, model_validator
+from pydantic.dataclasses import dataclass
 
-from lang_qc.db.helper.qc import get_qc_state_for_product
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
 from lang_qc.models.pacbio.experiment import PacBioExperiment
 from lang_qc.models.pacbio.qc_data import QCDataWell
@@ -34,7 +33,8 @@
 from lang_qc.models.qc_state import QcState
 
 
-class PacBioWell(BaseModel, extra="forbid"):
+@dataclass
+class PacBioWell:
     """
     A response model for a single PacBio well on a particular PacBio run.
     The class contains the attributes that uniquely define this well (`run_name`
@@ -45,6 +45,8 @@ class PacBioWell(BaseModel, extra="forbid"):
     sequenced or QC metrics or assessment for such data.
     """
 
+    db_well: PacBioRunWellMetrics = Field(init_var=True)
+
     # Well identifies.
     id_product: str = Field(title="Product identifier")
     label: str = Field(title="Well label", description="The label of the PacBio well")
@@ -80,19 +82,33 @@ class PacBioWell(BaseModel, extra="forbid"):
         """,
     )
 
-    def copy_run_tracking_info(self, db_well: PacBioRunWellMetrics):
+    @model_validator(mode="before")
+    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         """
         Populates this object with the run and well tracking information
         from a database row that is passed as an argument.
         """
-        self.run_start_time = db_well.run_start
-        self.run_complete_time = db_well.run_complete
-        self.well_start_time = db_well.well_start
-        self.well_complete_time = db_well.well_complete
-        self.run_status = db_well.run_status
-        self.well_status = db_well.well_status
-        self.instrument_name = db_well.instrument_name
-        self.instrument_type = db_well.instrument_type
+
+        # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
+        mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
+        assigned = dict()
+        assigned["id_product"] = mlwh_db_row.id_pac_bio_product
+        assigned["label"] = mlwh_db_row.well_label
+        assigned["plate_number"] = mlwh_db_row.plate_number
+        assigned["run_name"] = mlwh_db_row.pac_bio_run_name
+        assigned["run_start_time"] = mlwh_db_row.run_start
+        assigned["run_complete_time"] = mlwh_db_row.run_complete
+        assigned["well_start_time"] = mlwh_db_row.well_start
+        assigned["well_complete_time"] = mlwh_db_row.well_complete
+        assigned["run_status"] = mlwh_db_row.run_status
+        assigned["well_status"] = mlwh_db_row.well_status
+        assigned["instrument_name"] = mlwh_db_row.instrument_name
+        assigned["instrument_type"] = mlwh_db_row.instrument_type
+
+        if "qc_state" in values.kwargs:
+            assigned["qc_state"] = values.kwargs["qc_state"]
+
+        return assigned
 
 
 class PacBioPagedWells(PagedResponse, extra="forbid"):
@@ -110,6 +126,7 @@ class PacBioPagedWells(PagedResponse, extra="forbid"):
     )
 
 
+@dataclass
 class PacBioWellFull(PacBioWell):
     """
     A response model for a single PacBio well on a particular PacBio run.
@@ -128,37 +145,22 @@ class PacBioWellFull(PacBioWell):
         Laboratory experiment tracking information for this well, if available.
         """,
     )
-    model_config = ConfigDict(from_attributes=True, extra="forbid")
-
-    @classmethod
-    def from_orm(cls, mlwh_db_row: PacBioRunWellMetrics, qc_session: Session):
-
-        id_product = mlwh_db_row.id_pac_bio_product
-        obj = cls(
-            id_product=id_product,
-            run_name=mlwh_db_row.pac_bio_run_name,
-            plate_number=mlwh_db_row.plate_number,
-            label=mlwh_db_row.well_label,
-            metrics=QCDataWell.from_orm(mlwh_db_row),
-        )
-        obj.copy_run_tracking_info(mlwh_db_row)
-
-        experiment_info = []
-        for row in mlwh_db_row.pac_bio_product_metrics:
-            exp_row = row.pac_bio_run
-            if exp_row:
-                experiment_info.append(exp_row)
-            else:
-                # Do not supply incomplete data.
-                experiment_info = []
-                break
-        if len(experiment_info):
-            obj.experiment_tracking = PacBioExperiment.from_orm(experiment_info)
-
-        qc_state_db = get_qc_state_for_product(
-            session=qc_session, id_product=id_product
-        )
-        if qc_state_db is not None:
-            obj.qc_state = QcState.from_orm(qc_state_db)
-
-        return obj
+
+    @model_validator(mode="before")
+    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
+
+        assigned = super().pre_root(values)
+        mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
+
+        assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row)
+
+        product_metrics = mlwh_db_row.pac_bio_product_metrics
+        experiment_info = [
+            pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None
+        ]
+        # Occasionally product rows are not linked to LIMS rows.
+        # Go for all or nothing, do not supply incomplete data.
+        if len(experiment_info) and (len(experiment_info) == len(product_metrics)):
+            assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info)
+
+        return assigned
diff --git a/tests/test_pac_well_full.py b/tests/test_pac_well_full.py
index b1a700e..c8ff08c 100644
--- a/tests/test_pac_well_full.py
+++ b/tests/test_pac_well_full.py
@@ -1,5 +1,6 @@
 from npg_id_generation.pac_bio import PacBioEntity
 
+from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
 from lang_qc.db.helper.wells import WellWh
 from lang_qc.models.pacbio.well import PacBioWellFull
 from tests.conftest import compare_dates, insert_from_yaml
@@ -21,7 +22,7 @@ def test_creating_experiment_object(
     ).hash_product_id()
     well_row = helper.get_mlwh_well_by_product_id(id_product)
 
-    pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
+    pb_well = PacBioWellFull(db_well=well_row)
     assert pb_well.id_product == id_product
     assert pb_well.run_name == "TRACTION-RUN-92"
     assert pb_well.label == "A1"
@@ -45,7 +46,12 @@ def test_creating_experiment_object(
     ).hash_product_id()
     well_row = helper.get_mlwh_well_by_product_id(id_product)
 
-    pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
+    qc_state = get_qc_states_by_id_product_list(
+        session=qcdb_test_session,
+        ids=[id_product],
+        sequencing_outcomes_only=True,
+    )
+    pb_well = PacBioWellFull(db_well=well_row, qc_state=qc_state)
     assert pb_well.id_product == id_product
     assert pb_well.run_name == "TRACTION_RUN_1"
     assert pb_well.label == "B1"
@@ -65,7 +71,7 @@ def test_creating_experiment_object(
     ).hash_product_id()
     well_row = helper.get_mlwh_well_by_product_id(id_product)
 
-    pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session)
+    pb_well = PacBioWellFull(db_well=well_row, qc_state=None)
     assert pb_well.id_product == id_product
     assert pb_well.run_name == "TRACTION_RUN_10"
     assert pb_well.label == "C1"

From bcbabcd31530789f68e6530b1a28b20836e9fb7d Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 5 Mar 2024 13:40:56 +0000
Subject: [PATCH 3/5] Make the dataclasses semi-immutable - no change for the
 values

---
 lang_qc/models/pacbio/well.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index efd5abc..1568842 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -33,7 +33,7 @@
 from lang_qc.models.qc_state import QcState
 
 
-@dataclass
+@dataclass(kw_only=True, frozen=True)
 class PacBioWell:
     """
     A response model for a single PacBio well on a particular PacBio run.
@@ -126,7 +126,7 @@ class PacBioPagedWells(PagedResponse, extra="forbid"):
     )
 
 
-@dataclass
+@dataclass(kw_only=True, frozen=True)
 class PacBioWellFull(PacBioWell):
     """
     A response model for a single PacBio well on a particular PacBio run.

From 064c2bb3a35bf04a14c99aa46adc01f8604dd017 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Wed, 6 Mar 2024 15:22:22 +0000
Subject: [PATCH 4/5] Auto-map column names to model fields

---
 lang_qc/models/pacbio/well.py | 63 ++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index 1568842..ad6acf8 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -33,6 +33,25 @@
 from lang_qc.models.qc_state import QcState
 
 
+def get_field_names(cls):
+    """Returns a list of field names for a class given as an argument.
+
+    The fields that can only be used at the object initialisation step
+    are excluded.
+    """
+
+    field_names = []
+    for field_name in cls.__dataclass_fields__:
+        field = cls.__dataclass_fields__[field_name]
+        if field.default.init_var is True:
+            continue
+        name = field.default.validation_alias
+        if name is None:
+            name = field.name
+        field_names.append(name)
+    return field_names
+
+
 @dataclass(kw_only=True, frozen=True)
 class PacBioWell:
     """
@@ -48,24 +67,36 @@ class PacBioWell:
     db_well: PacBioRunWellMetrics = Field(init_var=True)
 
     # Well identifies.
-    id_product: str = Field(title="Product identifier")
-    label: str = Field(title="Well label", description="The label of the PacBio well")
+    id_product: str = Field(
+        title="Product identifier", validation_alias="id_pac_bio_product"
+    )
+    label: str = Field(
+        title="Well label",
+        description="The label of the PacBio well",
+        validation_alias="well_label",
+    )
     plate_number: Optional[int] = Field(
         default=None,
         title="Plate number",
         description="Plate number, relevant for Revio instruments only",
     )
     run_name: str = Field(
-        title="Run name", description="PacBio run name as registered in LIMS"
+        title="Run name",
+        description="PacBio run name as registered in LIMS",
+        validation_alias="pac_bio_run_name",
     )
     # Run and well tracking information from SMRT Link
-    run_start_time: Optional[datetime] = Field(default=None, title="Run start time")
+    run_start_time: Optional[datetime] = Field(
+        default=None, title="Run start time", validation_alias="run_start"
+    )
     run_complete_time: Optional[datetime] = Field(
-        default=None, title="Run complete time"
+        default=None, title="Run complete time", validation_alias="run_complete"
+    )
+    well_start_time: Optional[datetime] = Field(
+        default=None, title="Well start time", validation_alias="well_start"
     )
-    well_start_time: Optional[datetime] = Field(default=None, title="Well start time")
     well_complete_time: Optional[datetime] = Field(
-        default=None, title="Well complete time"
+        default=None, title="Well complete time", validation_alias="well_complete"
     )
     run_status: Optional[str] = Field(default=None, title="Current PacBio run status")
     well_status: Optional[str] = Field(default=None, title="Current PacBio well status")
@@ -91,19 +122,13 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
 
         # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
         mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
+
+        column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns]
+
         assigned = dict()
-        assigned["id_product"] = mlwh_db_row.id_pac_bio_product
-        assigned["label"] = mlwh_db_row.well_label
-        assigned["plate_number"] = mlwh_db_row.plate_number
-        assigned["run_name"] = mlwh_db_row.pac_bio_run_name
-        assigned["run_start_time"] = mlwh_db_row.run_start
-        assigned["run_complete_time"] = mlwh_db_row.run_complete
-        assigned["well_start_time"] = mlwh_db_row.well_start
-        assigned["well_complete_time"] = mlwh_db_row.well_complete
-        assigned["run_status"] = mlwh_db_row.run_status
-        assigned["well_status"] = mlwh_db_row.well_status
-        assigned["instrument_name"] = mlwh_db_row.instrument_name
-        assigned["instrument_type"] = mlwh_db_row.instrument_type
+        for field_name in get_field_names(cls):
+            if field_name in column_names:
+                assigned[field_name] = getattr(mlwh_db_row, field_name)
 
         if "qc_state" in values.kwargs:
             assigned["qc_state"] = values.kwargs["qc_state"]

From 4d2efbde3c041ae6e29cda1f9f380c5b5d9ae543 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Wed, 6 Mar 2024 16:33:20 +0000
Subject: [PATCH 5/5] Update documentation

---
 lang_qc/models/pacbio/well.py | 45 ++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index ad6acf8..9deea4f 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -37,7 +37,8 @@ def get_field_names(cls):
     """Returns a list of field names for a class given as an argument.
 
     The fields that can only be used at the object initialisation step
-    are excluded.
+    are excluded. For fields that have a validation_alias defined,
+    this alias is returned rather than the field name.
     """
 
     field_names = []
@@ -54,14 +55,24 @@ def get_field_names(cls):
 
 @dataclass(kw_only=True, frozen=True)
 class PacBioWell:
-    """
-    A response model for a single PacBio well on a particular PacBio run.
-    The class contains the attributes that uniquely define this well (`run_name`
-    and `label`), along with the time line and the current QC state of this well,
-    if any.
+    """A basic response model for a single PacBio well.
+
+    `run_name`, `label`, `plate_number`, and `id_product` fields uniquely
+    identify the well. The model also has fields that reflect the time line
+    of the run and information about a PacBio instrument. The optional
+    `qc_state` field might contain the current QC state of the well.
+
+    The best way to instantiate the model is via the constructor, supplying
+    an ORM object representing a database row with information about
+    the well and, optionally, the model representing the current QC state.
 
-    This model does not contain any information about data that was
-    sequenced or QC metrics or assessment for such data.
+    Examples:
+        well_model = PacBioWell(db_well=well_row)
+        well_model = PacBioWell(db_well=well_row, qc_state=current_qc_state)
+
+    Mapping of the database values to this model's fields is performed by
+    a pre `__init__` hook. To enable automatic mapping, some fields of this
+    model have `validation_alias` set.
     """
 
     db_well: PacBioRunWellMetrics = Field(init_var=True)
@@ -137,9 +148,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
 
 
 class PacBioPagedWells(PagedResponse, extra="forbid"):
-    """
-    A response model for paged data about PacBio wells.
-    """
+    """A response model for paged data about PacBio wells."""
 
     wells: list[PacBioWell] = Field(
         default=[],
@@ -153,11 +162,15 @@ class PacBioPagedWells(PagedResponse, extra="forbid"):
 
 @dataclass(kw_only=True, frozen=True)
 class PacBioWellFull(PacBioWell):
-    """
-    A response model for a single PacBio well on a particular PacBio run.
-    The class contains the attributes that uniquely define this well (`run_name`
-    and `label`), along with the laboratory experiment and sequence run tracking
-    information, current QC state of this well and QC data for this well.
+    """A full response model for a single PacBio well.
+
+    The model has the fields that uniquely define the well (`run_name`, `label`,
+    `plate_number`, `id_product`), along with the laboratory experiment and
+    sequence run tracking information, current QC state of this well and
+    QC data for this well.
+
+    Instance creation is described in the documentation of this class's parent
+    `PacBioWell`.
     """
 
     metrics: QCDataWell = Field(