From 59c25027793658b82462c2ba1ea77df08dadcc4d Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Thu, 2 May 2024 13:39:17 +0000
Subject: [PATCH 01/16] Put in some syntactic sugar checksum types to enable
 differentiation between use of id_product in mlwh tables

---
 lang_qc/endpoints/pacbio_well.py |  9 +++++----
 lang_qc/util/type_checksum.py    | 16 ++++++++++++++++
 tests/test_checksum_type.py      |  2 +-
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py
index f9d4957..27534f6 100644
--- a/lang_qc/endpoints/pacbio_well.py
+++ b/lang_qc/endpoints/pacbio_well.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, 2023 Genome Research Ltd.
+# Copyright (c) 2022, 2023, 2024 Genome Research Ltd.
 #
 # Authors:
 #   Adam Blanchet
@@ -40,13 +40,14 @@
 from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellFull
 from lang_qc.models.qc_flow_status import QcFlowStatusEnum
 from lang_qc.models.qc_state import QcState, QcStateBasic
+from lang_qc.models.pacbio.qc_data import QCPoolMetrics
 from lang_qc.util.auth import check_user
 from lang_qc.util.errors import (
     InconsistentInputError,
     InvalidDictValueError,
     RunNotFoundError,
 )
-from lang_qc.util.type_checksum import ChecksumSHA256
+from lang_qc.util.type_checksum import ChecksumSHA256, PacBioWellSHA256, PacBioProductSHA256
 
 """
 A collection of API endpoints that are specific to the PacBio sequencing
@@ -173,7 +174,7 @@ def get_wells_in_run(
     response_model=PacBioWellFull,
 )
 def get_seq_metrics(
-    id_product: ChecksumSHA256,
+    id_product: PacBioWellSHA256,
     mlwhdb_session: Session = Depends(get_mlwh_db),
     qcdb_session: Session = Depends(get_qc_db),
 ) -> PacBioWellFull:
@@ -210,7 +211,7 @@ def get_seq_metrics(
     status_code=status.HTTP_201_CREATED,
 )
 def claim_qc(
-    id_product: ChecksumSHA256,
+    id_product: PacBioWellSHA256,
     user: User = Depends(check_user),
     qcdb_session: Session = Depends(get_qc_db),
     mlwhdb_session: Session = Depends(get_mlwh_db),
diff --git a/lang_qc/util/type_checksum.py b/lang_qc/util/type_checksum.py
index a704d3e..15b8bdc 100644
--- a/lang_qc/util/type_checksum.py
+++ b/lang_qc/util/type_checksum.py
@@ -40,3 +40,19 @@ def validate(cls, v, _):
 
     def __repr__(self):
         return f"ChecksumSHA256({super().__repr__()})"
+
+
+class PacBioWellSHA256(ChecksumSHA256):
+    """
+    A checksum generated from the coordinates of a single well on a plate in a PacBio run
+    """
+    pass
+
+
+class PacBioProductSHA256(ChecksumSHA256):
+    """
+    A checksum generated from the combination of run, well, plate and any tags required for deplexing
+    See `npg_id_generation.pac_bio.PacBioEntity`.
+    Tags only contribute to the checksum when samples are multiplexed.
+    """
+    pass
\ No newline at end of file
diff --git a/tests/test_checksum_type.py b/tests/test_checksum_type.py
index aba01f7..41474f7 100644
--- a/tests/test_checksum_type.py
+++ b/tests/test_checksum_type.py
@@ -7,7 +7,7 @@
 
 
 class ChecksumSHA256User(BaseModel):
-    product_chcksm: ChecksumSHA256
+    product_chcksm: ChecksumSHA256 | None = None
 
 
 def test_valid_checksum():

From 486faca2e5629578a77ba1741e64567e6145fb10 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Fri, 10 May 2024 15:44:11 +0000
Subject: [PATCH 02/16] Undo type fix, it seems to impact how the code works.
 Disturbing.

---
 tests/test_checksum_type.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_checksum_type.py b/tests/test_checksum_type.py
index 41474f7..aba01f7 100644
--- a/tests/test_checksum_type.py
+++ b/tests/test_checksum_type.py
@@ -7,7 +7,7 @@
 
 
 class ChecksumSHA256User(BaseModel):
-    product_chcksm: ChecksumSHA256 | None = None
+    product_chcksm: ChecksumSHA256
 
 
 def test_valid_checksum():

From 2b9dc32659af72ac08afce95c36befa9ced07636 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Fri, 10 May 2024 15:47:04 +0000
Subject: [PATCH 03/16] Define a response model for pool metrics for a given
 well

---
 lang_qc/models/pacbio/qc_data.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/lang_qc/models/pacbio/qc_data.py b/lang_qc/models/pacbio/qc_data.py
index 3fe13e5..8f64b84 100644
--- a/lang_qc/models/pacbio/qc_data.py
+++ b/lang_qc/models/pacbio/qc_data.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, 2023 Genome Research Ltd.
+# Copyright (c) 2022, 2023, 2024 Genome Research Ltd.
 #
 # Authors:
 #   Marina Gourtovaia <mg8@sanger.ac.uk>
@@ -23,6 +23,7 @@
 from pydantic import BaseModel, ConfigDict, Field
 
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
+from lang_qc.util.type_checksum import PacBioProductSHA256
 
 
 # Pydantic prohibits us from defining these as @classmethod or @staticmethod
@@ -153,3 +154,31 @@ def from_orm(cls, obj: PacBioRunWellMetrics):
                     qc_data[name]["value"] = getattr(obj, name, None)
 
         return cls.model_validate(qc_data)
+
+
+class SampleDeplexingStats(BaseModel):
+    """
+    A representation of metrics for one product, some direct from the DB and others inferred
+
+    For a long time tag2_name was null and tag1_name was silently used at both ends of the sequence.
+    As a result tag2_name will be None for most data in or before 2024.
+    """
+
+    id_product: PacBioProductSHA256
+    tag1_name: str | None
+    tag2_name: str | None
+    hifi_read_bases: int | None
+    hifi_num_reads: int | None
+    hifi_read_length_mean: float | None
+    hifi_bases_percent: float | None
+    percentage_total_reads: float | None
+
+
+class QCPoolMetrics(BaseModel):
+    pool_coeff_of_variance: float | None = Field(
+        title="Coefficient of variance for reads in the pool",
+        description="Percentage of the standard deviation w.r.t. mean, reported when the pool is larger than one",
+    )
+    products: list[SampleDeplexingStats] = Field(
+        title="List of products and their metrics"
+    )

From 3b3c32aecb6a6a8223b74b9b09da93e82868e659 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Tue, 14 May 2024 16:07:47 +0000
Subject: [PATCH 04/16] Allow WellWh helper to compute pool metrics

---
 lang_qc/db/helper/wells.py         |  45 ++++++-
 tests/fixtures/sample_data.py      | 205 +++++++++++++++++++++++++++++
 tests/test_pac_bio_qc_data_well.py |  32 +++++
 3 files changed, 280 insertions(+), 2 deletions(-)
 create mode 100644 tests/fixtures/sample_data.py

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
index 976dd63..48a3042 100644
--- a/lang_qc/db/helper/wells.py
+++ b/lang_qc/db/helper/wells.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, 2023 Genome Research Ltd.
+# Copyright (c) 2022, 2023, 2024 Genome Research Ltd.
 #
 # Authors:
 #   Marina Gourtovaia <mg8@sanger.ac.uk>
@@ -21,6 +21,7 @@
 
 import logging
 from datetime import date, datetime, timedelta
+from statistics import mean, stdev
 from typing import ClassVar, List
 
 from pydantic import BaseModel, ConfigDict, Field
@@ -33,11 +34,13 @@
 )
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
 from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
+from lang_qc.models.pacbio.qc_data import QCPoolMetrics, SampleDeplexingStats
 from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary
 from lang_qc.models.pager import PagedResponse
 from lang_qc.models.qc_flow_status import QcFlowStatusEnum
 from lang_qc.models.qc_state import QcState as QcStateModel
 from lang_qc.util.errors import EmptyListOfRunNamesError, RunNotFoundError
+from lang_qc.util.type_checksum import PacBioWellSHA256
 
 """
 This package is using an undocumented feature of Pydantic, type
@@ -64,7 +67,7 @@ class WellWh(BaseModel):
     # The TestClient seems to be keeping these instances alive and changing them.
 
     def get_mlwh_well_by_product_id(
-        self, id_product: str
+        self, id_product: PacBioWellSHA256
     ) -> PacBioRunWellMetrics | None:
         """
         Returns a well row record from the well metrics table or
@@ -77,6 +80,44 @@ def get_mlwh_well_by_product_id(
             )
         ).scalar_one_or_none()
 
+    def get_metrics_by_well_product_id(
+        self, id_product: PacBioWellSHA256
+    ) -> QCPoolMetrics | None:
+        """
+        Returns pool (deplexing) metrics for a multiplexed well, or None if
+        the well is not found or holds fewer than two products.
+        """
+        well = self.get_mlwh_well_by_product_id(id_product)
+        products = well.pac_bio_product_metrics if well else []
+        if len(products) < 2:
+            return None  # also keeps stdev() away from fewer than two values
+
+        reads = [prod.hifi_num_reads for prod in products]
+        # No CoV when a count is missing or all counts are zero (zero mean).
+        cov: float | None = None
+        if None not in reads and any(reads):
+            cov = stdev(reads) / mean(reads) * 100
+        return QCPoolMetrics(
+            pool_coeff_of_variance=cov,
+            products=[
+                SampleDeplexingStats(
+                    id_product=prod.id_pac_bio_product,
+                    tag1_name=prod.pac_bio_run.tag_identifier,
+                    tag2_name=prod.pac_bio_run.tag2_identifier,
+                    hifi_read_bases=prod.hifi_read_bases,
+                    hifi_num_reads=prod.hifi_num_reads,
+                    hifi_read_length_mean=prod.hifi_read_length_mean,
+                    hifi_bases_percent=prod.hifi_bases_percent,
+                    percentage_total_reads=(
+                        prod.hifi_num_reads / well.hifi_num_reads * 100
+                        if well.hifi_num_reads and prod.hifi_num_reads is not None
+                        else None
+                    ),
+                )
+                for prod in products
+            ],
+        )
+
     def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
         """
         Get recent not QC-ed completed wells from the mlwh database.
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
new file mode 100644
index 0000000..15ed224
--- /dev/null
+++ b/tests/fixtures/sample_data.py
@@ -0,0 +1,205 @@
+from datetime import datetime
+
+import pytest
+from npg_id_generation.pac_bio import PacBioEntity
+
+from lang_qc.db.mlwh_schema import (
+    PacBioProductMetrics,
+    PacBioRun,
+    PacBioRunWellMetrics,
+    Sample,
+    Study,
+)
+
+
+@pytest.fixture(scope="function", params=["AAAAAAAA", None])
+def simplex_run(mlwhdb_test_session):
+    """
+    A single sample, well, run mlwh fixture that provides both an explicit tag1
+    for the sample, and an implicit default tag (when the PacBio instrument is
+    run with default barcodes)
+    """
+    run_name = "RUN"
+    well_label = "A1"
+    plate_number = 1
+    tag1 = mlwhdb_test_session.param
+
+    common_run_attribs = {
+        "recorded_at": datetime.now(),
+        "last_updated": datetime.now(),
+        "pipeline_id_lims": "nobody cares",
+        "cost_code": "probably ToL",
+        "id_lims": 1,
+        "plate_uuid_lims": "uuid1",
+        "well_uuid_lims": "uuid1",
+        "pac_bio_library_tube_id_lims": "id",
+        "pac_bio_library_tube_uuid": "uuid",
+        "pac_bio_library_tube_name": "bob",
+    }
+
+    well_metrics_a1 = PacBioRunWellMetrics(
+        pac_bio_run_name=run_name,
+        well_label=well_label,
+        plate_number=plate_number,
+        instrument_type="Revio",
+        id_pac_bio_product=PacBioEntity(
+            run_name=run_name,
+            well_label=well_label,
+            plate_number=plate_number,
+        ).hash_product_id(),
+    )
+
+    study = Study(
+        id_lims="id",
+        id_study_lims="1",
+    )
+
+    # This run-well-plate has one singly tagged sample
+    simplex_run = PacBioRun(
+        pac_bio_run_name=run_name,
+        well_label=well_label,
+        plate_number=plate_number,
+        id_pac_bio_run_lims=0,
+        sample=Sample(
+            id_lims="id",
+            id_sample_lims="1",
+        ),
+        study=study,
+        plate_barcode="ABCD",
+        pac_bio_product_metrics=[
+            PacBioProductMetrics(
+                id_pac_bio_product=PacBioEntity(
+                    run_name=run_name,
+                    well_label=well_label,
+                    plate_number=plate_number,
+                    tags=tag1,
+                ).hash_product_id(),
+                qc=1,
+                hifi_read_bases=900,
+                hifi_num_reads=10,
+                hifi_read_length_mean=90,
+                barcode_quality_score_mean=34,
+                hifi_read_quality_mean=35,
+                hifi_bases_percent=90.001,
+                pac_bio_run_well_metrics=well_metrics_a1,
+            )
+        ],
+        **common_run_attribs
+    )
+    mlwhdb_test_session.add(simplex_run)
+    mlwhdb_test_session.commit()
+
+
+@pytest.fixture(scope="function")
+def multiplexed_run(mlwhdb_test_session):
+    "runs for several (2) samples in one run-well-plate"
+
+    run_name = "RUN"
+    well_label = "B1"
+    plate_number = 1
+    tag1 = "AAAAAAA"
+
+    common_run_attribs = {
+        "recorded_at": datetime.now(),
+        "last_updated": datetime.now(),
+        "pipeline_id_lims": "nobody cares",
+        "cost_code": "probably ToL",
+        "id_lims": 1,
+        "plate_uuid_lims": "uuid1",
+        "well_uuid_lims": "uuid1",
+        "pac_bio_library_tube_id_lims": "id",
+        "pac_bio_library_tube_uuid": "uuid",
+        "pac_bio_library_tube_name": "bob",
+    }
+
+    study = Study(
+        id_lims="id",
+        id_study_lims="1",
+    )
+
+    tag1 = "TTTTTTTT"
+    tag1_2 = "GGGGGGGG"
+    well_metrics_b1 = PacBioRunWellMetrics(
+        pac_bio_run_name=run_name,
+        well_label=well_label,
+        plate_number=plate_number,
+        instrument_type="Revio",
+        id_pac_bio_product=PacBioEntity(
+            run_name=run_name,
+            well_label=well_label,
+            plate_number=plate_number,
+        ).hash_product_id(),
+        hifi_num_reads=30,
+    )
+
+    multiplex_run_1 = PacBioRun(
+        pac_bio_run_name=run_name,
+        well_label=well_label,
+        plate_number=plate_number,
+        id_pac_bio_run_lims=1,
+        sample=Sample(
+            id_lims="pooled_id_1",
+            id_sample_lims="2",
+        ),
+        study=study,
+        plate_barcode="ABCD",
+        pac_bio_product_metrics=[
+            PacBioProductMetrics(
+                id_pac_bio_product=PacBioEntity(
+                    run_name=run_name,
+                    well_label=well_label,
+                    plate_number=plate_number,
+                    tags=tag1,
+                ).hash_product_id(),
+                qc=1,
+                hifi_read_bases=900,
+                hifi_num_reads=20,
+                hifi_read_length_mean=45,
+                barcode_quality_score_mean=34,
+                hifi_read_quality_mean=35,
+                hifi_bases_percent=90.001,
+                pac_bio_run_well_metrics=well_metrics_b1,
+            ),
+        ],
+        **common_run_attribs
+    )
+
+    multiplex_run_2 = PacBioRun(
+        pac_bio_run_name=run_name,
+        well_label=well_label,
+        plate_number=plate_number,
+        id_pac_bio_run_lims=2,
+        sample=Sample(
+            id_lims="pooled_id_2",
+            id_sample_lims="3",
+        ),
+        study=study,
+        plate_barcode="ABCD",
+        pac_bio_product_metrics=[
+            PacBioProductMetrics(
+                id_pac_bio_product=PacBioEntity(
+                    run_name=run_name,
+                    well_label=well_label,
+                    plate_number=plate_number,
+                    tags=tag1_2,
+                ).hash_product_id(),
+                qc=1,
+                hifi_read_bases=100,
+                hifi_num_reads=10,
+                hifi_read_length_mean=10,
+                barcode_quality_score_mean=34,
+                hifi_read_quality_mean=35,
+                hifi_bases_percent=100.00,
+                pac_bio_run_well_metrics=well_metrics_b1,
+            )
+        ],
+        **common_run_attribs
+    )
+
+    mlwhdb_test_session.add_all([multiplex_run_1, multiplex_run_2])
+    mlwhdb_test_session.commit()
+
+
+# Some runs use "default barcodes" and the tag1 fields in pac_bio_run are empty. When this is true, we also lose the deplex stats
+# Show user "default" in the interface?
+# Not all runs get any hifi stats in pac_bio_product_metrics. Not all runs use the hifi reads setting
diff --git a/tests/test_pac_bio_qc_data_well.py b/tests/test_pac_bio_qc_data_well.py
index 701cce8..32a07df 100644
--- a/tests/test_pac_bio_qc_data_well.py
+++ b/tests/test_pac_bio_qc_data_well.py
@@ -2,6 +2,7 @@
 
 from lang_qc.db.helper.wells import WellWh
 from lang_qc.models.pacbio.qc_data import QCDataWell
+from tests.fixtures.sample_data import multiplexed_run, simplex_run
 
 
 def test_creating_qc_data_well(mlwhdb_test_session, mlwhdb_load_runs):
@@ -98,3 +99,34 @@ def test_creating_qc_data_well(mlwhdb_test_session, mlwhdb_load_runs):
     assert (
         qc.percentage_deplexed_reads["value"] == None
     ), "Absent metrics mean this is set to none"
+
+
+def test_pool_metrics_from_single_sample_well(mlwhdb_test_session, simplex_run):
+    helper = WellWh(session=mlwhdb_test_session)
+    id = PacBioEntity(run_name="RUN", well_label="A1", plate_number=1).hash_product_id()
+
+    metrics = helper.get_metrics_by_well_product_id(id)
+    assert metrics is None, "Got no metrics for a one-sample well"
+
+
+def test_pool_metrics_from_well(mlwhdb_test_session, multiplexed_run):
+    helper = WellWh(session=mlwhdb_test_session)
+    id = PacBioEntity(run_name="RUN", well_label="B1", plate_number=1).hash_product_id()
+    metrics = helper.get_metrics_by_well_product_id(id)
+
+    assert metrics, "Two samples means we get a metrics response"
+    assert (
+        int(metrics.pool_coeff_of_variance) == 47
+    ), "Variance between 20 and 10 is ~47%"
+
+    assert metrics.products[0].hifi_read_bases == 100
+    assert (
+        metrics.products[1].hifi_read_bases == 900
+    ), "hifi read base counts are faithfully copied"
+
+    assert (
+        int(metrics.products[0].percentage_total_reads) == 33
+    ), "10 of 30 reads is 33.3%"
+    assert (
+        int(metrics.products[1].percentage_total_reads) == 66
+    ), "20 of 30 reads is 66.6%"

From b092e194cb56e45d0c02076e147ff46a5ae09ac0 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Tue, 14 May 2024 16:23:40 +0000
Subject: [PATCH 05/16] fixture parametrisation not quite right

---
 tests/fixtures/sample_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index 15ed224..c359b26 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -13,7 +13,7 @@
 
 
 @pytest.fixture(scope="function", params=["AAAAAAAA", None])
-def simplex_run(mlwhdb_test_session):
+def simplex_run(request, mlwhdb_test_session):
     """
     A single sample, well, run mlwh fixture that provides both an explicit tag1
     for the sample, and an implicit default tag (when the PacBio instrument is
@@ -22,7 +22,7 @@ def simplex_run(mlwhdb_test_session):
     run_name = "RUN"
     well_label = "A1"
     plate_number = 1
-    tag1 = mlwhdb_test_session.param
+    tag1 = request.param
 
     common_run_attribs = {
         "recorded_at": datetime.now(),

From 4382aa72462736efd1dba00e97f0d9cfb2bec814 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Fri, 17 May 2024 13:36:50 +0000
Subject: [PATCH 06/16] An (untested) endpoint for fetching pool stats

---
 lang_qc/endpoints/pacbio_well.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py
index 27534f6..f232488 100644
--- a/lang_qc/endpoints/pacbio_well.py
+++ b/lang_qc/endpoints/pacbio_well.py
@@ -47,7 +47,10 @@
     InvalidDictValueError,
     RunNotFoundError,
 )
-from lang_qc.util.type_checksum import ChecksumSHA256, PacBioWellSHA256, PacBioProductSHA256
+from lang_qc.util.type_checksum import (
+    ChecksumSHA256,
+    PacBioWellSHA256,
+)
 
 """
 A collection of API endpoints that are specific to the PacBio sequencing
@@ -186,6 +189,28 @@ def get_seq_metrics(
     return PacBioWellFull(db_well=mlwh_well, qc_state=qc_state)
 
 
+@router.get(
+    "/products/{id_product}/seq_level/pool",
+    summary="Get sample (deplexing) metrics for a multiplexed well product by the well ID",
+    responses={
+        status.HTTP_404_NOT_FOUND: {"description": "Product not found"},
+        status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"},
+    },
+    response_model=QCPoolMetrics,
+)
+def get_product_metrics(
+    id_product: PacBioWellSHA256, mlwhdb_session: Session = Depends(get_mlwh_db)
+) -> QCPoolMetrics:
+    metrics = WellWh(mlwh_session=mlwhdb_session).get_metrics_by_well_product_id(
+        id_product
+    )
+    if metrics is None:
+        raise HTTPException(
+            status_code=404, detail="Well does not have any pool metrics"
+        )
+    return metrics
+
+
 @router.post(
     "/products/{id_product}/qc_claim",
     summary="Claim the well to start QC",

From 7f329ffe60f2bc0b1ce07ee79b899fa4980715ae Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Mon, 20 May 2024 15:26:13 +0000
Subject: [PATCH 07/16] Make pool fixture self-cleaning

---
 tests/fixtures/sample_data.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index c359b26..7891a0d 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -12,7 +12,7 @@
 )
 
 
-@pytest.fixture(scope="function", params=["AAAAAAAA", None])
+@pytest.fixture(scope="module", params=["AAAAAAAA", None])
 def simplex_run(request, mlwhdb_test_session):
     """
     A single sample, well, run mlwh fixture that provides both an explicit tag1
@@ -88,6 +88,10 @@ def simplex_run(request, mlwhdb_test_session):
     )
     mlwhdb_test_session.add(simplex_run)
     mlwhdb_test_session.commit()
+    yield simplex_run
+    mlwhdb_test_session.delete(simplex_run)
+    mlwhdb_test_session.delete(study)
+    mlwhdb_test_session.commit()
 
 
 @pytest.fixture(scope="function")
@@ -198,6 +202,11 @@ def multiplexed_run(mlwhdb_test_session):
 
     mlwhdb_test_session.add_all([multiplex_run_1, multiplex_run_2])
     mlwhdb_test_session.commit()
+    yield (multiplex_run_1, multiplex_run_2)
+    mlwhdb_test_session.delete(multiplex_run_1)
+    mlwhdb_test_session.delete(multiplex_run_2)
+    mlwhdb_test_session.delete(study)
+    mlwhdb_test_session.commit()
 
 
 # Some runs use "default barcodes" and the tag1 fields in pac_bio_run are empty. When this is true, we also lose the deplex stats

From 3c9b9bb275a0f7785fe968e3bf7fea274675a12e Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Mon, 20 May 2024 17:00:45 +0000
Subject: [PATCH 08/16] Add metrics from mlwh to a multi-sample well, and test
 pool API endpoint

Black rides again
---
 lang_qc/util/type_checksum.py                 |  4 +-
 .../mlwh_pb_runs/300-PacBioProductMetrics.yml | 40 +++++++++----------
 .../endpoints/test_single_well_qc_details.py  | 18 +++++++++
 3 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/lang_qc/util/type_checksum.py b/lang_qc/util/type_checksum.py
index 15b8bdc..4a72dce 100644
--- a/lang_qc/util/type_checksum.py
+++ b/lang_qc/util/type_checksum.py
@@ -46,6 +46,7 @@ class PacBioWellSHA256(ChecksumSHA256):
     """
     A checksum generated from the coordinates of a single well on a plate in a PacBio run
     """
+
     pass
 
 
@@ -55,4 +56,5 @@ class PacBioProductSHA256(ChecksumSHA256):
     See `npg_id_generation.pac_bio.PacBioEntity`.
     Tags only contribute to the checksum when samples are multiplexed.
     """
-    pass
\ No newline at end of file
+
+    pass
diff --git a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
index 0b6de2e..6485990 100644
--- a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
+++ b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
@@ -255,11 +255,11 @@
   id_pac_bio_tmp: 120632
   last_changed: 2024-02-28 11:10:15
   qc: 1
-- barcode_quality_score_mean: ~
-  hifi_bases_percent: ~
-  hifi_num_reads: ~
-  hifi_read_bases: ~
-  hifi_read_length_mean: ~
+- barcode_quality_score_mean: 97
+  hifi_bases_percent: 27.49
+  hifi_num_reads: 1952224
+  hifi_read_bases: 21504288522
+  hifi_read_length_mean: 11015
   hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30023
   id_pac_bio_product: 74af5a311e15af654336aea65826a2c4974842d752e25875b0303ad5a3556167
@@ -267,11 +267,11 @@
   id_pac_bio_tmp: 120633
   last_changed: 2024-02-28 11:10:15
   qc: 1
-- barcode_quality_score_mean: ~
-  hifi_bases_percent: ~
-  hifi_num_reads: ~
-  hifi_read_bases: ~
-  hifi_read_length_mean: ~
+- barcode_quality_score_mean: 96
+  hifi_bases_percent: 19.62
+  hifi_num_reads: 1139885
+  hifi_read_bases: 15344650012
+  hifi_read_length_mean: 13461
   hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30024
   id_pac_bio_product: 11022006a649937c570d100ccb382dddadf9a7174ee303903c8d2b7cd7efb328
@@ -279,11 +279,11 @@
   id_pac_bio_tmp: 120634
   last_changed: 2024-02-28 11:10:15
   qc: 1
-- barcode_quality_score_mean: ~
-  hifi_bases_percent: ~
-  hifi_num_reads: ~
-  hifi_read_bases: ~
-  hifi_read_length_mean: ~
+- barcode_quality_score_mean: 96
+  hifi_bases_percent: 23.7
+  hifi_num_reads: 1751410
+  hifi_read_bases: 18538781061
+  hifi_read_length_mean: 10585
   hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30025
   id_pac_bio_product: e6a2157d0fda8faae1288025e99ce5f8133f1466b752a67809668e5b9b16d5b1
@@ -291,11 +291,11 @@
   id_pac_bio_tmp: 120635
   last_changed: 2024-02-28 11:10:15
   qc: 1
-- barcode_quality_score_mean: ~
-  hifi_bases_percent: ~
-  hifi_num_reads: ~
-  hifi_read_bases: ~
-  hifi_read_length_mean: ~
+- barcode_quality_score_mean: 97
+  hifi_bases_percent: 28.72
+  hifi_num_reads: 1991282
+  hifi_read_bases: 22462478066
+  hifi_read_length_mean: 11280
   hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30026
   id_pac_bio_product: 9840280d97c98ff3ddda36ac95cf3b87f5810cc3be73a64c27d6ab92cfaab0ac
diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py
index 7931e42..b9b8d62 100644
--- a/tests/endpoints/test_single_well_qc_details.py
+++ b/tests/endpoints/test_single_well_qc_details.py
@@ -1,4 +1,5 @@
 from fastapi.testclient import TestClient
+from npg_id_generation.pac_bio import PacBioEntity
 
 from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users
 
@@ -165,3 +166,20 @@ def test_get_well_info(
     assert result["plate_number"] == 2
     assert result["id_product"] == id_product
     assert result["qc_state"] is None
+
+
+def test_get_pool_info(test_client: TestClient, mlwhdb_load_runs):
+    id_product = PacBioEntity(
+        run_name="TRACTION-RUN-1140", well_label="D1", plate_number=1
+    ).hash_product_id()
+    response = test_client.get(f"/pacbio/products/{id_product}/seq_level/pool")
+    assert response.status_code == 200
+
+    data = response.json()
+    assert int(data["pool_coeff_of_variance"]) == 23, "variance is calculated"
+    assert {prod["tag1_name"] for prod in data["products"]} == {
+        "bc2036",
+        "bc2040",
+        "bc2054",
+        "bc2063",
+    }, "Correct products present"

From 7a55cc6e3426f48567a0d247de1afac4430ee9d3 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Thu, 23 May 2024 14:54:01 +0000
Subject: [PATCH 09/16] parameterised fixture triggers unique condition in DB,
 so make more dynamic

shorten some lines for flake8
---
 lang_qc/models/pacbio/qc_data.py   |   2 +-
 lang_qc/util/type_checksum.py      |   4 +-
 tests/fixtures/sample_data.py      | 119 +++++++++++++++--------------
 tests/test_pac_bio_qc_data_well.py |   6 +-
 4 files changed, 70 insertions(+), 61 deletions(-)

diff --git a/lang_qc/models/pacbio/qc_data.py b/lang_qc/models/pacbio/qc_data.py
index 8f64b84..259a178 100644
--- a/lang_qc/models/pacbio/qc_data.py
+++ b/lang_qc/models/pacbio/qc_data.py
@@ -177,7 +177,7 @@ class SampleDeplexingStats(BaseModel):
 class QCPoolMetrics(BaseModel):
     pool_coeff_of_variance: float | None = Field(
         title="Coefficient of variance for reads in the pool",
-        description="Percentage of the standard deviation w.r.t. mean, reported when the pool is larger than one",
+        description="Percentage of the standard deviation w.r.t. mean, when pool is more than one",
     )
     products: list[SampleDeplexingStats] = Field(
         title="List of products and their metrics"
diff --git a/lang_qc/util/type_checksum.py b/lang_qc/util/type_checksum.py
index 4a72dce..c78b099 100644
--- a/lang_qc/util/type_checksum.py
+++ b/lang_qc/util/type_checksum.py
@@ -52,8 +52,8 @@ class PacBioWellSHA256(ChecksumSHA256):
 
 class PacBioProductSHA256(ChecksumSHA256):
     """
-    A checksum generated from the combination of run, well, plate and any tags required for deplexing
-    See `npg_id_generation.pac_bio.PacBioEntity`.
+    A checksum generated from the combination of run, well, plate and any tags required for
+    deplexing, see `npg_id_generation.pac_bio.PacBioEntity`.
     Tags only contribute to the checksum when samples are multiplexed.
     """
 
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index 7891a0d..dd26780 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -19,7 +19,8 @@ def simplex_run(request, mlwhdb_test_session):
     for the sample, and an implicit default tag (when the PacBio instrument is
     run with default barcodes)
     """
-    run_name = "RUN"
+    run_name = "RUN-9999"
+    run_name += request.param if request.param else ""
     well_label = "A1"
     plate_number = 1
     tag1 = request.param
@@ -49,6 +50,23 @@ def simplex_run(request, mlwhdb_test_session):
         ).hash_product_id(),
     )
 
+    product = PacBioProductMetrics(
+        id_pac_bio_product=PacBioEntity(
+            run_name=run_name,
+            well_label=well_label,
+            plate_number=plate_number,
+            tags=tag1,
+        ).hash_product_id(),
+        qc=1,
+        hifi_read_bases=900,
+        hifi_num_reads=10,
+        hifi_read_length_mean=90,
+        barcode_quality_score_mean=34,
+        hifi_read_quality_mean=35,
+        hifi_bases_percent=90.001,
+        pac_bio_run_well_metrics=well_metrics_a1,
+    )
+
     study = Study(
         id_lims="id",
         id_study_lims="1",
@@ -62,28 +80,11 @@ def simplex_run(request, mlwhdb_test_session):
         id_pac_bio_run_lims=0,
         sample=Sample(
             id_lims="id",
-            id_sample_lims="1",
+            id_sample_lims=request.param or "1",
         ),
         study=study,
         plate_barcode="ABCD",
-        pac_bio_product_metrics=[
-            PacBioProductMetrics(
-                id_pac_bio_product=PacBioEntity(
-                    run_name=run_name,
-                    well_label=well_label,
-                    plate_number=plate_number,
-                    tags=tag1,
-                ).hash_product_id(),
-                qc=1,
-                hifi_read_bases=900,
-                hifi_num_reads=10,
-                hifi_read_length_mean=90,
-                barcode_quality_score_mean=34,
-                hifi_read_quality_mean=35,
-                hifi_bases_percent=90.001,
-                pac_bio_run_well_metrics=well_metrics_a1,
-            )
-        ],
+        pac_bio_product_metrics=[product],
         **common_run_attribs
     )
     mlwhdb_test_session.add(simplex_run)
@@ -91,6 +92,8 @@ def simplex_run(request, mlwhdb_test_session):
     yield simplex_run
     mlwhdb_test_session.delete(simplex_run)
     mlwhdb_test_session.delete(study)
+    mlwhdb_test_session.delete(product)
+    mlwhdb_test_session.delete(well_metrics_a1)
     mlwhdb_test_session.commit()
 
 
@@ -101,7 +104,6 @@ def multiplexed_run(mlwhdb_test_session):
     run_name = "RUN"
     well_label = "B1"
     plate_number = 1
-    tag1 = "AAAAAAA"
 
     common_run_attribs = {
         "recorded_at": datetime.now(),
@@ -136,6 +138,23 @@ def multiplexed_run(mlwhdb_test_session):
         hifi_num_reads=30,
     )
 
+    product_1 = PacBioProductMetrics(
+        id_pac_bio_product=PacBioEntity(
+            run_name=run_name,
+            well_label=well_label,
+            plate_number=plate_number,
+            tags=tag1,
+        ).hash_product_id(),
+        qc=1,
+        hifi_read_bases=900,
+        hifi_num_reads=20,
+        hifi_read_length_mean=45,
+        barcode_quality_score_mean=34,
+        hifi_read_quality_mean=35,
+        hifi_bases_percent=90.001,
+        pac_bio_run_well_metrics=well_metrics_b1,
+    )
+
     multiplex_run_1 = PacBioRun(
         pac_bio_run_name=run_name,
         well_label=well_label,
@@ -147,27 +166,27 @@ def multiplexed_run(mlwhdb_test_session):
         ),
         study=study,
         plate_barcode="ABCD",
-        pac_bio_product_metrics=[
-            PacBioProductMetrics(
-                id_pac_bio_product=PacBioEntity(
-                    run_name=run_name,
-                    well_label=well_label,
-                    plate_number=plate_number,
-                    tags=tag1,
-                ).hash_product_id(),
-                qc=1,
-                hifi_read_bases=900,
-                hifi_num_reads=20,
-                hifi_read_length_mean=45,
-                barcode_quality_score_mean=34,
-                hifi_read_quality_mean=35,
-                hifi_bases_percent=90.001,
-                pac_bio_run_well_metrics=well_metrics_b1,
-            ),
-        ],
+        pac_bio_product_metrics=[product_1],
         **common_run_attribs
     )
 
+    product_2 = PacBioProductMetrics(
+        id_pac_bio_product=PacBioEntity(
+            run_name=run_name,
+            well_label=well_label,
+            plate_number=plate_number,
+            tags=tag1_2,
+        ).hash_product_id(),
+        qc=1,
+        hifi_read_bases=100,
+        hifi_num_reads=10,
+        hifi_read_length_mean=10,
+        barcode_quality_score_mean=34,
+        hifi_read_quality_mean=35,
+        hifi_bases_percent=100.00,
+        pac_bio_run_well_metrics=well_metrics_b1,
+    )
+
     multiplex_run_2 = PacBioRun(
         pac_bio_run_name=run_name,
         well_label=well_label,
@@ -179,24 +198,7 @@ def multiplexed_run(mlwhdb_test_session):
         ),
         study=study,
         plate_barcode="ABCD",
-        pac_bio_product_metrics=[
-            PacBioProductMetrics(
-                id_pac_bio_product=PacBioEntity(
-                    run_name=run_name,
-                    well_label=well_label,
-                    plate_number=plate_number,
-                    tags=tag1_2,
-                ).hash_product_id(),
-                qc=1,
-                hifi_read_bases=100,
-                hifi_num_reads=10,
-                hifi_read_length_mean=10,
-                barcode_quality_score_mean=34,
-                hifi_read_quality_mean=35,
-                hifi_bases_percent=100.00,
-                pac_bio_run_well_metrics=well_metrics_b1,
-            )
-        ],
+        pac_bio_product_metrics=[product_2],
         **common_run_attribs
     )
 
@@ -206,6 +208,9 @@ def multiplexed_run(mlwhdb_test_session):
     mlwhdb_test_session.delete(multiplex_run_1)
     mlwhdb_test_session.delete(multiplex_run_2)
     mlwhdb_test_session.delete(study)
+    mlwhdb_test_session.delete(well_metrics_b1)
+    mlwhdb_test_session.delete(product_1)
+    mlwhdb_test_session.delete(product_2)
     mlwhdb_test_session.commit()
 
 
diff --git a/tests/test_pac_bio_qc_data_well.py b/tests/test_pac_bio_qc_data_well.py
index 32a07df..bd4318a 100644
--- a/tests/test_pac_bio_qc_data_well.py
+++ b/tests/test_pac_bio_qc_data_well.py
@@ -103,7 +103,11 @@ def test_creating_qc_data_well(mlwhdb_test_session, mlwhdb_load_runs):
 
 def test_pool_metrics_from_single_sample_well(mlwhdb_test_session, simplex_run):
     helper = WellWh(session=mlwhdb_test_session)
-    id = PacBioEntity(run_name="RUN", well_label="A1", plate_number=1).hash_product_id()
+    id = PacBioEntity(
+        run_name=simplex_run.pac_bio_run_name,
+        well_label=simplex_run.well_label,
+        plate_number=simplex_run.plate_number,
+    ).hash_product_id()
 
     metrics = helper.get_metrics_by_well_product_id(id)
     assert metrics is None, "Got no metrics for a one-sample well"

From 686481bc11762f889ae98a967bfde15adccf35f2 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Thu, 23 May 2024 15:00:13 +0000
Subject: [PATCH 10/16] Stop fixture polluting other tests in module

import sort
---
 lang_qc/endpoints/pacbio_well.py | 7 ++-----
 tests/fixtures/sample_data.py    | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py
index f232488..7e15724 100644
--- a/lang_qc/endpoints/pacbio_well.py
+++ b/lang_qc/endpoints/pacbio_well.py
@@ -37,20 +37,17 @@
 from lang_qc.db.mlwh_connection import get_mlwh_db
 from lang_qc.db.qc_connection import get_qc_db
 from lang_qc.db.qc_schema import User
+from lang_qc.models.pacbio.qc_data import QCPoolMetrics
 from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellFull
 from lang_qc.models.qc_flow_status import QcFlowStatusEnum
 from lang_qc.models.qc_state import QcState, QcStateBasic
-from lang_qc.models.pacbio.qc_data import QCPoolMetrics
 from lang_qc.util.auth import check_user
 from lang_qc.util.errors import (
     InconsistentInputError,
     InvalidDictValueError,
     RunNotFoundError,
 )
-from lang_qc.util.type_checksum import (
-    ChecksumSHA256,
-    PacBioWellSHA256,
-)
+from lang_qc.util.type_checksum import ChecksumSHA256, PacBioWellSHA256
 
 """
 A collection of API endpoints that are specific to the PacBio sequencing
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index dd26780..8a9b64e 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -12,7 +12,7 @@
 )
 
 
-@pytest.fixture(scope="module", params=["AAAAAAAA", None])
+@pytest.fixture(scope="function", params=["AAAAAAAA", None])
 def simplex_run(request, mlwhdb_test_session):
     """
     A single sample, well, run mlwh fixture that provides both an explicit tag1

From 2b6ae77d84f9d8386006d418355c740e69181b7f Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Thu, 23 May 2024 16:18:07 +0000
Subject: [PATCH 11/16] Data not needed for defunct mlwh column

---
 .../mlwh_pb_runs/300-PacBioProductMetrics.yml | 24 -------------------
 tests/fixtures/sample_data.py                 |  3 ---
 2 files changed, 27 deletions(-)

diff --git a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
index 6485990..f46e1d1 100644
--- a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
+++ b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
@@ -176,7 +176,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30016
   id_pac_bio_product: 3b37d8c1a317f229a3aae182f160f8e4f4856607fb15f1ab0588dde66640afda
   id_pac_bio_rw_metrics_tmp: 6206
@@ -188,7 +187,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30017
   id_pac_bio_product: 2b9048414306eb7683056bd91f6ec81f0b2dbf69484b3dd2dbe39932b52bedbb
   id_pac_bio_rw_metrics_tmp: 6206
@@ -200,7 +198,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30018
   id_pac_bio_product: f50319c97e28f2e0a67ebbc736080c4e98f23cdf6e5b7cec964349ffb13ae797
   id_pac_bio_rw_metrics_tmp: 6207
@@ -212,7 +209,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30019
   id_pac_bio_product: 080733cab28898fcd69d1a418c7675cba38a548c9c20ac2da48a84c5658ee6b2
   id_pac_bio_rw_metrics_tmp: 6207
@@ -224,7 +220,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30020
   id_pac_bio_product: 14be4b6a6bb857c0967d56c90d2b57edc1401cdb5f95379312fb8e5ca71e09fa
   id_pac_bio_rw_metrics_tmp: 6207
@@ -236,7 +231,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30021
   id_pac_bio_product: 4153f3a64e39588bf626c4dda42e5ee74b424bba67d69bb74bb029adda2e642c
   id_pac_bio_rw_metrics_tmp: 6208
@@ -248,7 +242,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30022
   id_pac_bio_product: fbbcd5cac5d086ce64b3a37646e261b4c784fce6755fd65d6d41f048d2267c61
   id_pac_bio_rw_metrics_tmp: 6208
@@ -260,7 +253,6 @@
   hifi_num_reads: 1952224
   hifi_read_bases: 21504288522
   hifi_read_length_mean: 11015
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30023
   id_pac_bio_product: 74af5a311e15af654336aea65826a2c4974842d752e25875b0303ad5a3556167
   id_pac_bio_rw_metrics_tmp: 6209
@@ -272,7 +264,6 @@
   hifi_num_reads: 1139885
   hifi_read_bases: 15344650012
   hifi_read_length_mean: 13461
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30024
   id_pac_bio_product: 11022006a649937c570d100ccb382dddadf9a7174ee303903c8d2b7cd7efb328
   id_pac_bio_rw_metrics_tmp: 6209
@@ -284,7 +275,6 @@
   hifi_num_reads: 1751410
   hifi_read_bases: 18538781061
   hifi_read_length_mean: 10585
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30025
   id_pac_bio_product: e6a2157d0fda8faae1288025e99ce5f8133f1466b752a67809668e5b9b16d5b1
   id_pac_bio_rw_metrics_tmp: 6209
@@ -296,7 +286,6 @@
   hifi_num_reads: 1991282
   hifi_read_bases: 22462478066
   hifi_read_length_mean: 11280
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30026
   id_pac_bio_product: 9840280d97c98ff3ddda36ac95cf3b87f5810cc3be73a64c27d6ab92cfaab0ac
   id_pac_bio_rw_metrics_tmp: 6209
@@ -308,7 +297,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30027
   id_pac_bio_product: 81141cdff1f57c0fc0fc5f88856fa7c6d2945acc5fa6e53e7d1214d17a00c410
   id_pac_bio_rw_metrics_tmp: 6210
@@ -320,7 +308,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30028
   id_pac_bio_product: 4145bf889c130ecaadcd4d757d0a3ca98d68629556427a27ebc08840ffdd0e0f
   id_pac_bio_rw_metrics_tmp: 6210
@@ -332,7 +319,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30029
   id_pac_bio_product: 5b99ad09c31afd4917da39d44fc6cc40e1915572e80c20acbfda6d6c031e74c5
   id_pac_bio_rw_metrics_tmp: 6211
@@ -344,7 +330,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30030
   id_pac_bio_product: 0152d7945c4f74fac3ff828012ad2c01a95574df213d7664e7989e1039727cb5
   id_pac_bio_rw_metrics_tmp: 6211
@@ -356,7 +341,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30031
   id_pac_bio_product: 110e4562a6d28dd96973a98fcc1464d6c82dc413296b95d0c71727d21fa2a193
   id_pac_bio_rw_metrics_tmp: 6212
@@ -368,7 +352,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30032
   id_pac_bio_product: af65875cfecca04ee585c67525661f57a07d7f1427aa15ca39e158c791d63aa5
   id_pac_bio_rw_metrics_tmp: 6212
@@ -380,7 +363,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30033
   id_pac_bio_product: c24d50afb4c048f38dca230a03fb4880912713adf7db7a3ec4d5f57ee3c4cdec
   id_pac_bio_rw_metrics_tmp: 6212
@@ -392,7 +374,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30034
   id_pac_bio_product: baa1e87601ca9c16d95b7fda9d9346557de4aaf4adb5c15383d0f8d9366692bf
   id_pac_bio_rw_metrics_tmp: 6213
@@ -404,7 +385,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30035
   id_pac_bio_product: f88bcfb888f075442a005368c070ba83d895b07c013c68e1cb292fce4aaa40f2
   id_pac_bio_rw_metrics_tmp: 6213
@@ -416,7 +396,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30036
   id_pac_bio_product: 61d2c6fc72d593949cf7b60812a0076c9af57b0fa71b394f0669e410e040458e
   id_pac_bio_rw_metrics_tmp: 6213
@@ -428,7 +407,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30037
   id_pac_bio_product: 252c8d3dc0b4c81e6d7359b0808ba962013e7b320eb9b979da526cecf5fdd019
   id_pac_bio_rw_metrics_tmp: 6213
@@ -440,7 +418,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30153
   id_pac_bio_product: 2135bf0b32c6b987042e67e062647aa21ac956c1d3385627b7a1d4cd670c355f
   id_pac_bio_rw_metrics_tmp: 6306
@@ -452,7 +429,6 @@
   hifi_num_reads: ~
   hifi_read_bases: ~
   hifi_read_length_mean: ~
-  hifi_read_quality_mean: ~
   id_pac_bio_pr_metrics_tmp: 30154
   id_pac_bio_product: 790e8882c97615d79ebe27b782eefa87eede2cecda8ebd960cdd88300059f196
   id_pac_bio_rw_metrics_tmp: 6307
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index 8a9b64e..818142f 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -62,7 +62,6 @@ def simplex_run(request, mlwhdb_test_session):
         hifi_num_reads=10,
         hifi_read_length_mean=90,
         barcode_quality_score_mean=34,
-        hifi_read_quality_mean=35,
         hifi_bases_percent=90.001,
         pac_bio_run_well_metrics=well_metrics_a1,
     )
@@ -150,7 +149,6 @@ def multiplexed_run(mlwhdb_test_session):
         hifi_num_reads=20,
         hifi_read_length_mean=45,
         barcode_quality_score_mean=34,
-        hifi_read_quality_mean=35,
         hifi_bases_percent=90.001,
         pac_bio_run_well_metrics=well_metrics_b1,
     )
@@ -182,7 +180,6 @@ def multiplexed_run(mlwhdb_test_session):
         hifi_num_reads=10,
         hifi_read_length_mean=10,
         barcode_quality_score_mean=34,
-        hifi_read_quality_mean=35,
         hifi_bases_percent=100.00,
         pac_bio_run_well_metrics=well_metrics_b1,
     )

From 3df42faf70a9a549a8a426d90a83b5640145c7aa Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Tue, 11 Jun 2024 13:52:02 +0000
Subject: [PATCH 12/16] Update mlwh model to include new barcode4deplexing
 column

---
 lang_qc/db/mlwh_schema.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py
index 395916f..d445120 100644
--- a/lang_qc/db/mlwh_schema.py
+++ b/lang_qc/db/mlwh_schema.py
@@ -609,6 +609,11 @@ class PacBioProductMetrics(Base):
     hifi_read_length_mean = Column(
         mysqlINTEGER(unsigned=True), nullable=True, comment="The mean HiFi read length"
     )
+    barcode4deplexing = Column(
+        mysqlVARCHAR(62),
+        nullable=True,
+        comment="The barcode recorded in producing deplexed metrics for this product",
+    )
     barcode_quality_score_mean = Column(
         mysqlSMALLINT(unsigned=True),
         nullable=True,

From 11be2e055ef3894a9dcfa978deb5b50e09d5a51a Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Tue, 11 Jun 2024 14:04:13 +0000
Subject: [PATCH 13/16] Supplement fixture with barcode IDs

---
 .../mlwh_pb_runs/300-PacBioProductMetrics.yml | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
index f46e1d1..c02bb36 100644
--- a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
+++ b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml
@@ -172,6 +172,7 @@
   id_pac_bio_rw_metrics_tmp: 1735
   id_pac_bio_tmp: 99008
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2020--bc2020
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -183,6 +184,7 @@
   last_changed: 2024-02-28 14:10:14
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2011--bc2011
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -194,6 +196,7 @@
   last_changed: 2024-02-28 14:10:14
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc1011_BAK8A_OA--bc1011_BAK8A_OA
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -205,6 +208,7 @@
   last_changed: 2024-03-05 15:10:36
   qc: 0
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc1022_BAK8B_OA--bc1022_BAK8B_OA
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -216,6 +220,7 @@
   last_changed: 2024-03-05 15:10:36
   qc: 0
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc1001_BAK8A_OA--bc1001_BAK8A_OA
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -227,6 +232,7 @@
   last_changed: 2024-03-05 15:10:36
   qc: 0
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2035--bc2035
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -238,6 +244,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2052--bc2052
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -249,6 +256,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: 97
+  barcode4deplexing: bc2036--bc2036
   hifi_bases_percent: 27.49
   hifi_num_reads: 1952224
   hifi_read_bases: 21504288522
@@ -260,6 +268,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: 96
+  barcode4deplexing: bc2040--bc2040
   hifi_bases_percent: 19.62
   hifi_num_reads: 1139885
   hifi_read_bases: 15344650012
@@ -271,6 +280,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: 96
+  barcode4deplexing: bc2054--bc2054
   hifi_bases_percent: 23.7
   hifi_num_reads: 1751410
   hifi_read_bases: 18538781061
@@ -282,6 +292,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: 97
+  barcode4deplexing: bc2063--bc2063
   hifi_bases_percent: 28.72
   hifi_num_reads: 1991282
   hifi_read_bases: 22462478066
@@ -293,6 +304,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2016--bc2016
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -304,6 +316,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2096--bc2096
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -315,6 +328,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2056--bc2056
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -326,6 +340,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2072--bc2072
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -337,6 +352,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2021--bc2021
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -348,6 +364,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2011--bc2011
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -359,6 +376,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2015--bc2015
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -370,6 +388,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2083--bc2083
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -381,6 +400,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2084--bc2084
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -392,6 +412,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2085--bc2085
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -403,6 +424,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2094--bc2094
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -414,6 +436,7 @@
   last_changed: 2024-02-28 11:10:15
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2070--bc2070
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~
@@ -425,6 +448,7 @@
   last_changed: 2024-03-08 12:10:14
   qc: 1
 - barcode_quality_score_mean: ~
+  barcode4deplexing: bc2055--bc2055
   hifi_bases_percent: ~
   hifi_num_reads: ~
   hifi_read_bases: ~

From 6e5472a0d5bcf7b8521600b6b99e7b2f04aaa737 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <kt19@sanger.ac.uk>
Date: Wed, 12 Jun 2024 12:51:48 +0000
Subject: [PATCH 14/16] Add deplexing barcodes and modes to test data. Check
 deplexing mode to determine whether to run stats or not.

---
 lang_qc/db/helper/wells.py       | 5 ++---
 lang_qc/models/pacbio/qc_data.py | 1 +
 tests/fixtures/sample_data.py    | 5 +++++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
index 48a3042..69fac7e 100644
--- a/lang_qc/db/helper/wells.py
+++ b/lang_qc/db/helper/wells.py
@@ -84,10 +84,8 @@ def get_metrics_by_well_product_id(
         self, id_product: PacBioWellSHA256
     ) -> QCPoolMetrics | None:
         well = self.get_mlwh_well_by_product_id(id_product)
-        if well:
+        if well and well.demultiplex_mode and "Instrument" in well.demultiplex_mode:
             product_metrics = well.pac_bio_product_metrics
-            if len(product_metrics) == 1:
-                return None
 
             cov: float | None
             if any(p.hifi_num_reads is None for p in product_metrics):
@@ -103,6 +101,7 @@ def get_metrics_by_well_product_id(
                         id_product=prod.id_pac_bio_product,
                         tag1_name=prod.pac_bio_run.tag_identifier,
                         tag2_name=prod.pac_bio_run.tag2_identifier,
+                        deplexing_barcode=prod.barcode4deplexing,
                         hifi_read_bases=prod.hifi_read_bases,
                         hifi_num_reads=prod.hifi_num_reads,
                         hifi_read_length_mean=prod.hifi_read_length_mean,
diff --git a/lang_qc/models/pacbio/qc_data.py b/lang_qc/models/pacbio/qc_data.py
index 259a178..fb9a874 100644
--- a/lang_qc/models/pacbio/qc_data.py
+++ b/lang_qc/models/pacbio/qc_data.py
@@ -167,6 +167,7 @@ class SampleDeplexingStats(BaseModel):
     id_product: PacBioProductSHA256
     tag1_name: str | None
     tag2_name: str | None
+    deplexing_barcode: str | None
     hifi_read_bases: int | None
     hifi_num_reads: int | None
     hifi_read_length_mean: float | None
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index 818142f..e86fbb5 100644
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -48,6 +48,7 @@ def simplex_run(request, mlwhdb_test_session):
             well_label=well_label,
             plate_number=plate_number,
         ).hash_product_id(),
+        demultiplex_mode=None,
     )
 
     product = PacBioProductMetrics(
@@ -64,6 +65,7 @@ def simplex_run(request, mlwhdb_test_session):
         barcode_quality_score_mean=34,
         hifi_bases_percent=90.001,
         pac_bio_run_well_metrics=well_metrics_a1,
+        barcode4deplexing=None,
     )
 
     study = Study(
@@ -135,6 +137,7 @@ def multiplexed_run(mlwhdb_test_session):
             plate_number=plate_number,
         ).hash_product_id(),
         hifi_num_reads=30,
+        demultiplex_mode="OnInstrument",
     )
 
     product_1 = PacBioProductMetrics(
@@ -151,6 +154,7 @@ def multiplexed_run(mlwhdb_test_session):
         barcode_quality_score_mean=34,
         hifi_bases_percent=90.001,
         pac_bio_run_well_metrics=well_metrics_b1,
+        barcode4deplexing="bc10--bc10",
     )
 
     multiplex_run_1 = PacBioRun(
@@ -182,6 +186,7 @@ def multiplexed_run(mlwhdb_test_session):
         barcode_quality_score_mean=34,
         hifi_bases_percent=100.00,
         pac_bio_run_well_metrics=well_metrics_b1,
+        barcode4deplexing="bc11--bc11",
     )
 
     multiplex_run_2 = PacBioRun(

From 2f9be8aec83686f16c8bbfa7ebd1c2f4d29a08cd Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Wed, 12 Jun 2024 16:59:42 +0100
Subject: [PATCH 15/16] Added a check for unlinked data.

---
 lang_qc/db/helper/wells.py         | 27 ++++++++++++++++++---------
 tests/test_pac_bio_qc_data_well.py | 13 +++++++++++++
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
index 69fac7e..57d7b01 100644
--- a/lang_qc/db/helper/wells.py
+++ b/lang_qc/db/helper/wells.py
@@ -85,7 +85,15 @@ def get_metrics_by_well_product_id(
     ) -> QCPoolMetrics | None:
         well = self.get_mlwh_well_by_product_id(id_product)
         if well and well.demultiplex_mode and "Instrument" in well.demultiplex_mode:
+
             product_metrics = well.pac_bio_product_metrics
+            lib_lims_data = [
+                row
+                for row in map(lambda product: product.pac_bio_run, product_metrics)
+                if row is not None
+            ]
+            if len(lib_lims_data) != len(product_metrics):
+                raise Exception("Partially linked LIMS data or no linked LIMS data")
 
             cov: float | None
             if any(p.hifi_num_reads is None for p in product_metrics):
@@ -94,13 +102,13 @@ def get_metrics_by_well_product_id(
                 hifi_reads = [prod.hifi_num_reads for prod in product_metrics]
                 cov = stdev(hifi_reads) / mean(hifi_reads) * 100
 
-            return QCPoolMetrics(
-                pool_coeff_of_variance=cov,
-                products=[
+            sample_stats = []
+            for (i, prod) in enumerate(product_metrics):
+                sample_stats.append(
                     SampleDeplexingStats(
                         id_product=prod.id_pac_bio_product,
-                        tag1_name=prod.pac_bio_run.tag_identifier,
-                        tag2_name=prod.pac_bio_run.tag2_identifier,
+                        tag1_name=lib_lims_data[i].tag_identifier,
+                        tag2_name=lib_lims_data[i].tag2_identifier,
                         deplexing_barcode=prod.barcode4deplexing,
                         hifi_read_bases=prod.hifi_read_bases,
                         hifi_num_reads=prod.hifi_num_reads,
@@ -108,13 +116,14 @@ def get_metrics_by_well_product_id(
                         hifi_bases_percent=prod.hifi_bases_percent,
                         percentage_total_reads=(
                             prod.hifi_num_reads / well.hifi_num_reads * 100
-                            if well.hifi_num_reads
+                            if (well.hifi_num_reads and prod.hifi_num_reads)
                             else None
                         ),
                     )
-                    for prod in product_metrics
-                ],
-            )
+                )
+
+            return QCPoolMetrics(pool_coeff_of_variance=cov, products=sample_stats)
+
         return None
 
     def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
diff --git a/tests/test_pac_bio_qc_data_well.py b/tests/test_pac_bio_qc_data_well.py
index bd4318a..3be9de9 100644
--- a/tests/test_pac_bio_qc_data_well.py
+++ b/tests/test_pac_bio_qc_data_well.py
@@ -1,3 +1,4 @@
+import pytest
 from npg_id_generation.pac_bio import PacBioEntity
 
 from lang_qc.db.helper.wells import WellWh
@@ -134,3 +135,15 @@ def test_pool_metrics_from_well(mlwhdb_test_session, multiplexed_run):
     assert (
         int(metrics.products[1].percentage_total_reads) == 66
     ), "20 of 30 reads is 66.6%"
+
+
+def test_errors_instead_of_pool_metrics(mlwhdb_test_session):
+    """Expect the pool metrics lookup to raise on unlinked LIMS data."""
+    id_product = PacBioEntity(
+        run_name="TRACTION-RUN-1140", well_label="C1", plate_number=2
+    ).hash_product_id()
+    helper = WellWh(session=mlwhdb_test_session)
+    with pytest.raises(
+        Exception, match=r"Partially linked LIMS data or no linked LIMS data"
+    ):
+        helper.get_metrics_by_well_product_id(id_product)

From 372fc567c64a4f70806079291b8e48d0390afa60 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Thu, 13 Jun 2024 12:09:32 +0100
Subject: [PATCH 16/16] Simplified getting linked lims data.

---
 lang_qc/db/helper/wells.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
index 57d7b01..4c8f089 100644
--- a/lang_qc/db/helper/wells.py
+++ b/lang_qc/db/helper/wells.py
@@ -88,9 +88,9 @@ def get_metrics_by_well_product_id(
 
             product_metrics = well.pac_bio_product_metrics
             lib_lims_data = [
-                row
-                for row in map(lambda product: product.pac_bio_run, product_metrics)
-                if row is not None
+                product.pac_bio_run
+                for product in product_metrics
+                if product.pac_bio_run is not None
             ]
             if len(lib_lims_data) != len(product_metrics):
                 raise Exception("Partially linked LIMS data or no linked LIMS data")