Skip to content

Commit

Permalink
Merge pull request #189 from mgcam/prune_unknowns
Browse files Browse the repository at this point in the history
Removed QC-ed wells from the results for the 'Unknown' tab
  • Loading branch information
nerdstrike authored Jan 25, 2024
2 parents 6585cad + 620c10b commit f0bd2e9
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 40 deletions.
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Changed

* The 'Unknown' UI tab no longer displays the wells that have any
sequencing QC state associated with it. This feature was requested
by the users.

* To increase code readability, renamed qc_state_for_product_exists
method in lang_qc::db::helper::qc to product_has_qc_state.

* Added method products_have_qc_state to lang_qc::db::helper::qc,
used it to optimise (reduce the number of database queries) some
of the existing back-end code.

### Fixed

* The client side JavaScript dependency, element-plus, is pinned
to version 2.4.4. In mid-January 2024 this was the highest version
that worked with our code. The version expression we had "^2.3.7"
allowed for fetching the latest available version of this library.

## [1.5.0]

### Added
Expand Down
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
"dependencies": {
"@element-plus/icons-vue": "^2.0.10",
"element-plus": "^2.3.7",
"element-plus": "2.4.4",
"lodash": "^4.17.21",
"pinia": "^2.0.23",
"vue": "^3.2.37",
Expand Down
37 changes: 34 additions & 3 deletions lang_qc/db/helper/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

APPLICATION_NAME = "LangQC"
DEFAULT_QC_TYPE = "sequencing"
SEQUENCING_QC_TYPE = DEFAULT_QC_TYPE
CLAIMED_QC_STATE = "Claimed"
DEFAULT_FINALITY = False
ONLY_PRELIM_STATES = (CLAIMED_QC_STATE, "On hold")
Expand Down Expand Up @@ -78,7 +79,7 @@ def get_qc_states_by_id_product_list(
If only sequencing type QC states are required, an optional
argument, sequencing_outcomes_only, should be set to True.
If this case it is guaranteed that the list of QcState objects
In this case it is guaranteed that the list of QcState objects
has only one member.
Product IDs for which no QC states are found are omitted
Expand All @@ -100,7 +101,7 @@ def get_qc_states_by_id_product_list(
)


def qc_state_for_product_exists(
def product_has_qc_state(
session: Session, id_product: ChecksumSHA256, qc_type: str = None
) -> bool:
"""
Expand Down Expand Up @@ -132,6 +133,36 @@ def qc_state_for_product_exists(
return bool(session.execute(query).scalar_one())


def products_have_qc_state(
session: Session, ids: list[ChecksumSHA256], sequencing_outcomes_only: bool = False
) -> set[ChecksumSHA256]:
"""
Given a list of product IDs, returns a potentially empty subset of this list
as a Set object. Each product, identified by the product ID in the returned
set, has some QC state associated with it. If `sequencing_outcome_only`
boolean flag is true, the product must have sequencing QC state associated
with it; `library` QC state is excluded.
Arguments:
`session` - `sqlalchemy.orm.Session`, a connection for LangQC database.
`ids` - a list of string product IDs.
`sequencing_outcome_only` - a boolean flag indicating whether the caller
is interested in `sequencing` QC states only
"""

query = select(SeqProduct.id_product).where(SeqProduct.id_product.in_(ids))
if sequencing_outcomes_only is True:
query = query.join(SeqProduct.qc_state).where(
QcStateDb.qc_type == _get_qc_type_row(session, SEQUENCING_QC_TYPE)
)

# We asked to retrieve data for one column only. The return value for
# each row is an array with a single value from this column.
product_ids = [row[0] for row in session.execute(query).all()]

return set(product_ids)


def get_seq_product(session: Session, id_product: ChecksumSHA256) -> SeqProduct:
"""
Given a product ID, returns a SeqProduct row for a database entity with
Expand All @@ -149,7 +180,7 @@ def get_seq_product(session: Session, id_product: ChecksumSHA256) -> SeqProduct:


def get_qc_state_for_product(
session: Session, id_product: ChecksumSHA256, qc_type: str = "sequencing"
session: Session, id_product: ChecksumSHA256, qc_type: str = DEFAULT_QC_TYPE
) -> QcStateDb:
"""
Returns a QcState database row associated with the product with the
Expand Down
31 changes: 19 additions & 12 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

from lang_qc.db.helper.qc import (
get_qc_states_by_id_product_list,
qc_state_for_product_exists,
products_have_qc_state,
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
Expand Down Expand Up @@ -365,15 +365,11 @@ def _upcoming_wells(self):
)
)

wells = []
for w in self.session.execute(query).scalars().all():
if (
qc_state_for_product_exists(
session=self.qcdb_session, id_product=w.id_pac_bio_product
)
is False
):
wells.append(w)
wells = self.session.execute(query).scalars().all()
ids_with_qc_state = products_have_qc_state(
session=self.qcdb_session, ids=[w.id_pac_bio_product for w in wells]
)
wells = [w for w in wells if w.id_pac_bio_product not in ids_with_qc_state]

self.total_number_of_items = len(wells) # Save the number of retrieved wells.

Expand Down Expand Up @@ -420,10 +416,21 @@ def _aborted_and_unknown_wells(self, qc_flow_status: QcFlowStatusEnum):
.all()
)

qc_state_applicable = True
if qc_flow_status == QcFlowStatusEnum.UNKNOWN:
# Remove the wells that the QC team has dealt with.
ids_with_qc_state = products_have_qc_state(
session=self.qcdb_session,
ids=[w.id_pac_bio_product for w in wells],
sequencing_outcomes_only=True,
)
wells = [w for w in wells if w.id_pac_bio_product not in ids_with_qc_state]
qc_state_applicable = False

# Save the number of retrieved rows.
self.total_number_of_items = len(wells)

return self._well_models(self.slice_data(wells), True)
return self._well_models(self.slice_data(wells), qc_state_applicable)

def _well_models(
self,
Expand Down Expand Up @@ -454,7 +461,7 @@ def _well_models(
sequencing_outcomes_only=True,
).get(id_product)
# A well can have only one or zero current sequencing outcomes.
if qced_products is not None:
if qced_products is not None and (len(qced_products) > 0):
attrs["qc_state"] = qced_products[0]
pb_well = PacBioWell.model_validate(attrs)
pb_well.copy_run_tracking_info(db_well)
Expand Down
9 changes: 3 additions & 6 deletions lang_qc/endpoints/pacbio_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from lang_qc.db.helper.qc import (
assign_qc_state_to_product,
claim_qc_for_product,
qc_state_for_product_exists,
product_has_qc_state,
)
from lang_qc.db.helper.well import well_seq_product_find_or_create
from lang_qc.db.helper.wells import PacBioPagedWellsFactory, WellWh
Expand Down Expand Up @@ -216,7 +216,7 @@ def claim_qc(
mlwh_well = _find_well_product_or_error(id_product, mlwhdb_session)

# Checking for any type of QC state
if qc_state_for_product_exists(qcdb_session, id_product):
if product_has_qc_state(qcdb_session, id_product):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Well for product {id_product} has QC state assigned",
Expand Down Expand Up @@ -261,10 +261,7 @@ def assign_qc_state(

mlwh_well = _find_well_product_or_error(id_product, mlwhdb_session)

if (
qc_state_for_product_exists(qcdb_session, id_product, request_body.qc_type)
is False
):
if product_has_qc_state(qcdb_session, id_product, request_body.qc_type) is False:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="QC state of an unclaimed well cannot be updated",
Expand Down
76 changes: 75 additions & 1 deletion tests/test_pb_wells_factory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pytest
from npg_id_generation.pac_bio import PacBioEntity
from sqlalchemy import select

from lang_qc.db.helper.wells import PacBioPagedWellsFactory, RunNotFoundError
from lang_qc.db.qc_schema import QcState
from lang_qc.db.qc_schema import QcState, QcType, SeqProduct
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
from lang_qc.models.qc_flow_status import QcFlowStatusEnum
from lang_qc.models.qc_state import QcState as QcStateModel
Expand Down Expand Up @@ -175,13 +176,15 @@ def test_paged_retrieval_for_statuses(
QcFlowStatusEnum.ON_HOLD.name: 2,
QcFlowStatusEnum.QC_COMPLETE.name: 4,
QcFlowStatusEnum.UPCOMING.name: 4,
QcFlowStatusEnum.UNKNOWN.name: 2,
}

for status in [
QcFlowStatusEnum.IN_PROGRESS,
QcFlowStatusEnum.ON_HOLD,
QcFlowStatusEnum.QC_COMPLETE,
QcFlowStatusEnum.UPCOMING,
QcFlowStatusEnum.UNKNOWN,
]:

factory = PacBioPagedWellsFactory(
Expand Down Expand Up @@ -425,3 +428,74 @@ def test_known_run_names_input(
qc_state_objs = [well.qc_state for well in wells]
assert qc_state_objs[0] is None
assert qc_state_objs[1] is None


def test_retrieval_for_unknown_status(
qcdb_test_session, mlwhdb_test_session, load_data4well_retrieval
):
# SUBTRACTION_RUN_15:A1
# TRACTION_RUN_14:B1

factory = PacBioPagedWellsFactory(
qcdb_session=qcdb_test_session,
mlwh_session=mlwhdb_test_session,
page_size=10,
page_number=1,
)
paged_wells = factory.create_for_qc_status(QcFlowStatusEnum.UNKNOWN)
assert (
paged_wells.total_number_of_items == 2
), "two wells with unknown status, no qc state"

# Create seq QC states for these wells and test that they are gone
# from the Unknown status.

id = PacBioEntity(run_name="TRACTION_RUN_14", well_label="B1").hash_product_id()

seq_product = SeqProduct(id_product=id, id_seq_platform=1, has_seq_data=1)
qcdb_test_session.add(seq_product)
qcdb_test_session.commit()

qc_type_row = (
qcdb_test_session.execute(select(QcType).where(QcType.qc_type == "library"))
.scalars()
.one()
)
qc_state = QcState(
created_by="LangQC",
is_preliminary=1,
id_qc_state_dict=2,
qc_type=qc_type_row,
seq_product=seq_product,
id_user=1,
)
qcdb_test_session.add(qc_state)
qcdb_test_session.commit()

paged_wells = factory.create_for_qc_status(QcFlowStatusEnum.UNKNOWN)
assert (
paged_wells.total_number_of_items == 2
), """assigning library qc state to a well does not affect
its selection for the unknown status"""

qc_type_row = (
qcdb_test_session.execute(select(QcType).where(QcType.qc_type == "sequencing"))
.scalars()
.one()
)
qc_state = QcState(
created_by="LangQC",
is_preliminary=1,
id_qc_state_dict=3,
qc_type=qc_type_row,
seq_product=seq_product,
id_user=1,
)
qcdb_test_session.add(qc_state)
qcdb_test_session.commit()

paged_wells = factory.create_for_qc_status(QcFlowStatusEnum.UNKNOWN)
assert (
paged_wells.total_number_of_items == 1
), """assigning sequencing qc state to a well removes it
from the selection for the unknown status"""
Loading

0 comments on commit f0bd2e9

Please sign in to comment.