Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upcoming tab #186

Merged
merged 2 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Added

* Back-end code for the 'Upcoming' tab. The 'Upcoming' tab is
automatically appended to the collection of the UI tabs for
filtering wells.

### Changed

* Increased the look-back period for the inbox query from 4 weeks to
Expand Down
68 changes: 65 additions & 3 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
from sqlalchemy import and_, or_, select
from sqlalchemy.orm import Session

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.qc import (
get_qc_states_by_id_product_list,
qc_state_for_product_exists,
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
Expand All @@ -42,6 +45,8 @@
Here this type is used to mark variables that are purely internal to the class.
"""

INBOX_LOOK_BACK_NUM_WEEKS = 12


class WellWh(BaseModel):
"""
Expand All @@ -54,7 +59,6 @@ class WellWh(BaseModel):
title="SQLAlchemy Session",
description="A SQLAlchemy Session for the ml warehouse database",
)
INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 12

class Config:
allow_mutation = False
Expand All @@ -78,6 +82,8 @@ def get_mlwh_well_by_product_id(
def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
"""
Get recent not QC-ed completed wells from the mlwh database.
Recent wells are defined as wells that completed within the
last 12 weeks.
"""

######
Expand All @@ -92,7 +98,7 @@ def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
# Using current local time.
# Generating a date rather than a timestamp here in order to have a consistent
# earliest date for the look-back period during the QC team's working day.
my_date = date.today() - timedelta(weeks=self.INBOX_LOOK_BACK_NUM_WEEKS)
my_date = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)

# Select the wells that have not been QC-ed, but later double-check against
Expand Down Expand Up @@ -213,6 +219,8 @@ def create_for_qc_status(
QcFlowStatusEnum.UNKNOWN,
]:
wells = self._aborted_and_unknown_wells(qc_flow_status)
elif qc_flow_status == QcFlowStatusEnum.UPCOMING:
wells = self._upcoming_wells()
else:
wells = self._get_wells_for_status(qc_flow_status)

Expand Down Expand Up @@ -322,6 +330,60 @@ def _add_tracking_info(self, wells: List[PacBioWell]):
else:
well.copy_run_tracking_info(db_well)

def _upcoming_wells(self):
    """
    Retrieve the wells for the 'upcoming' QC flow status.

    Upcoming wells are recent wells that do not fall under any other
    QC flow status defined in QcFlowStatus. A well is considered recent
    if its run started within the last INBOX_LOOK_BACK_NUM_WEEKS weeks.

    The total number of wells found is saved in `total_number_of_items`;
    the return value is produced by `_well_models` from the sliced data.
    """

    # Product IDs of the recently completed wells; the query below
    # excludes them.
    excluded_ids = [
        completed.id_pac_bio_product for completed in self.recent_completed_wells()
    ]

    # A date (midnight) rather than a timestamp marks the start of the
    # look-back period.
    earliest_day = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
    earliest_run_start = datetime(
        earliest_day.year, earliest_day.month, earliest_day.day
    )

    # NOTE: some conditions used by the queries for other statuses
    # (filters) are repeated here in a negated form. If any of those
    # queries change, this query has to be revised as well.
    well_status = PacBioRunWellMetrics.well_status
    query = (
        select(PacBioRunWellMetrics)
        .where(PacBioRunWellMetrics.run_start > earliest_run_start)
        .where(PacBioRunWellMetrics.qc_seq_state.is_(None))
        .where(PacBioRunWellMetrics.id_pac_bio_product.not_in(excluded_ids))
    )
    for status_pattern in ("Abort%", "Terminat%", "Fail%", "Error%"):
        query = query.where(well_status.not_like(status_pattern))
    query = query.where(well_status.not_in(["Unknown", "On hold"])).order_by(
        PacBioRunWellMetrics.run_start,
        PacBioRunWellMetrics.pac_bio_run_name,
        PacBioRunWellMetrics.plate_number,
        PacBioRunWellMetrics.well_label,
    )

    # Keep only the wells for which no QC state is recorded in the
    # QC database.
    upcoming = [
        db_well
        for db_well in self.session.execute(query).scalars().all()
        if qc_state_for_product_exists(
            session=self.qcdb_session, id_product=db_well.id_pac_bio_product
        )
        is False
    ]

    # Save the number of retrieved wells.
    self.total_number_of_items = len(upcoming)

    return self._well_models(self.slice_data(upcoming), False)

def _recent_inbox_wells(self, recent_wells):

inbox_wells_indexes = []
Expand Down
8 changes: 7 additions & 1 deletion lang_qc/models/qc_flow_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ class QcFlowStatus(BaseModel):
@unique
class QcFlowStatusEnum(str, Enum):
"""
An enumeration of known QC flow states. The order of the statuses is
An enumeration of known QC flow statuses. The order of the statuses is
consistent with the temporal flow of the manual QC process.

Logically the upcoming status should be at the beginning. In order
to keep the order of tabs consistent with earlier versions and to separate
this status from the statuses that are more relevant to the QC process,
this status is placed at the end.
"""

INBOX = "inbox"
Expand All @@ -53,6 +58,7 @@ class QcFlowStatusEnum(str, Enum):
QC_COMPLETE = "qc_complete"
ABORTED = "aborted"
UNKNOWN = "unknown"
UPCOMING = "upcoming"

@classmethod
def qc_flow_statuses(cls) -> "List[QcFlowStatus]":
Expand Down
1 change: 1 addition & 0 deletions tests/endpoints/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_get_config(test_client: TestClient, load_dicts_and_users):
{"label": "QC Complete", "param": "qc_complete"},
{"label": "Aborted", "param": "aborted"},
{"label": "Unknown", "param": "unknown"},
{"label": "Upcoming", "param": "upcoming"},
],
"qc_states": [
{"description": "Passed", "only_prelim": False},
Expand Down
16 changes: 16 additions & 0 deletions tests/endpoints/test_filtered_wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,22 @@ def test_aborted_filter(test_client: TestClient, load_data4well_retrieval):
_assert_filtered_results(response, [], 10, 100, num_total)


def test_upcoming_filter(test_client: TestClient, load_data4well_retrieval):
    """Test passing `upcoming` filter."""

    # Wells expected under the 'upcoming' filter, in the order in which
    # they should be returned. No QC state is expected for any of them.
    upcoming_wells = (
        "TRACTION_RUN_12:B1",
        "TRACTION_RUN_12:C1",
        "TRACTION_RUN_6:A1",
        "TRACTION_RUN_6:B1",
    )
    expected_data = [{well_name: None} for well_name in upcoming_wells]

    response = test_client.get(
        "/pacbio/wells?qc_status=upcoming&page_size=10&page_number=1"
    )
    _assert_filtered_results(response, expected_data, 10, 1, len(expected_data))


def _assert_filtered_results(
response, expected_data, page_size, page_number, total_number
):
Expand Down
61 changes: 49 additions & 12 deletions tests/fixtures/well_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,38 @@
"Revio",
1,
],
[
"TRACTION_RUN_6",
"A1",
"2022-12-12 15:47:25",
"2022-12-19 16:43:31",
"2022-12-12 15:57:31",
"2022-12-14 06:42:33",
"Running",
"Running",
"OnInstrument",
None,
None,
"1234",
"Revio",
2,
],
[
"TRACTION_RUN_6",
"B1",
"2022-12-12 15:47:25",
"2022-12-19 16:43:31",
"2022-12-13 20:52:47",
"2022-12-15 10:37:35",
"Running",
"Running",
"OnInstrument",
None,
None,
"1234",
"Revio",
2,
],
[
"TRACTION_RUN_7",
"A1",
Expand Down Expand Up @@ -866,7 +898,7 @@ def load_data4well_retrieval(
# We want some wells to be in the inbox. For that their run_complete dates
# should be within, for example, last four weeks. Therefore, we need to
# update the timestamps for these runs.
_update_timestamps4inbox()
_update_timestamps()

# Transform a list of lists into a list of hashes, which map to db rows.
mlwh_data4insert = []
Expand All @@ -890,7 +922,7 @@ def load_data4well_retrieval(
"instrument_type": record[12],
"plate_number": record[13],
}
# Add QC state for one runs.
# Add QC state for one run.
if (data["pac_bio_run_name"] == "TRACTION_RUN_4") and (
data["well_label"] in ("A1", "B1")
):
Expand Down Expand Up @@ -956,31 +988,35 @@ def _get_dict_of_dict_rows(qcdb_test_session):
}


def _update_timestamps4inbox():
def _update_timestamps():

# Designated inbox wells:
# TRACTION_RUN_3 - A1, B1,
# TRACTION_RUN_4 - C1, D1,
# TRACTION_RUN_10 - A1, B1, C1
# TRACTION_RUN_12 - A1

#
# These wells do not have a record in a fixture for the LangQC database,
# values for their run status, ccs_execution_mode, polymerase_num_reads,
# hifi_num_reads are set in a way that makes them eligible for the QC
# inbox. Here we make sure that these wells have recent (ie within 4 weeks)
# completion dates.
# We also update dates for TRACTION_RUN_1, which does have wells in QC.

# We also update dates for TRACTION_RUN_1, which does have wells in QC,
# and TRACTION_RUN_6, which partially fits into the upcoming status.

# Find the earliest date in the set.
inbox_runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 10, 12)]
runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 6, 10, 12)]
date_tuples = [
(record[2], record[3], record[4], record[5])
for record in MLWH_DATA
if record[0] in inbox_runs
if record[0] in runs
]
dates = []
for dt in date_tuples:
dates.extend([datetime.strptime(date, DATE_FORMAT) for date in dt])
dates.extend(
[datetime.strptime(date, DATE_FORMAT) for date in dt if date is not None]
)
old_earliest = min(dates)
# Find the date 26 days from today.
new_earliest = date.today() - timedelta(days=26)
Expand All @@ -989,9 +1025,10 @@ def _update_timestamps4inbox():
datetime(new_earliest.year, new_earliest.month, new_earliest.day) - old_earliest
)
delta_plus = timedelta(delta.days)
# Amend all dates for the inbox data by adding delta.
# Amend all dates by adding delta.
for index, record in enumerate(MLWH_DATA):
if record[0] in inbox_runs:
if record[0] in runs:
for i in (2, 3, 4, 5):
time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
if record[i] is not None:
time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
2 changes: 2 additions & 0 deletions tests/test_pb_wells_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,14 @@ def test_paged_retrieval_for_statuses(
QcFlowStatusEnum.IN_PROGRESS.name: 11,
QcFlowStatusEnum.ON_HOLD.name: 2,
QcFlowStatusEnum.QC_COMPLETE.name: 4,
QcFlowStatusEnum.UPCOMING.name: 4,
}

for status in [
QcFlowStatusEnum.IN_PROGRESS,
QcFlowStatusEnum.ON_HOLD,
QcFlowStatusEnum.QC_COMPLETE,
QcFlowStatusEnum.UPCOMING,
]:

factory = PacBioPagedWellsFactory(
Expand Down
Loading