Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increased the look-back period for inbox. #185

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Changed

* Increased the look-back period for the inbox query from 4 weeks to
12 weeks. Introduced preliminary filtering by the QC state, which is
now available in the ml warehouse. Since the ml warehouse QC state might
not be up to date, a final check against the LangQC database is retained.

## [1.4.1] - 2023-08-23

### Added
Expand Down
18 changes: 10 additions & 8 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class WellWh(BaseModel):
title="SQLAlchemy Session",
description="A SQLAlchemy Session for the ml warehouse database",
)
INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 4
INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 12

class Config:
allow_mutation = False
Expand All @@ -77,18 +77,16 @@ def get_mlwh_well_by_product_id(

def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
"""
Get recent completed wells from the mlwh database.
The implementation of the inbox query might change when the QC outcomes
become available in mlwh.
Get recent not QC-ed completed wells from the mlwh database.
"""

######
# It is important not to show aborted wells in the inbox.
#
# The well can be complete as in Illumina 'run complete' but that's not
# the same as analysis complete which the other conditions are trying for.
# It potentially gets a bit easier with v11 but those conditions should
# still work ok.
# The well can be complete, but that's not the same as analysis
# complete which the other conditions are trying for.
# It potentially gets a bit easier with v11 but those conditions
# should still work ok.
#

# Using current local time.
Expand All @@ -97,11 +95,15 @@ def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
my_date = date.today() - timedelta(weeks=self.INBOX_LOOK_BACK_NUM_WEEKS)
look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)

# Select the wells that have not been QC-ed, then later double-check them
# against the LangQC database.

# TODO: fall back to run_complete when well_complete is undefined

query = (
select(PacBioRunWellMetrics)
.where(PacBioRunWellMetrics.well_status == "Complete")
.where(PacBioRunWellMetrics.qc_seq_state.is_(None))
.where(PacBioRunWellMetrics.run_complete > look_back_min_date)
.where(PacBioRunWellMetrics.polymerase_num_reads.is_not(None))
.where(
Expand Down
52 changes: 29 additions & 23 deletions tests/fixtures/well_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,34 +864,40 @@ def load_data4well_retrieval(
qcdb_test_session.commit()

# We want some wells to be in the inbox. For that their run_complete dates
# should be within last four weeks. Therefore, we need to update the timestamps
# for these runs.
# should be within, for example, the last four weeks. Therefore, we need to
# update the timestamps for these runs.
_update_timestamps4inbox()

# Transform a list of lists into a list of hashes, which map to db rows.
mlwh_data4insert = []
for record in MLWH_DATA:
mlwh_data4insert.append(
{
"pac_bio_run_name": record[0],
"well_label": record[1],
"run_start": record[2],
"run_complete": record[3],
"well_start": record[4],
"well_complete": record[5],
"well_status": record[6],
"run_status": record[7],
"ccs_execution_mode": record[8],
"polymerase_num_reads": record[9],
"hifi_num_reads": record[10],
"id_pac_bio_product": PacBioEntity(
run_name=record[0], well_label=record[1], plate_number=record[13]
).hash_product_id(),
"instrument_name": record[11],
"instrument_type": record[12],
"plate_number": record[13],
}
)
data = {
"pac_bio_run_name": record[0],
"well_label": record[1],
"run_start": record[2],
"run_complete": record[3],
"well_start": record[4],
"well_complete": record[5],
"well_status": record[6],
"run_status": record[7],
"ccs_execution_mode": record[8],
"polymerase_num_reads": record[9],
"hifi_num_reads": record[10],
"id_pac_bio_product": PacBioEntity(
run_name=record[0], well_label=record[1], plate_number=record[13]
).hash_product_id(),
"instrument_name": record[11],
"instrument_type": record[12],
"plate_number": record[13],
}
# Add QC state for two wells of one run.
if (data["pac_bio_run_name"] == "TRACTION_RUN_4") and (
data["well_label"] in ("A1", "B1")
):
data["qc_seq_state"] = "Failed"
data["qc_seq_date"] = data["run_complete"]
mlwh_data4insert.append(data)

mlwhdb_test_session.execute(insert(PacBioRunWellMetrics), mlwh_data4insert)
mlwhdb_test_session.commit()

Expand Down
4 changes: 1 addition & 3 deletions tests/test_wh_data_retrieval_pb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ def test_completed_wells_retrieval(mlwhdb_test_session, load_data4well_retrieval
["TRACTION_RUN_1", "B1"],
["TRACTION_RUN_1", "C1"],
["TRACTION_RUN_1", "D1"],
["TRACTION_RUN_4", "A1"],
["TRACTION_RUN_4", "B1"],
["TRACTION_RUN_4", "C1"],
["TRACTION_RUN_4", "D1"],
["TRACTION_RUN_3", "A1"],
Expand All @@ -42,7 +40,7 @@ def test_completed_wells_retrieval(mlwhdb_test_session, load_data4well_retrieval
.scalars()
.all()
)
time = datetime.now() - timedelta(days=40)
time = datetime.now() - timedelta(days=130)
for row in wells_to_update:
if row.well_complete is not None:
row.well_complete = time
Expand Down