Skip to content

Commit

Permalink
Added an endpoint for retrieving recent QC states.
Browse files Browse the repository at this point in the history
  • Loading branch information
mgcam committed May 23, 2024
1 parent b0a80fe commit 8b1895b
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 4 deletions.
58 changes: 57 additions & 1 deletion lang_qc/db/helper/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# this program. If not, see <http://www.gnu.org/licenses/>.

from collections import defaultdict
from datetime import datetime
from datetime import date, datetime, timedelta

from sqlalchemy import and_, func, select
from sqlalchemy.exc import NoResultFound
Expand Down Expand Up @@ -105,6 +105,62 @@ def get_qc_states_by_id_product_list(
return dict(response)


def get_qc_states(
    session: Session,
    num_weeks: int,
    sequencing_outcomes_only: bool = False,
    final_only: bool = False,
) -> dict[ChecksumSHA256, list[QcState]]:
    """
    Returns a dictionary where keys are the product IDs, and the values are
    lists of QcState records of any type for the same product.

    Arguments:
        session - an SQLAlchemy session used to run the query.
        num_weeks - a positive number of weeks that limits the look-back
            time window. Only QC states updated later than the date, which
            is num_weeks before today, are returned.
        sequencing_outcomes_only - an optional flag, defaults to False.
            If set, only sequencing type QC states are returned. In this
            case each list is expected to have only one member; nothing in
            this function enforces it, so this presumably relies on
            a constraint of the database - confirm.
        final_only - an optional flag, defaults to False. If set, only
            final (not preliminary) QC states are returned.

    Raises ValueError if num_weeks is smaller than one.
    """

    if num_weeks < 1:
        raise ValueError("num_weeks should be a positive number")

    # date.today() has no time component, so the cut-off is a calendar date
    # rather than an exact 'now minus N weeks' timestamp.
    earliest_date = date.today() - timedelta(weeks=num_weeks)

    query = (
        select(QcStateDb)
        .join(QcStateDb.seq_product)
        .join(QcType)
        .join(QcStateDict)
        .join(User)
        .where(QcStateDb.date_updated > earliest_date)
        .options(
            # Load the related rows together with the QC states; they are
            # all needed to build the QcState models below.
            selectinload(QcStateDb.seq_product),
            selectinload(QcStateDb.qc_type),
            selectinload(QcStateDb.user),
            selectinload(QcStateDb.qc_state_dict),
        )
    )
    if sequencing_outcomes_only:
        query = query.where(QcType.qc_type == SEQUENCING_QC_TYPE)
    if final_only:
        query = query.where(QcStateDb.is_preliminary == 0)

    # Group the models by product ID, keeping the order of the rows.
    qc_states_by_product = defaultdict(list)
    for row in session.execute(query).scalars().all():
        qc_state = QcState.from_orm(row)
        qc_states_by_product[qc_state.id_product].append(qc_state)

    # Return a plain dictionary so that a lookup of an unknown product ID
    # by the caller does not silently create a new entry.
    return dict(qc_states_by_product)


def product_has_qc_state(
session: Session, id_product: ChecksumSHA256, qc_type: str = None
) -> bool:
Expand Down
44 changes: 42 additions & 2 deletions lang_qc/endpoints/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

from fastapi import APIRouter, Depends
from typing import Annotated

from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from starlette import status

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.qc import get_qc_states, get_qc_states_by_id_product_list
from lang_qc.db.qc_connection import get_qc_db
from lang_qc.models.qc_state import QcState
from lang_qc.util.type_checksum import ChecksumSHA256

# Default look-back window, in weeks, used when the `weeks` query parameter
# is not given.
RECENTLY_QCED_NUM_WEEKS = 4

router = APIRouter(
prefix="/products",
tags=["product"],
Expand Down Expand Up @@ -62,3 +66,39 @@ def bulk_qc_fetch(
):

return get_qc_states_by_id_product_list(session=qcdb_session, ids=request_body)


@router.get(
    "/qc",
    response_model=dict[ChecksumSHA256, list[QcState]],
    responses={
        status.HTTP_422_UNPROCESSABLE_ENTITY: {
            "description": "Invalid number of weeks",
        },
    },
    summary="Returns a dictionary of QC states",
    description="""
The response is a dictionary of lists of QcState models hashed on product IDs.
Multiple QC states for the same product might be returned if the query is not
constrained to a single QC type.
Query parameters constrain the semantics of the response.
`weeks` - number of weeks to look back, defaults to four.
`seq_level` - a boolean option. If `True`, only `sequencing` type QC states
are returned. If `False` (the default), all types of QC states are
returned.
`final` - a boolean option. If `True`, only final QC states are returned.
If `False` (the default), both final and preliminary QC states are
returned.
""",
)
def qc_fetch(
    weeks: Annotated[int, Query(gt=0)] = RECENTLY_QCED_NUM_WEEKS,
    seq_level: bool = False,
    final: bool = False,
    qcdb_session: Session = Depends(get_qc_db),
) -> dict[ChecksumSHA256, list[QcState]]:
    # Thin HTTP wrapper: the query parameters are mapped one-to-one onto the
    # arguments of the database helper. `weeks` is declared with `gt=0`, so
    # the helper is only called with a positive number of weeks.
    recent_qc_states = get_qc_states(
        session=qcdb_session,
        num_weeks=weeks,
        sequencing_outcomes_only=seq_level,
        final_only=final,
    )
    return recent_qc_states
52 changes: 52 additions & 0 deletions tests/endpoints/test_dump_qc_states.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from datetime import datetime

import pytest
from fastapi.testclient import TestClient

from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users
Expand Down Expand Up @@ -57,3 +60,52 @@ def test_get_qc_by_product_id(test_client: TestClient, load_data4well_retrieval)
assert len(response_data) == 1
assert MISSING_CHECKSUM not in response_data
assert FIRST_GOOD_CHECKSUM in response_data


def test_get_qc(test_client: TestClient, load_data4well_retrieval):
    """Tests the GET /products/qc endpoint with different query parameters."""

    def num_qc_states(states_by_product):
        # Total number of QC states across all products in the response.
        return sum(len(states) for states in states_by_product.values())

    # No QC states are expected within the default look-back window.
    reply = test_client.get("/products/qc")
    assert reply.status_code == 200
    payload = reply.json()
    assert len(payload) == 0

    # A non-positive number of weeks is rejected.
    reply = test_client.get("/products/qc?weeks=-1")
    assert reply.status_code == 422

    # Earliest test QC states are updated on 2022-02-15, choose the number
    # of weeks so that the look-back window reaches beyond this date.
    time_passed = datetime.today() - datetime(year=2022, month=2, day=15)
    num_weeks = int(time_passed.days / 7 + 2)

    # All QC states, flags are not set.
    reply = test_client.get(f"/products/qc?weeks={num_weeks}")
    assert reply.status_code == 200
    payload = reply.json()
    assert len(payload) == 18
    assert num_qc_states(payload) == 34

    # All QC states, flags are explicitly set to false values.
    reply = test_client.get(
        f"/products/qc?weeks={num_weeks}&final=false&seq_level=no"
    )
    assert reply.status_code == 200
    payload = reply.json()
    assert len(payload) == 18
    assert num_qc_states(payload) == 34

    # Final QC states of any type.
    reply = test_client.get(f"/products/qc?weeks={num_weeks}&final=true")
    assert reply.status_code == 200
    payload = reply.json()
    assert len(payload) == 4
    assert num_qc_states(payload) == 8

    # Final sequencing QC states, one QC state per product.
    reply = test_client.get(
        f"/products/qc?weeks={num_weeks}&final=True&seq_level=yes"
    )
    assert reply.status_code == 200
    payload = reply.json()
    assert len(payload) == 4
    assert num_qc_states(payload) == 4

    # Inspect one of the returned QC states.
    product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585"
    assert product_id in payload
    first_state = payload[product_id][0]
    assert first_state["id_product"] == product_id
    assert first_state["is_preliminary"] is False
    assert first_state["qc_type"] == "sequencing"
assert qc_state["qc_type"] == "sequencing"
113 changes: 112 additions & 1 deletion tests/test_qc_state_retrieval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from datetime import datetime, timedelta

import pytest
from sqlalchemy import select

from lang_qc.db.helper.qc import (
get_qc_state_for_product,
get_qc_states,
get_qc_states_by_id_product_list,
product_has_qc_state,
products_have_qc_state,
qc_state_dict,
)
from lang_qc.db.qc_schema import QcState
from lang_qc.models.qc_state import QcState as QcStateModel
from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users

# A 64-character placeholder product ID; the tests in this file assert that
# no QC state is found for it.
MISSING_CHECKSUM = "A" * 64
Expand All @@ -24,7 +30,7 @@
two_good_ids_list = [FIRST_GOOD_CHECKSUM, SECOND_GOOD_CHECKSUM]


def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
def test_bulk_retrieval_by_id(qcdb_test_session, load_data4well_retrieval):

# The test below demonstrates that no run-time type checking of
# product IDs is performed.
Expand Down Expand Up @@ -66,6 +72,111 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
assert MISSING_CHECKSUM not in qc_states


def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
    """
    Tests get_qc_states against expectations computed directly from
    the QC state rows of the test database.
    """

    def is_final(row):
        return row.is_preliminary == 0

    def is_sequencing(row):
        return row.qc_type.qc_type == "sequencing"

    def num_qc_states(states_by_product):
        # Total number of QC states across all products.
        return sum(len(states) for states in states_by_product.values())

    def states_per_product(states_by_product):
        # Maps each product ID to the number of its QC states.
        return {
            product: len(states) for (product, states) in states_by_product.items()
        }

    with pytest.raises(ValueError, match=r"num_weeks should be a positive number"):
        assert get_qc_states(qcdb_test_session, num_weeks=-1)

    # All QC state rows, the most recently updated row comes first.
    newest_first = select(QcState).order_by(QcState.date_updated.desc())
    db_rows = qcdb_test_session.execute(newest_first).scalars().all()

    now = datetime.today()
    # A window that is wide enough to include the oldest row.
    since_oldest = now - db_rows[-1].date_updated
    max_num_weeks = int(since_oldest.days / 7 + 1)
    # A window that is too narrow to include even the newest row.
    since_newest = now - db_rows[0].date_updated
    min_num_weeks = int(since_newest.days / 7 - 1)

    assert min_num_weeks > 2
    # Set the look-back number of weeks to the period with no records.
    found = get_qc_states(qcdb_test_session, num_weeks=(min_num_weeks - 1))
    assert len(found) == 0

    # Retrieve all available QC states.
    found = get_qc_states(qcdb_test_session, num_weeks=max_num_weeks)
    # Test total number of QcState objects.
    assert num_qc_states(found) == len(db_rows)
    # Test number of items in the dictionary.
    assert len(found) == len({row.id_seq_product for row in db_rows})

    # Retrieve all available final QC states.
    found = get_qc_states(
        qcdb_test_session, num_weeks=max_num_weeks, final_only=True
    )
    assert num_qc_states(found) == sum(1 for row in db_rows if is_final(row))
    assert states_per_product(found) == {
        "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 2,
        "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 2,
        "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 2,
        "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 2,
    }

    # Retrieve all available sequencing type QC states.
    found = get_qc_states(
        qcdb_test_session, num_weeks=max_num_weeks, sequencing_outcomes_only=True
    )
    assert len(found) == sum(1 for row in db_rows if is_sequencing(row))

    # Retrieve all available sequencing type final QC states.
    found = get_qc_states(
        qcdb_test_session,
        num_weeks=max_num_weeks,
        final_only=True,
        sequencing_outcomes_only=True,
    )
    assert len(found) == sum(
        1 for row in db_rows if (is_final(row) and is_sequencing(row))
    )
    assert states_per_product(found) == {
        "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 1,
        "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 1,
        "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 1,
        "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 1,
    }

    # Retrieve recent sequencing type final QC states.
    num_weeks = max_num_weeks - 44
    found = get_qc_states(
        qcdb_test_session,
        num_weeks=num_weeks,
        final_only=True,
        sequencing_outcomes_only=True,
    )
    earliest_time = now - timedelta(weeks=num_weeks)
    assert len(found) == sum(
        1
        for row in db_rows
        if (row.date_updated > earliest_time and is_final(row) and is_sequencing(row))
    )
    product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585"
    assert states_per_product(found) == {product_id: 1}

    # The values of the dictionary are lists of QcState models.
    recent_state = found[product_id][0]
    assert isinstance(recent_state, QcStateModel)
    assert recent_state.id_product == product_id
    assert recent_state.is_preliminary is False
    assert recent_state.qc_type == "sequencing"


def test_product_existence(qcdb_test_session, load_data4well_retrieval):

assert product_has_qc_state(qcdb_test_session, MISSING_CHECKSUM) is False
Expand Down

0 comments on commit 8b1895b

Please sign in to comment.