Skip to content

Commit

Permalink
Merge pull request #212 from mgcam/study_name4summary
Browse files Browse the repository at this point in the history
PacBioWellSummary as a response model for well summaries
  • Loading branch information
nerdstrike authored Mar 22, 2024
2 parents 7de46f6 + a43b88a commit f599e37
Show file tree
Hide file tree
Showing 23 changed files with 2,979 additions and 822 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Changed

* To simplify object instantiation and fields' assignment for some
of the response models, converted `PacBioWell` and `PacBioWellFull`
models to pydantic dataclasses.

### Added

* A new response model `PacBioWellSummary`, which replaces `PacBioWell`
in the latter's capacity as the response model for a PacBio well
summary.
* A new field, `study_names`, a potentially empty sorted array of
unique study names, is added to the response model for a PacBio
well summary.

## [2.0.0] - 2024-02-20

### Changed
Expand Down
12 changes: 6 additions & 6 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary
from lang_qc.models.pager import PagedResponse
from lang_qc.models.qc_flow_status import QcFlowStatusEnum
from lang_qc.models.qc_state import QcState as QcStateModel
Expand Down Expand Up @@ -195,7 +195,7 @@ def create_for_qc_status(
specified by the `page_size`, `page_number` object's attributes and
`qc_flow_status` argument of this function.
The `PacBioWell` objects in `wells` attribute of the returned object
The `PacBioWellSummary` objects in `wells` attribute of the returned object
are sorted in a way appropriate for the requested `qc_flow_status`.
For the 'in progress' and 'on hold' requests the wells with most recently
assigned QC states come first. For inbox requests the wells with least
Expand Down Expand Up @@ -230,7 +230,7 @@ def create_for_run(self, run_name: str) -> PacBioPagedWells:
"""
Returns `PacBioPagedWells` object that corresponds to the criteria
specified by the `page_size` and `page_number` attributes.
The `PacBioWell` objects in `wells` attribute of the returned object
The `PacBioWellSummary` objects in `wells` attribute of the returned object
belong to runs specified by the `run_name` argument and are sorted
by the run name and well label.
"""
Expand Down Expand Up @@ -281,7 +281,7 @@ def _retrieve_paged_qc_states(

def _get_wells_for_status(
self, qc_flow_status: QcFlowStatusEnum
) -> List[PacBioWell]:
) -> List[PacBioWellSummary]:

wells = []

Expand All @@ -290,7 +290,7 @@ def _get_wells_for_status(
id_product = qc_state_model.id_product
mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product)
if mlwh_well is not None:
pbw = PacBioWell(db_well=mlwh_well, qc_state=qc_state_model)
pbw = PacBioWellSummary(db_well=mlwh_well, qc_state=qc_state_model)
wells.append(pbw)
else:
"""
Expand Down Expand Up @@ -394,7 +394,7 @@ def _well_models(
qc_state = None
if id_product in qced_products:
qc_state = qced_products[id_product][0]
pb_well = PacBioWell(db_well=db_well, qc_state=qc_state)
pb_well = PacBioWellSummary(db_well=db_well, qc_state=qc_state)
pb_wells.append(pb_well)

return pb_wells
Expand Down
21 changes: 21 additions & 0 deletions lang_qc/db/mlwh_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,27 @@ class PacBioRunWellMetrics(Base):
"PacBioProductMetrics", back_populates="pac_bio_run_well_metrics"
)

def get_experiment_info(self) -> list:
    """Return LIMS information about the PacBio experiment for this well.

    The result is a list of PacBioRun mlwh database rows, one pac_bio_run
    table row per sample (product) in the well.

    If any or all of the pac_bio_product_metrics rows linked to this well
    record are not linked to the pac_bio_run table, an empty list is
    returned, thus preventing incomplete data being supplied to the client.
    A well with no linked pac_bio_product_metrics rows also yields an
    empty list.
    """
    experiment_info = [pm.pac_bio_run for pm in self.pac_bio_product_metrics]
    # All or nothing: a single product without a LIMS row invalidates the
    # whole set, so partial data never leaves this method.
    if any(pbr is None for pbr in experiment_info):
        return []

    return experiment_info


class PacBioProductMetrics(Base):
__tablename__ = "pac_bio_product_metrics"
Expand Down
52 changes: 38 additions & 14 deletions lang_qc/models/pacbio/well.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class PacBioWell:
`run_name`, `label`, `plate_number`, and `id_product` fields uniquely
identify the well. The model also has fields that reflect the time line
of the run and information about a PacBio instrument. The optional
`qc_state field might contain the current QC state of the well.
`qc_state` field might contain the current QC state of the well.
The best way to instantiate the model is via the constructor, supplying
an ORM object representing a database row with information about
Expand Down Expand Up @@ -133,6 +133,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:

# https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
assert mlwh_db_row

column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns]

Expand All @@ -147,14 +148,41 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
return assigned


@dataclass(kw_only=True, frozen=True)
class PacBioWellSummary(PacBioWell):
    """A response model for a summary about a single PacBio well.
    Adds `study_names` to a list of attributes of the parent class `PacBioWell`.
    Instance creation is described in the documentation of the parent class.
    `get_experiment_info` method in this package is used to retrieve study
    information, see its documentation for details.
    """

    study_names: list = Field(
        title="An alphabetically sorted list of distinct study names",
    )

    @model_validator(mode="before")
    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
        # Start from the field values assembled by the parent class, then
        # add the one field this model introduces.
        field_values = super().pre_root(values)

        # Despite the annotation, the constructor arguments are read through
        # the `kwargs` attribute of `values`; `db_well` is the database row
        # for this well.
        db_well: PacBioRunWellMetrics = values.kwargs["db_well"]

        # A set removes duplicate study names before sorting.
        distinct_names = {row.study.name for row in db_well.get_experiment_info()}
        field_values["study_names"] = sorted(distinct_names)

        return field_values


class PacBioPagedWells(PagedResponse, extra="forbid"):
"""A response model for paged data about PacBio wells."""

wells: list[PacBioWell] = Field(
wells: list[PacBioWellSummary] = Field(
default=[],
title="A list of PacBioWell objects",
title="A list of PacBioWellSummary objects",
description="""
A list of `PacBioWell` objects that corresponds to the page number
A list of `PacBioWellSummary` objects that corresponds to the page number
and size specified by the `page_size` and `page_number` attributes.
""",
)
Expand All @@ -164,13 +192,16 @@ class PacBioPagedWells(PagedResponse, extra="forbid"):
class PacBioWellFull(PacBioWell):
"""A full response model for a single PacBio well.
The model has teh fields that uniquely define the well (`run_name`, `label`,
The model has the fields that uniquely define the well (`run_name`, `label`,
`plate_number`, `id_product`), along with the laboratory experiment and
sequence run tracking information, current QC state of this well and
QC data for this well.
Instance creation is described in the documentation of this class's parent
`PacBioWell`.
`get_experiment_info` method in this package is used to retrieve information
about the experiment, see its documentation for details.
"""

metrics: QCDataWell = Field(
Expand All @@ -189,16 +220,9 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:

assigned = super().pre_root(values)
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]

assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row)

product_metrics = mlwh_db_row.pac_bio_product_metrics
experiment_info = [
pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None
]
# Occasionally product rows are not linked to LIMS rows.
# Go for all or nothing, do not supply incomplete data.
if len(experiment_info) and (len(experiment_info) == len(product_metrics)):
experiment_info = mlwh_db_row.get_experiment_info()
if len(experiment_info):
assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info)

return assigned
78 changes: 42 additions & 36 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,6 @@
test_ini = os.path.join(os.path.dirname(__file__), "testdb.ini")


@pytest.fixture(scope="package")
def config() -> configparser.ConfigParser:
# Database credentials for the test MySQL instance are stored here. This
# should be an instance in a container, discarded after each test run.
test_config = configparser.ConfigParser()
test_config.read(test_ini)
return test_config


def mysql_url(
config: configparser.ConfigParser,
section: str,
Expand Down Expand Up @@ -70,6 +61,43 @@ def mysql_url(
)


def insert_from_yaml(session, dir_path, module_name):
    """Load YAML fixtures from a directory into database tables.

    Directory entries are processed in the sorted order of their paths.
    An entry named `<digits>-<ClassName>.yml` (example: `200-PacBioRun.yml`)
    is parsed as YAML and its content is inserted into the table of the ORM
    class `ClassName`, which is looked up in the `module_name` module.
    Entries whose names do not follow this pattern are skipped without
    being opened. A single commit is issued after all entries have been
    processed.

    Args:
        session: database session; its `execute` and `commit` methods
            are called.
        dir_path: path of the directory that holds the fixture files.
        module_name: importable name of the schema module where the table
            ORM classes are defined.

    Raises:
        AttributeError: if the schema module has no attribute named after
            the class part of a fixture file name.
    """
    # Load the schema module where the table ORM classes are defined.
    module = importlib.import_module(module_name)

    # File name example: 200-PacBioRun.yml
    fixture_name = re.compile(r"\A\d+-([a-zA-Z]+)\.yml\Z")

    for path in sorted(pathlib.Path(dir_path).iterdir(), key=str):
        m = fixture_name.match(path.name)
        if m is None:
            # Check the name before touching the entry, so that
            # subdirectories and unrelated files are never opened.
            continue
        table_class = getattr(module, m.group(1))
        with path.open("r") as f:
            data = yaml.safe_load(f)
        session.execute(insert(table_class), data)

    session.commit()


def compare_dates(date_obj, date_string):
    """Assert that `date_obj` renders as `date_string`.

    The date is rendered in ISO format with a space between the date and
    the time and with seconds precision, e.g. `2022-12-02 15:11:22`.
    """
    rendered = date_obj.isoformat(sep=" ", timespec="seconds")
    assert rendered == date_string


@pytest.fixture(scope="package")
def config() -> configparser.ConfigParser:
    """Return the parsed test database configuration.

    The `test_ini` file stores database credentials for the test MySQL
    instance. This should be an instance in a container, discarded after
    each test run.
    """
    parser = configparser.ConfigParser()
    parser.read(test_ini)
    return parser


@pytest.fixture(scope="module", name="mlwhdb_test_sessionfactory")
def create_mlwhdb_test_sessionfactory(config):
"""Create a MLWH SQLAlchemy session factory, using credentials from config.
Expand Down Expand Up @@ -165,30 +193,8 @@ def override_get_qc_db():
return client


def insert_from_yaml(session, dir_path, module_name):

# Load the schema module where the table ORM classes are defined.
module = importlib.import_module(module_name)

# Find all files in a given directory.
dir_obj = pathlib.Path(dir_path)
file_paths = list(str(f) for f in dir_obj.iterdir())
file_paths.sort()

for file_path in file_paths:
with open(file_path, "r") as f:
(head, file_name) = os.path.split(file_path)
# File name example: 200-PacBioRun.yml
m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name)
if m is None:
raise Exception(f"Unexpected file {file_path} in fixtures.")
class_name = m.group(1)
table_class = getattr(module, class_name)
data = yaml.safe_load(f)
session.execute(insert(table_class), data)

session.commit()


def compare_dates(date_obj, date_string):
assert date_obj.isoformat(sep=" ", timespec="seconds") == date_string
@pytest.fixture(scope="module", name="mlwhdb_load_runs")
def mlwhdb_load_from_yaml(mlwhdb_test_session):
    """Load the PacBio runs YAML fixtures into the test MLWH database.

    Tests request this fixture by the name `mlwhdb_load_runs`.
    """
    fixtures_dir = "tests/data/mlwh_pb_runs"
    schema_module = "lang_qc.db.mlwh_schema"
    insert_from_yaml(mlwhdb_test_session, fixtures_dir, schema_module)
Loading

0 comments on commit f599e37

Please sign in to comment.