From 8788fd948482c6d4355dd019597ab78cb7b609b1 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 23 Feb 2024 11:33:40 +0000 Subject: [PATCH 01/33] Added a script to validate the ID generator All PacBio mlwh records are inspected, the ID is generated from scratch and compared to the one stored in the database. --- misc/validate_id_generator.py | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100755 misc/validate_id_generator.py diff --git a/misc/validate_id_generator.py b/misc/validate_id_generator.py new file mode 100755 index 00000000..67c55d3d --- /dev/null +++ b/misc/validate_id_generator.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +from npg_id_generation.pac_bio import PacBioEntity, concatenate_tags +from sqlalchemy import select + +from lang_qc.db.mlwh_connection import get_mlwh_db +from lang_qc.db.mlwh_schema import PacBioRunWellMetrics + +session = next(get_mlwh_db()) + +count = 0 +num_mismatches = 0 +for well in session.execute(select(PacBioRunWellMetrics)).scalars(): + id_generated = PacBioEntity( + run_name=well.pac_bio_run_name, + well_label=well.well_label, + plate_number=well.plate_number, + ).hash_product_id() + if well.id_pac_bio_product != id_generated: + num_mismatches += 1 + print(f"Mismatch for stored ID {well.id_pac_bio_product}") + count += 1 + +print(f"{count} PacBioRunWellMetrics records examined, {num_mismatches} mismatches") + +count = 0 +num_mismatches = 0 +unlinked = 0 +for well in session.execute(select(PacBioRunWellMetrics)).scalars(): + for product in well.pac_bio_product_metrics: + run = product.pac_bio_run + if run is None: + unlinked += 1 + continue + tags = [] + if run.tag_sequence is not None: + tags.append(run.tag_sequence) + if run.tag2_sequence is not None: + tags.append(run.tag2_sequence) + tags_string = concatenate_tags(tags) if len(tags) else None + id_generated = PacBioEntity( + run_name=well.pac_bio_run_name, + well_label=well.well_label, + plate_number=well.plate_number, 
tags=tags_string, + ).hash_product_id() + if product.id_pac_bio_product != id_generated: + num_mismatches += 1 + print(f"Mismatch for stored ID {well.id_pac_bio_product}") + count += 1 + +print(f"{count} PacBioProductlMetrics records examined, {num_mismatches} mismatches") +print(f"{unlinked} product rows are not linked to LIMS data") From 23ef8303ff9899bbc4292768db4d70b59decb10f Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Mon, 26 Feb 2024 15:53:19 +0000 Subject: [PATCH 02/33] Update PacBioProductMetrics definition to include new per-product metrics --- lang_qc/db/mlwh_schema.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py index 9b7ededb..5f307623 100644 --- a/lang_qc/db/mlwh_schema.py +++ b/lang_qc/db/mlwh_schema.py @@ -579,6 +579,30 @@ class PacBioProductMetrics(Base): index=True, comment="The final QC outcome of the product as 0(failed), 1(passed) or NULL", ) + hifi_read_bases = Column( + mysqlBIGINT(unsigned=True), nullable=True, comment="The number of HiFi bases" + ) + hifi_num_reads = Column( + mysqlINTEGER(unsigned=True), nullable=True, comment="The number of HiFi reads" + ) + hifi_read_length_mean = Column( + mysqlSMALLINT(unsigned=True), nullable=True, comment="The mean HiFi read length" + ) + barcode_quality_score = Column( + mysqlSMALLINT(unsigned=True), + nullable=True, + comment="The mean barcode HiFi quality score", + ) + hifi_read_quality_mean = Column( + mysqlSMALLINT(unsigned=True), + nullable=True, + comment="The mean HiFi base quality", + ) + hifi_bases_percent = Column( + mysqlFLOAT(), + nullable=True, + comment="The HiFi bases expressed as a percentage of the total HiFi bases", + ) pac_bio_run_well_metrics = relationship( "PacBioRunWellMetrics", back_populates="pac_bio_product_metrics" From 5daae743533ccaf5f6d102cd747dd41c4016a903 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 28 Feb 2024 16:24:53 +0000 Subject: [PATCH 03/33] Removed unused method --- 
lang_qc/db/helper/wells.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 100f407f..5ba05895 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -310,21 +310,6 @@ def _get_wells_for_status( return wells - def _add_tracking_info(self, wells: List[PacBioWell]): - - for well in wells: - # One query for all or query per well? The latter for now to avoid the need - # to match the records later. Should be fast enough for small-ish pages, we - # query on a unique key. - db_well = self.get_mlwh_well_by_product_id(product_id=well.product_id) - if db_well is None: - # No error if no matching mlwh record is found. - logging.warning( - f"No mlwh record for run '{well.run_name}' well '{well.label}'" - ) - else: - well.copy_run_tracking_info(db_well) - def _upcoming_wells(self): """ Upcoming wells are recent wells, which do not belong to any other From 08cf2eed23daf05dcebccd28afa5721c37f6313e Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 28 Feb 2024 17:36:33 +0000 Subject: [PATCH 04/33] Incorrect type for hifi_read_length_mean --- lang_qc/db/mlwh_schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py index 5f307623..bc1ea86a 100644 --- a/lang_qc/db/mlwh_schema.py +++ b/lang_qc/db/mlwh_schema.py @@ -586,7 +586,7 @@ class PacBioProductMetrics(Base): mysqlINTEGER(unsigned=True), nullable=True, comment="The number of HiFi reads" ) hifi_read_length_mean = Column( - mysqlSMALLINT(unsigned=True), nullable=True, comment="The mean HiFi read length" + mysqlINTEGER(unsigned=True), nullable=True, comment="The mean HiFi read length" ) barcode_quality_score = Column( mysqlSMALLINT(unsigned=True), @@ -594,7 +594,7 @@ class PacBioProductMetrics(Base): comment="The mean barcode HiFi quality score", ) hifi_read_quality_mean = Column( - mysqlSMALLINT(unsigned=True), + mysqlINTEGER(unsigned=True), 
nullable=True, comment="The mean HiFi base quality", ) From 5c6dd8e271345eb263a7f2ae5956fd5680f79e47 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 13:31:06 +0000 Subject: [PATCH 05/33] Ensure empty lists of qc states are not returned. The description of the 'get_qc_states_by_id_product_list' method in npg_langqc.db.helper.qc says that when sequencing qc states are requested "it is guaranteed that the list of QcState objects has only one member". However, the implementation did not guarantee this. It was possible to get an empty list of qc states. --- lang_qc/db/helper/qc.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lang_qc/db/helper/qc.py b/lang_qc/db/helper/qc.py index c4705708..fb5fe2cd 100644 --- a/lang_qc/db/helper/qc.py +++ b/lang_qc/db/helper/qc.py @@ -422,11 +422,18 @@ def _map_to_qc_state_models( """ response = dict() for product in seq_products: - response[product.id_product] = [] - for qc in product.qc_state: - if sequencing_outcomes_only and (qc.qc_type.qc_type != "sequencing"): - continue - response[product.id_product].append(QcState.from_orm(qc)) + # qc_states = [] + # for qc in product.qc_state: + # if sequencing_outcomes_only and (qc.qc_type.qc_type != "sequencing"): + # continue + # qc_states.append(QcState.from_orm(qc)) + qc_states = [ + QcState.from_orm(qc) + for qc in product.qc_state + if (not sequencing_outcomes_only) or (qc.qc_type.qc_type == "sequencing") + ] + if len(qc_states) != 0: + response[product.id_product] = qc_states return response From 429911f9dfc5bd0258c08ff00d622357d41114a8 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 13:37:24 +0000 Subject: [PATCH 06/33] A single call to retrieve qc states for products. Replaced multiple calls to retrieve qc states for a list of products. To simplify the logic, made this retrieval unconditional. Whether the wells are likely to have any associated qc state or not, an attempt to get qc states is made. 
--- lang_qc/db/helper/wells.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 5ba05895..a32ab403 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -245,7 +245,7 @@ def create_for_run(self, run_name: str) -> PacBioPagedWells: page_number=self.page_number, page_size=self.page_size, total_number_of_items=total_number_of_wells, - wells=self._well_models(wells, True), + wells=self._well_models(wells), ) def _build_query4status(self, qc_flow_status: QcFlowStatusEnum): @@ -358,7 +358,7 @@ def _upcoming_wells(self): self.total_number_of_items = len(wells) # Save the number of retrieved wells. - return self._well_models(self.slice_data(wells), False) + return self._well_models(self.slice_data(wells)) def _recent_inbox_wells(self, recent_wells): @@ -401,7 +401,6 @@ def _aborted_and_unknown_wells(self, qc_flow_status: QcFlowStatusEnum): .all() ) - qc_state_applicable = True if qc_flow_status == QcFlowStatusEnum.UNKNOWN: # Remove the wells that the QC team has dealt with. ids_with_qc_state = products_have_qc_state( @@ -410,25 +409,22 @@ def _aborted_and_unknown_wells(self, qc_flow_status: QcFlowStatusEnum): sequencing_outcomes_only=True, ) wells = [w for w in wells if w.id_pac_bio_product not in ids_with_qc_state] - qc_state_applicable = False # Save the number of retrieved rows. self.total_number_of_items = len(wells) - return self._well_models(self.slice_data(wells), qc_state_applicable) + return self._well_models(self.slice_data(wells)) def _well_models( self, db_wells_list: List[PacBioRunWellMetrics], - qc_state_applicable: bool = False, ): - # Normally QC data is not available for the inbox, aborted, etc. - # wells. If some well with a non-inbox status has QC state assigned, - # the same well will also be retrieved by the 'in progress' or - # 'on hold' or 'qc complete' queries. 
However, it is useful to display - # the QC state if it is available. The `qc_state_applicable` argument - # is a hint to fetch QC state. + qced_products = get_qc_states_by_id_product_list( + session=self.qcdb_session, + ids=[db_well.id_pac_bio_product for db_well in db_wells_list], + sequencing_outcomes_only=True, + ) pb_wells = [] for db_well in db_wells_list: id_product = db_well.id_pac_bio_product @@ -438,16 +434,8 @@ def _well_models( "plate_number": db_well.plate_number, "label": db_well.well_label, } - if qc_state_applicable: - # TODO: Query by all IDs at once. - qced_products = get_qc_states_by_id_product_list( - session=self.qcdb_session, - ids=[id_product], - sequencing_outcomes_only=True, - ).get(id_product) - # A well can have only one or zero current sequencing outcomes. - if qced_products is not None and (len(qced_products) > 0): - attrs["qc_state"] = qced_products[0] + if id_product in qced_products: + attrs["qc_state"] = qced_products[id_product][0] pb_well = PacBioWell.model_validate(attrs) pb_well.copy_run_tracking_info(db_well) pb_wells.append(pb_well) From ce953e3af91392922325ca4e77b39a07b4246817 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 13:53:40 +0000 Subject: [PATCH 07/33] Optimised qc state retrieval for a single product. Where sequencing qc state for a single product is needed, it is more efficient to use `get_qc_state_for_product` method since it executes one SQL query, no additional filtering is involved. 
--- lang_qc/models/pacbio/well.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index d83baa87..54adff5d 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -26,7 +26,7 @@ from pydantic import BaseModel, ConfigDict, Field from sqlalchemy.orm import Session -from lang_qc.db.helper.qc import get_qc_states_by_id_product_list +from lang_qc.db.helper.qc import get_qc_state_for_product from lang_qc.db.mlwh_schema import PacBioRunWellMetrics from lang_qc.models.pacbio.experiment import PacBioExperiment from lang_qc.models.pacbio.qc_data import QCDataWell @@ -151,10 +151,10 @@ def from_orm(cls, mlwh_db_row: PacBioRunWellMetrics, qc_session: Session): if len(experiment_info): obj.experiment_tracking = PacBioExperiment.from_orm(experiment_info) - qced_products = get_qc_states_by_id_product_list( - session=qc_session, ids=[id_product], sequencing_outcomes_only=True - ).get(id_product) - if qced_products is not None: - obj.qc_state = qced_products[0] + qc_state_db = get_qc_state_for_product( + session=qc_session, id_product=id_product + ) + if qc_state_db is not None: + obj.qc_state = QcState.from_orm(qc_state_db) return obj From c08cd8430c68d7ab7ac4b3b7a9039dbb4a94c3e8 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 14:35:00 +0000 Subject: [PATCH 08/33] Uniform compute for wells that have not been qc-ed. 
--- lang_qc/db/helper/wells.py | 58 ++++++++++++++------------------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index a32ab403..63ab3b77 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -350,40 +350,18 @@ def _upcoming_wells(self): ) ) - wells = self.session.execute(query).scalars().all() - ids_with_qc_state = products_have_qc_state( - session=self.qcdb_session, ids=[w.id_pac_bio_product for w in wells] - ) - wells = [w for w in wells if w.id_pac_bio_product not in ids_with_qc_state] - + recent_wells = self.session.execute(query).scalars().all() + wells = self._wells_without_seq_qc_state(recent_wells) self.total_number_of_items = len(wells) # Save the number of retrieved wells. return self._well_models(self.slice_data(wells)) def _recent_inbox_wells(self, recent_wells): - inbox_wells_indexes = [] - for index, db_well in enumerate(recent_wells): - id_product = db_well.id_pac_bio_product - # TODO: Create a method for retrieving a seq. QC state for a product. - qced_products = get_qc_states_by_id_product_list( - session=self.qcdb_session, - ids=[id_product], - sequencing_outcomes_only=True, - ).get(id_product) - if qced_products is None: - inbox_wells_indexes.append(index) - - # Save the number of retrieved rows. - self.total_number_of_items = len(inbox_wells_indexes) - - inbox_wells = [] - # Iterate over indexes of records we want for this page and retrieve data - # for this page. 
- for index in self.slice_data(inbox_wells_indexes): - inbox_wells.append(recent_wells[index]) - - return self._well_models(inbox_wells) + wells = self._wells_without_seq_qc_state(recent_wells) + self.total_number_of_items = len(wells) + + return self._well_models(self.slice_data(wells)) def _aborted_and_unknown_wells(self, qc_flow_status: QcFlowStatusEnum): @@ -402,15 +380,7 @@ def _aborted_and_unknown_wells(self, qc_flow_status: QcFlowStatusEnum): ) if qc_flow_status == QcFlowStatusEnum.UNKNOWN: - # Remove the wells that the QC team has dealt with. - ids_with_qc_state = products_have_qc_state( - session=self.qcdb_session, - ids=[w.id_pac_bio_product for w in wells], - sequencing_outcomes_only=True, - ) - wells = [w for w in wells if w.id_pac_bio_product not in ids_with_qc_state] - - # Save the number of retrieved rows. + wells = self._wells_without_seq_qc_state(wells) self.total_number_of_items = len(wells) return self._well_models(self.slice_data(wells)) @@ -441,3 +411,17 @@ def _well_models( pb_wells.append(pb_well) return pb_wells + + def _wells_without_seq_qc_state( + self, + db_wells_list: List[PacBioRunWellMetrics], + ): + + ids_with_qc_state = products_have_qc_state( + session=self.qcdb_session, + ids=[w.id_pac_bio_product for w in db_wells_list], + sequencing_outcomes_only=True, + ) + return [ + w for w in db_wells_list if w.id_pac_bio_product not in ids_with_qc_state + ] From c89c411ddca187b6879aeabe41562ba4308686f0 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 17:47:47 +0000 Subject: [PATCH 09/33] Moved the code next to its doc --- lang_qc/db/helper/qc.py | 43 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/lang_qc/db/helper/qc.py b/lang_qc/db/helper/qc.py index fb5fe2cd..5edbfbfd 100644 --- a/lang_qc/db/helper/qc.py +++ b/lang_qc/db/helper/qc.py @@ -95,10 +95,18 @@ def get_qc_states_by_id_product_list( `sequencing_outcomes_only`- a boolean flag, False by default. 
""" - return _map_to_qc_state_models( - seq_products=_get_seq_product_by_id_list(session, ids), - sequencing_outcomes_only=sequencing_outcomes_only, - ) + seq_products = _get_seq_product_by_id_list(session, ids) + response = dict() + for product in seq_products: + qc_states = [ + QcState.from_orm(qc) + for qc in product.qc_state + if (not sequencing_outcomes_only) or (qc.qc_type.qc_type == "sequencing") + ] + if len(qc_states) != 0: + response[product.id_product] = qc_states + + return response def product_has_qc_state( @@ -410,33 +418,6 @@ def _get_seq_product_by_id_list( return session.execute(query).scalars().all() -def _map_to_qc_state_models( - seq_products: list[SeqProduct], sequencing_outcomes_only: bool = False -) -> dict[ChecksumSHA256, list[QcState]]: - """ - Given a list of SeqProducts, convert all related QC states into - QcState response format and hashes them by their product ID. - - If only sequencing type QC states are required, an optional - argument, sequencing_outcomes_only, should be set to True. - """ - response = dict() - for product in seq_products: - # qc_states = [] - # for qc in product.qc_state: - # if sequencing_outcomes_only and (qc.qc_type.qc_type != "sequencing"): - # continue - # qc_states.append(QcState.from_orm(qc)) - qc_states = [ - QcState.from_orm(qc) - for qc in product.qc_state - if (not sequencing_outcomes_only) or (qc.qc_type.qc_type == "sequencing") - ] - if len(qc_states) != 0: - response[product.id_product] = qc_states - return response - - def _get_qc_type_row(session: Session, qc_type: str) -> QcType: qc_type_row = None From f5f79b7f19659ecca26b556f7ccbcab5a331bce4 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 4 Mar 2024 18:30:51 +0000 Subject: [PATCH 10/33] Simplified getting qc states for multiple products. 
--- lang_qc/db/helper/qc.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/lang_qc/db/helper/qc.py b/lang_qc/db/helper/qc.py index 5edbfbfd..fd635ce0 100644 --- a/lang_qc/db/helper/qc.py +++ b/lang_qc/db/helper/qc.py @@ -19,6 +19,7 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . +from collections import defaultdict from datetime import datetime from sqlalchemy import and_, func, select @@ -95,18 +96,13 @@ def get_qc_states_by_id_product_list( `sequencing_outcomes_only`- a boolean flag, False by default. """ - seq_products = _get_seq_product_by_id_list(session, ids) - response = dict() - for product in seq_products: - qc_states = [ - QcState.from_orm(qc) - for qc in product.qc_state - if (not sequencing_outcomes_only) or (qc.qc_type.qc_type == "sequencing") - ] - if len(qc_states) != 0: - response[product.id_product] = qc_states + qc_states = _get_qc_state_by_id_list(session, ids, sequencing_outcomes_only) - return response + response = defaultdict(list) + for state in qc_states: + response[state.seq_product.id_product].append(QcState.from_orm(state)) + + return dict(response) def product_has_qc_state( @@ -393,28 +389,30 @@ def assign_qc_state_to_product( return qc_state_db -def _get_seq_product_by_id_list( - session: Session, ids: list[ChecksumSHA256] -) -> list[SeqProduct]: +def _get_qc_state_by_id_list( + session: Session, ids: list[ChecksumSHA256], sequencing_outcomes_only: bool +) -> list[QcStateDb]: """ Generates and executes a query for SeqProducts from a list of product IDs. Prefetch all related QC states, types, etc. 
""" query = ( - select(SeqProduct) - .join(QcStateDb) + select(QcStateDb) + .join(QcStateDb.seq_product) .join(QcType) .join(QcStateDict) .join(User) .where(SeqProduct.id_product.in_(ids)) .options( - selectinload(SeqProduct.qc_state).options( - selectinload(QcStateDb.qc_type), - selectinload(QcStateDb.user), - selectinload(QcStateDb.qc_state_dict), - ) + selectinload(QcStateDb.seq_product), + selectinload(QcStateDb.qc_type), + selectinload(QcStateDb.user), + selectinload(QcStateDb.qc_state_dict), ) ) + if sequencing_outcomes_only is True: + query = query.where(QcType.qc_type == SEQUENCING_QC_TYPE) + return session.execute(query).scalars().all() From c54de496458e1ea8886959223f7790cfc37cc837 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 5 Mar 2024 10:03:38 +0000 Subject: [PATCH 11/33] Use Optional type hint for fields with None default --- lang_qc/models/pacbio/well.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 54adff5d..9e42a255 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -57,16 +57,20 @@ class PacBioWell(BaseModel, extra="forbid"): title="Run name", description="PacBio run name as registered in LIMS" ) # Run and well tracking information from SMRT Link - run_start_time: datetime = Field(default=None, title="Run start time") - run_complete_time: datetime = Field(default=None, title="Run complete time") - well_start_time: datetime = Field(default=None, title="Well start time") - well_complete_time: datetime = Field(default=None, title="Well complete time") - run_status: str = Field(default=None, title="Current PacBio run status") - well_status: str = Field(default=None, title="Current PacBio well status") - instrument_name: str = Field(default=None, title="Instrument name") - instrument_type: str = Field(default=None, title="Instrument type") - - qc_state: QcState = Field( + run_start_time: Optional[datetime] 
= Field(default=None, title="Run start time") + run_complete_time: Optional[datetime] = Field( + default=None, title="Run complete time" + ) + well_start_time: Optional[datetime] = Field(default=None, title="Well start time") + well_complete_time: Optional[datetime] = Field( + default=None, title="Well complete time" + ) + run_status: Optional[str] = Field(default=None, title="Current PacBio run status") + well_status: Optional[str] = Field(default=None, title="Current PacBio well status") + instrument_name: Optional[str] = Field(default=None, title="Instrument name") + instrument_type: Optional[str] = Field(default=None, title="Instrument type") + + qc_state: Optional[QcState] = Field( default=None, title="Current QC state of this well", description=""" @@ -117,7 +121,7 @@ class PacBioWellFull(PacBioWell): metrics: QCDataWell = Field( title="Currently available QC data for well", ) - experiment_tracking: PacBioExperiment = Field( + experiment_tracking: Optional[PacBioExperiment] = Field( default=None, title="Experiment tracking information", description=""" From 9d1c7ceffb3064b9e066c6bbdb96736b3218f147 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 5 Mar 2024 12:05:46 +0000 Subject: [PATCH 12/33] pydantic BaseModel is replaced by pydantic dataclass ... for some models in order to simplify instantiation of the objects. 
--- lang_qc/db/helper/wells.py | 21 ++----- lang_qc/endpoints/pacbio_well.py | 5 +- lang_qc/models/pacbio/well.py | 98 ++++++++++++++++---------------- tests/test_pac_well_full.py | 12 +++- 4 files changed, 67 insertions(+), 69 deletions(-) diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 63ab3b77..91c1d9b7 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -290,14 +290,7 @@ def _get_wells_for_status( id_product = qc_state_model.id_product mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product) if mlwh_well is not None: - pbw = PacBioWell( - id_product=id_product, - run_name=mlwh_well.pac_bio_run_name, - plate_number=mlwh_well.plate_number, - label=mlwh_well.well_label, - qc_state=qc_state_model, - ) - pbw.copy_run_tracking_info(mlwh_well) + pbw = PacBioWell(db_well=mlwh_well, qc_state=qc_state_model) wells.append(pbw) else: """ @@ -398,16 +391,10 @@ def _well_models( pb_wells = [] for db_well in db_wells_list: id_product = db_well.id_pac_bio_product - attrs = { - "id_product": id_product, - "run_name": db_well.pac_bio_run_name, - "plate_number": db_well.plate_number, - "label": db_well.well_label, - } + qc_state = None if id_product in qced_products: - attrs["qc_state"] = qced_products[id_product][0] - pb_well = PacBioWell.model_validate(attrs) - pb_well.copy_run_tracking_info(db_well) + qc_state = qced_products[id_product][0] + pb_well = PacBioWell(db_well=db_well, qc_state=qc_state) pb_wells.append(pb_well) return pb_wells diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py index dca152e9..f9d49573 100644 --- a/lang_qc/endpoints/pacbio_well.py +++ b/lang_qc/endpoints/pacbio_well.py @@ -29,6 +29,7 @@ from lang_qc.db.helper.qc import ( assign_qc_state_to_product, claim_qc_for_product, + get_qc_state_for_product, product_has_qc_state, ) from lang_qc.db.helper.well import well_seq_product_find_or_create @@ -179,7 +180,9 @@ def get_seq_metrics( mlwh_well = 
_find_well_product_or_error(id_product, mlwhdb_session) - return PacBioWellFull.from_orm(mlwh_well, qcdb_session) + qc_state_db = get_qc_state_for_product(session=qcdb_session, id_product=id_product) + qc_state = None if qc_state_db is None else QcState.from_orm(qc_state_db) + return PacBioWellFull(db_well=mlwh_well, qc_state=qc_state) @router.post( diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 9e42a255..efd5abcd 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -21,12 +21,11 @@ # this program. If not, see . from datetime import datetime -from typing import Optional +from typing import Any, Optional -from pydantic import BaseModel, ConfigDict, Field -from sqlalchemy.orm import Session +from pydantic import Field, model_validator +from pydantic.dataclasses import dataclass -from lang_qc.db.helper.qc import get_qc_state_for_product from lang_qc.db.mlwh_schema import PacBioRunWellMetrics from lang_qc.models.pacbio.experiment import PacBioExperiment from lang_qc.models.pacbio.qc_data import QCDataWell @@ -34,7 +33,8 @@ from lang_qc.models.qc_state import QcState -class PacBioWell(BaseModel, extra="forbid"): +@dataclass +class PacBioWell: """ A response model for a single PacBio well on a particular PacBio run. The class contains the attributes that uniquely define this well (`run_name` @@ -45,6 +45,8 @@ class PacBioWell(BaseModel, extra="forbid"): sequenced or QC metrics or assessment for such data. """ + db_well: PacBioRunWellMetrics = Field(init_var=True) + # Well identifies. 
id_product: str = Field(title="Product identifier") label: str = Field(title="Well label", description="The label of the PacBio well") @@ -80,19 +82,33 @@ class PacBioWell(BaseModel, extra="forbid"): """, ) - def copy_run_tracking_info(self, db_well: PacBioRunWellMetrics): + @model_validator(mode="before") + def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: """ Populates this object with the run and well tracking information from a database row that is passed as an argument. """ - self.run_start_time = db_well.run_start - self.run_complete_time = db_well.run_complete - self.well_start_time = db_well.well_start - self.well_complete_time = db_well.well_complete - self.run_status = db_well.run_status - self.well_status = db_well.well_status - self.instrument_name = db_well.instrument_name - self.instrument_type = db_well.instrument_type + + # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi + mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] + assigned = dict() + assigned["id_product"] = mlwh_db_row.id_pac_bio_product + assigned["label"] = mlwh_db_row.well_label + assigned["plate_number"] = mlwh_db_row.plate_number + assigned["run_name"] = mlwh_db_row.pac_bio_run_name + assigned["run_start_time"] = mlwh_db_row.run_start + assigned["run_complete_time"] = mlwh_db_row.run_complete + assigned["well_start_time"] = mlwh_db_row.well_start + assigned["well_complete_time"] = mlwh_db_row.well_complete + assigned["run_status"] = mlwh_db_row.run_status + assigned["well_status"] = mlwh_db_row.well_status + assigned["instrument_name"] = mlwh_db_row.instrument_name + assigned["instrument_type"] = mlwh_db_row.instrument_type + + if "qc_state" in values.kwargs: + assigned["qc_state"] = values.kwargs["qc_state"] + + return assigned class PacBioPagedWells(PagedResponse, extra="forbid"): @@ -110,6 +126,7 @@ class PacBioPagedWells(PagedResponse, extra="forbid"): ) +@dataclass class PacBioWellFull(PacBioWell): """ A response 
model for a single PacBio well on a particular PacBio run. @@ -128,37 +145,22 @@ class PacBioWellFull(PacBioWell): Laboratory experiment tracking information for this well, if available. """, ) - model_config = ConfigDict(from_attributes=True, extra="forbid") - - @classmethod - def from_orm(cls, mlwh_db_row: PacBioRunWellMetrics, qc_session: Session): - - id_product = mlwh_db_row.id_pac_bio_product - obj = cls( - id_product=id_product, - run_name=mlwh_db_row.pac_bio_run_name, - plate_number=mlwh_db_row.plate_number, - label=mlwh_db_row.well_label, - metrics=QCDataWell.from_orm(mlwh_db_row), - ) - obj.copy_run_tracking_info(mlwh_db_row) - - experiment_info = [] - for row in mlwh_db_row.pac_bio_product_metrics: - exp_row = row.pac_bio_run - if exp_row: - experiment_info.append(exp_row) - else: - # Do not supply incomplete data. - experiment_info = [] - break - if len(experiment_info): - obj.experiment_tracking = PacBioExperiment.from_orm(experiment_info) - - qc_state_db = get_qc_state_for_product( - session=qc_session, id_product=id_product - ) - if qc_state_db is not None: - obj.qc_state = QcState.from_orm(qc_state_db) - - return obj + + @model_validator(mode="before") + def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: + + assigned = super().pre_root(values) + mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] + + assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row) + + product_metrics = mlwh_db_row.pac_bio_product_metrics + experiment_info = [ + pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None + ] + # Occasionally product rows are not linked to LIMS rows. + # Go for all or nothing, do not supply incomplete data. 
+ if len(experiment_info) and (len(experiment_info) == len(product_metrics)): + assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info) + + return assigned diff --git a/tests/test_pac_well_full.py b/tests/test_pac_well_full.py index b1a700e6..c8ff08cb 100644 --- a/tests/test_pac_well_full.py +++ b/tests/test_pac_well_full.py @@ -1,5 +1,6 @@ from npg_id_generation.pac_bio import PacBioEntity +from lang_qc.db.helper.qc import get_qc_states_by_id_product_list from lang_qc.db.helper.wells import WellWh from lang_qc.models.pacbio.well import PacBioWellFull from tests.conftest import compare_dates, insert_from_yaml @@ -21,7 +22,7 @@ def test_creating_experiment_object( ).hash_product_id() well_row = helper.get_mlwh_well_by_product_id(id_product) - pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session) + pb_well = PacBioWellFull(db_well=well_row) assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION-RUN-92" assert pb_well.label == "A1" @@ -45,7 +46,12 @@ def test_creating_experiment_object( ).hash_product_id() well_row = helper.get_mlwh_well_by_product_id(id_product) - pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session) + qc_state = get_qc_states_by_id_product_list( + session=qcdb_test_session, + ids=[id_product], + sequencing_outcomes_only=True, + ) + pb_well = PacBioWellFull(db_well=well_row, qc_state=qc_state) assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION_RUN_1" assert pb_well.label == "B1" @@ -65,7 +71,7 @@ def test_creating_experiment_object( ).hash_product_id() well_row = helper.get_mlwh_well_by_product_id(id_product) - pb_well = PacBioWellFull.from_orm(well_row, qcdb_test_session) + pb_well = PacBioWellFull(db_well=well_row, qc_state=None) assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION_RUN_10" assert pb_well.label == "C1" From bcbabcd31530789f68e6530b1a28b20836e9fb7d Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 5 Mar 2024 13:40:56 +0000 
Subject: [PATCH 13/33] Make the dataclasses semi-immutable - no change for the values --- lang_qc/models/pacbio/well.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index efd5abcd..1568842a 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -33,7 +33,7 @@ from lang_qc.models.qc_state import QcState -@dataclass +@dataclass(kw_only=True, frozen=True) class PacBioWell: """ A response model for a single PacBio well on a particular PacBio run. @@ -126,7 +126,7 @@ class PacBioPagedWells(PagedResponse, extra="forbid"): ) -@dataclass +@dataclass(kw_only=True, frozen=True) class PacBioWellFull(PacBioWell): """ A response model for a single PacBio well on a particular PacBio run. From 064c2bb3a35bf04a14c99aa46adc01f8604dd017 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 6 Mar 2024 15:22:22 +0000 Subject: [PATCH 14/33] Auto-map column names to model fields --- lang_qc/models/pacbio/well.py | 63 ++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 1568842a..ad6acf84 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -33,6 +33,25 @@ from lang_qc.models.qc_state import QcState +def get_field_names(cls): + """Returns a list of field names for a class given as an argument. + + The fields that can only be used at the object initialisation step + are excluded. + """ + + field_names = [] + for field_name in cls.__dataclass_fields__: + field = cls.__dataclass_fields__[field_name] + if field.default.init_var is True: + continue + name = field.default.validation_alias + if name is None: + name = field.name + field_names.append(name) + return field_names + + @dataclass(kw_only=True, frozen=True) class PacBioWell: """ @@ -48,24 +67,36 @@ class PacBioWell: db_well: PacBioRunWellMetrics = Field(init_var=True) # Well identifies. 
- id_product: str = Field(title="Product identifier") - label: str = Field(title="Well label", description="The label of the PacBio well") + id_product: str = Field( + title="Product identifier", validation_alias="id_pac_bio_product" + ) + label: str = Field( + title="Well label", + description="The label of the PacBio well", + validation_alias="well_label", + ) plate_number: Optional[int] = Field( default=None, title="Plate number", description="Plate number, relevant for Revio instruments only", ) run_name: str = Field( - title="Run name", description="PacBio run name as registered in LIMS" + title="Run name", + description="PacBio run name as registered in LIMS", + validation_alias="pac_bio_run_name", ) # Run and well tracking information from SMRT Link - run_start_time: Optional[datetime] = Field(default=None, title="Run start time") + run_start_time: Optional[datetime] = Field( + default=None, title="Run start time", validation_alias="run_start" + ) run_complete_time: Optional[datetime] = Field( - default=None, title="Run complete time" + default=None, title="Run complete time", validation_alias="run_complete" + ) + well_start_time: Optional[datetime] = Field( + default=None, title="Well start time", validation_alias="well_start" ) - well_start_time: Optional[datetime] = Field(default=None, title="Well start time") well_complete_time: Optional[datetime] = Field( - default=None, title="Well complete time" + default=None, title="Well complete time", validation_alias="well_complete" ) run_status: Optional[str] = Field(default=None, title="Current PacBio run status") well_status: Optional[str] = Field(default=None, title="Current PacBio well status") @@ -91,19 +122,13 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] + + column_names = [column.key for column in 
PacBioRunWellMetrics.__table__.columns] + assigned = dict() - assigned["id_product"] = mlwh_db_row.id_pac_bio_product - assigned["label"] = mlwh_db_row.well_label - assigned["plate_number"] = mlwh_db_row.plate_number - assigned["run_name"] = mlwh_db_row.pac_bio_run_name - assigned["run_start_time"] = mlwh_db_row.run_start - assigned["run_complete_time"] = mlwh_db_row.run_complete - assigned["well_start_time"] = mlwh_db_row.well_start - assigned["well_complete_time"] = mlwh_db_row.well_complete - assigned["run_status"] = mlwh_db_row.run_status - assigned["well_status"] = mlwh_db_row.well_status - assigned["instrument_name"] = mlwh_db_row.instrument_name - assigned["instrument_type"] = mlwh_db_row.instrument_type + for field_name in get_field_names(cls): + if field_name in column_names: + assigned[field_name] = getattr(mlwh_db_row, field_name) if "qc_state" in values.kwargs: assigned["qc_state"] = values.kwargs["qc_state"] From 4d2efbde3c041ae6e29cda1f9f380c5b5d9ae543 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 6 Mar 2024 16:33:20 +0000 Subject: [PATCH 15/33] Update documentation --- lang_qc/models/pacbio/well.py | 45 ++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index ad6acf84..9deea4f0 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -37,7 +37,8 @@ def get_field_names(cls): """Returns a list of field names for a class given as an argument. The fields that can only be used at the object initialisation step - are excluded. + are excluded. For fields, which have a validation_alias defined, + this alias is returned rather than the field name. """ field_names = [] @@ -54,14 +55,24 @@ def get_field_names(cls): @dataclass(kw_only=True, frozen=True) class PacBioWell: - """ - A response model for a single PacBio well on a particular PacBio run. 
- The class contains the attributes that uniquely define this well (`run_name` - and `label`), along with the time line and the current QC state of this well, - if any. + """A basic response model for a single PacBio well. + + `run_name`, `label`, `plate_number`, and `id_product` fields uniquely + identify the well. The model also has fields that reflect the time line + of the run and information about a PacBio instrument. The optional + `qc_state field might contain the current QC state of the well. + + The best way to instantiate the model is via the constructor, supplying + the an ORM object representing a database row with information about + the well and, optionally, the model representing the current QC state. - This model does not contain any information about data that was - sequenced or QC metrics or assessment for such data. + Examples: + well_model = PacBioWell(db_well=well_row) + well_model = PacBioWell(db_well=well_row, qc_state=current_qc_state) + + Mapping of the database values to this model's fields is performed by + a pre `__init__` hook. To enable automatic mapping, some fields of this + model have `validation_alias` set. """ db_well: PacBioRunWellMetrics = Field(init_var=True) @@ -137,9 +148,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: class PacBioPagedWells(PagedResponse, extra="forbid"): - """ - A response model for paged data about PacBio wells. - """ + """A response model for paged data about PacBio wells.""" wells: list[PacBioWell] = Field( default=[], @@ -153,11 +162,15 @@ class PacBioPagedWells(PagedResponse, extra="forbid"): @dataclass(kw_only=True, frozen=True) class PacBioWellFull(PacBioWell): - """ - A response model for a single PacBio well on a particular PacBio run. - The class contains the attributes that uniquely define this well (`run_name` - and `label`), along with the laboratory experiment and sequence run tracking - information, current QC state of this well and QC data for this well. 
+ """A full response model for a single PacBio well. + + The model has teh fields that uniquely define the well (`run_name`, `label`, + `plate_number`, `id_product`), along with the laboratory experiment and + sequence run tracking information, current QC state of this well and + QC data for this well. + + Instance creation is described in the documentation of this class's parent + `PacBioWell`. """ metrics: QCDataWell = Field( From f8761b228a3d0fd4e6c0e33170d73710cf986d42 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Fri, 15 Mar 2024 14:49:27 +0000 Subject: [PATCH 16/33] Incorrect column name in mlwh --- lang_qc/db/mlwh_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py index bc1ea86a..533c80ea 100644 --- a/lang_qc/db/mlwh_schema.py +++ b/lang_qc/db/mlwh_schema.py @@ -588,7 +588,7 @@ class PacBioProductMetrics(Base): hifi_read_length_mean = Column( mysqlINTEGER(unsigned=True), nullable=True, comment="The mean HiFi read length" ) - barcode_quality_score = Column( + barcode_quality_score_mean = Column( mysqlSMALLINT(unsigned=True), nullable=True, comment="The mean barcode HiFi quality score", From d9c3751e943109fd6c3b06175e9ed274a49447b0 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 6 Mar 2024 16:33:20 +0000 Subject: [PATCH 17/33] Update documentation --- lang_qc/models/pacbio/well.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 9deea4f0..03229233 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -60,7 +60,7 @@ class PacBioWell: `run_name`, `label`, `plate_number`, and `id_product` fields uniquely identify the well. The model also has fields that reflect the time line of the run and information about a PacBio instrument. The optional - `qc_state field might contain the current QC state of the well. 
+ `qc_state` field might contain the current QC state of the well. The best way to instantiate the model is via the constructor, supplying the an ORM object representing a database row with information about From 85b9c1a38fa8b2a07442eb042add55e32b2ca3a8 Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 7 Mar 2024 17:44:31 +0000 Subject: [PATCH 18/33] Added pb well summary model --- lang_qc/db/helper/wells.py | 12 +-- lang_qc/models/pacbio/well.py | 43 ++++++++-- tests/test_pac_well_full.py | 85 -------------------- tests/test_pac_well_models.py | 140 +++++++++++++++++++++++++++++++++ tests/test_pb_wells_factory.py | 14 ++-- 5 files changed, 190 insertions(+), 104 deletions(-) delete mode 100644 tests/test_pac_well_full.py create mode 100644 tests/test_pac_well_models.py diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 91c1d9b7..8e56a0a8 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -33,7 +33,7 @@ ) from lang_qc.db.mlwh_schema import PacBioRunWellMetrics from lang_qc.db.qc_schema import QcState, QcStateDict, QcType -from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell +from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary from lang_qc.models.pager import PagedResponse from lang_qc.models.qc_flow_status import QcFlowStatusEnum from lang_qc.models.qc_state import QcState as QcStateModel @@ -195,7 +195,7 @@ def create_for_qc_status( specified by the `page_size`, `page_number` object's attributes and `qc_flow_status` argument of this function.. - The `PacBioWell` objects in `wells` attribute of the returned object + The `PacBioWellPacBioWell` objects in `wells` attribute of the returned object are sorted in a way appropriate for the requested `qc_flow_status`. For the 'in progress' and 'on hold' requests the wells with most recently assigned QC states come first. 
For inbox requests the wells with least @@ -230,7 +230,7 @@ def create_for_run(self, run_name: str) -> PacBioPagedWells: """ Returns `PacBioPagedWells` object that corresponds to the criteria specified by the `page_size` and `page_number` attributes. - The `PacBioWell` objects in `wells` attribute of the returned object + The `PacBioWellSummary` objects in `wells` attribute of the returned object belong to runs specified by the `run_name` argument and are sorted by the run name and well label. """ @@ -281,7 +281,7 @@ def _retrieve_paged_qc_states( def _get_wells_for_status( self, qc_flow_status: QcFlowStatusEnum - ) -> List[PacBioWell]: + ) -> List[PacBioWellSummary]: wells = [] @@ -290,7 +290,7 @@ def _get_wells_for_status( id_product = qc_state_model.id_product mlwh_well = self.get_mlwh_well_by_product_id(id_product=id_product) if mlwh_well is not None: - pbw = PacBioWell(db_well=mlwh_well, qc_state=qc_state_model) + pbw = PacBioWellSummary(db_well=mlwh_well, qc_state=qc_state_model) wells.append(pbw) else: """ @@ -394,7 +394,7 @@ def _well_models( qc_state = None if id_product in qced_products: qc_state = qced_products[id_product][0] - pb_well = PacBioWell(db_well=db_well, qc_state=qc_state) + pb_well = PacBioWellSummary(db_well=db_well, qc_state=qc_state) pb_wells.append(pb_well) return pb_wells diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 03229233..054a3088 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -53,6 +53,14 @@ def get_field_names(cls): return field_names +def get_experiment_info(db_well: PacBioRunWellMetrics): + return [ + pbr + for pbr in [pm.pac_bio_run for pm in db_well.pac_bio_product_metrics] + if pbr is not None + ] + + @dataclass(kw_only=True, frozen=True) class PacBioWell: """A basic response model for a single PacBio well. 
@@ -133,6 +141,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] + assert mlwh_db_row column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns] @@ -147,14 +156,38 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: return assigned +@dataclass(kw_only=True, frozen=True) +class PacBioWellSummary(PacBioWell): + """A response model for a summary about a single PacBio well. + + Adds `study_names` to a list of attributes of the parent class `PacBioWell`. + Instance creation is described in the documentation of the parent class. + """ + + study_names: list = Field( + title="A list of study names", + ) + + @model_validator(mode="before") + def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: + + assigned = super().pre_root(values) + mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] + assigned["study_names"] = [ + row.study.name for row in get_experiment_info(mlwh_db_row) + ] + + return assigned + + class PacBioPagedWells(PagedResponse, extra="forbid"): """A response model for paged data about PacBio wells.""" - wells: list[PacBioWell] = Field( + wells: list[PacBioWellSummary] = Field( default=[], - title="A list of PacBioWell objects", + title="A list of PacBioWellSummary objects", description=""" - A list of `PacBioWell` objects that corresponds to the page number + A list of `PacBioWellSummary` objects that corresponds to the page number and size specified by the `page_size` and `page_number` attributes. 
""", ) @@ -193,9 +226,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row) product_metrics = mlwh_db_row.pac_bio_product_metrics - experiment_info = [ - pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None - ] + experiment_info = get_experiment_info(mlwh_db_row) # Occasionally product rows are not linked to LIMS rows. # Go for all or nothing, do not supply incomplete data. if len(experiment_info) and (len(experiment_info) == len(product_metrics)): diff --git a/tests/test_pac_well_full.py b/tests/test_pac_well_full.py deleted file mode 100644 index c8ff08cb..00000000 --- a/tests/test_pac_well_full.py +++ /dev/null @@ -1,85 +0,0 @@ -from npg_id_generation.pac_bio import PacBioEntity - -from lang_qc.db.helper.qc import get_qc_states_by_id_product_list -from lang_qc.db.helper.wells import WellWh -from lang_qc.models.pacbio.well import PacBioWellFull -from tests.conftest import compare_dates, insert_from_yaml -from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users - - -def test_creating_experiment_object( - mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval -): - - insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" - ) - helper = WellWh(session=mlwhdb_test_session) - - # Full mlwh data, no data in the lang_qc database. 
- id_product = PacBioEntity( - run_name="TRACTION-RUN-92", well_label="A1" - ).hash_product_id() - well_row = helper.get_mlwh_well_by_product_id(id_product) - - pb_well = PacBioWellFull(db_well=well_row) - assert pb_well.id_product == id_product - assert pb_well.run_name == "TRACTION-RUN-92" - assert pb_well.label == "A1" - assert pb_well.plate_number is None - assert pb_well.qc_state is None - compare_dates(pb_well.run_start_time, "2022-04-14 12:52:34") - compare_dates(pb_well.run_complete_time, "2022-04-20 09:16:53") - compare_dates(pb_well.well_start_time, "2022-04-14 13:02:48") - compare_dates(pb_well.well_complete_time, "2022-04-16 12:36:21") - assert pb_well.run_status == "Complete" - assert pb_well.well_status == "Complete" - assert pb_well.metrics is not None - assert pb_well.experiment_tracking is not None - assert pb_well.instrument_name == "64222E" - assert pb_well.instrument_type == "Sequel2e" - - # Only run_well mlwh data (no products), and data in the lang_qc database. - # Very sketchy mlwh qc metrics data - id_product = PacBioEntity( - run_name="TRACTION_RUN_1", well_label="B1" - ).hash_product_id() - well_row = helper.get_mlwh_well_by_product_id(id_product) - - qc_state = get_qc_states_by_id_product_list( - session=qcdb_test_session, - ids=[id_product], - sequencing_outcomes_only=True, - ) - pb_well = PacBioWellFull(db_well=well_row, qc_state=qc_state) - assert pb_well.id_product == id_product - assert pb_well.run_name == "TRACTION_RUN_1" - assert pb_well.label == "B1" - assert pb_well.plate_number is None - assert pb_well.run_status == "Complete" - assert pb_well.well_status == "Complete" - assert pb_well.qc_state is not None - assert pb_well.metrics is not None - assert pb_well.experiment_tracking is None - assert pb_well.instrument_name == "64016" - assert pb_well.instrument_type == "Sequel2" - - # Only run_well mlwh data (no products), no data in the lang_qc database. 
- # Very sketchy mlwh qc metrics data - id_product = PacBioEntity( - run_name="TRACTION_RUN_10", well_label="C1" - ).hash_product_id() - well_row = helper.get_mlwh_well_by_product_id(id_product) - - pb_well = PacBioWellFull(db_well=well_row, qc_state=None) - assert pb_well.id_product == id_product - assert pb_well.run_name == "TRACTION_RUN_10" - assert pb_well.label == "C1" - assert pb_well.plate_number == 1 - assert pb_well.well_status == "Complete" - assert pb_well.run_status == "Aborted" - assert pb_well.qc_state is None - assert pb_well.metrics is not None - assert pb_well.experiment_tracking is None - assert pb_well.instrument_name == "1234" - assert pb_well.instrument_type == "Revio" diff --git a/tests/test_pac_well_models.py b/tests/test_pac_well_models.py new file mode 100644 index 00000000..c8f69715 --- /dev/null +++ b/tests/test_pac_well_models.py @@ -0,0 +1,140 @@ +from npg_id_generation.pac_bio import PacBioEntity + +from lang_qc.db.helper.qc import get_qc_states_by_id_product_list +from lang_qc.db.helper.wells import WellWh +from lang_qc.models.pacbio.well import PacBioWellFull, PacBioWellSummary +from tests.conftest import compare_dates, insert_from_yaml +from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users + +yaml_is_loaded: bool = False + + +def _prepare_data(mlwhdb_session, qcdb_session, run_name, well_label): + """Loads LIMS data for one well. + + Returns a tuple of an mlwh db row and QC state model for that well. 
+ """ + + global yaml_is_loaded + + if yaml_is_loaded is False: + insert_from_yaml( + mlwhdb_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" + ) + yaml_is_loaded = True + + id_product = PacBioEntity( + run_name=run_name, well_label=well_label + ).hash_product_id() + well_row = WellWh(session=mlwhdb_session).get_mlwh_well_by_product_id(id_product) + + qc_state = None + qc_states = get_qc_states_by_id_product_list( + session=qcdb_session, + ids=[id_product], + sequencing_outcomes_only=True, + ) + if id_product in qc_states: + qc_state = qc_states[id_product][0] + + return (well_row, qc_state) + + +def _examine_well_model_a1(pb_well, id_product): + + assert pb_well.id_product == id_product + assert pb_well.run_name == "TRACTION-RUN-92" + assert pb_well.label == "A1" + assert pb_well.plate_number is None + assert pb_well.qc_state is None + compare_dates(pb_well.run_start_time, "2022-04-14 12:52:34") + compare_dates(pb_well.run_complete_time, "2022-04-20 09:16:53") + compare_dates(pb_well.well_start_time, "2022-04-14 13:02:48") + compare_dates(pb_well.well_complete_time, "2022-04-16 12:36:21") + assert pb_well.run_status == "Complete" + assert pb_well.well_status == "Complete" + assert pb_well.instrument_name == "64222E" + assert pb_well.instrument_type == "Sequel2e" + + +def _examine_well_model_b1(pb_well, id_product): + + assert pb_well.id_product == id_product + assert pb_well.run_name == "TRACTION_RUN_1" + assert pb_well.label == "B1" + assert pb_well.plate_number is None + assert pb_well.run_status == "Complete" + assert pb_well.well_status == "Complete" + assert pb_well.qc_state is not None + assert pb_well.instrument_name == "64016" + assert pb_well.instrument_type == "Sequel2" + + +def _examine_well_model_c1(pb_well, id_product): + + assert pb_well.id_product == id_product + assert pb_well.run_name == "TRACTION_RUN_10" + assert pb_well.label == "C1" + assert pb_well.plate_number == 1 + assert pb_well.well_status == "Complete" + assert 
pb_well.run_status == "Aborted" + assert pb_well.qc_state is None + assert pb_well.instrument_name == "1234" + assert pb_well.instrument_type == "Revio" + + +def test_create_full_model( + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval +): + # Full mlwh data, no data in the lang_qc database. + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-92", "A1" + ) + pb_well = PacBioWellFull(db_well=well_row) + _examine_well_model_a1(pb_well, well_row.id_pac_bio_product) + assert pb_well.metrics is not None + assert pb_well.experiment_tracking is not None + + # Only run_well mlwh data (no products), and data in the lang_qc database. + # Very sketchy mlwh qc metrics data. + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_1", "B1" + ) + pb_well = PacBioWellFull(db_well=well_row, qc_state=qc_state) + _examine_well_model_b1(pb_well, well_row.id_pac_bio_product) + assert pb_well.metrics is not None + assert pb_well.experiment_tracking is None + + # Only run_well mlwh data (no products), no data in the lang_qc database. + # Very sketchy mlwh qc metrics data. 
+ (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_10", "C1" + ) + pb_well = PacBioWellFull(db_well=well_row, qc_state=None) + _examine_well_model_c1(pb_well, well_row.id_pac_bio_product) + assert pb_well.metrics is not None + assert pb_well.experiment_tracking is None + + +def test_create_summary_model( + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval +): + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-92", "A1" + ) + pb_well = PacBioWellSummary(db_well=well_row) + _examine_well_model_a1(pb_well, well_row.id_pac_bio_product) + assert pb_well.study_names == ["Tree of Life - ASG"] + + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_1", "B1" + ) + pb_well = PacBioWellSummary(db_well=well_row, qc_state=qc_state) + _examine_well_model_b1(pb_well, well_row.id_pac_bio_product) + assert pb_well.study_names == [] + + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_10", "C1" + ) + pb_well = PacBioWellFull(db_well=well_row, qc_state=None) + _examine_well_model_c1(pb_well, well_row.id_pac_bio_product) diff --git a/tests/test_pb_wells_factory.py b/tests/test_pb_wells_factory.py index b4638d3f..c9bbc92d 100644 --- a/tests/test_pb_wells_factory.py +++ b/tests/test_pb_wells_factory.py @@ -4,7 +4,7 @@ from lang_qc.db.helper.wells import PacBioPagedWellsFactory, RunNotFoundError from lang_qc.db.qc_schema import QcState, QcType, SeqProduct -from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell +from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary from lang_qc.models.qc_flow_status import QcFlowStatusEnum from lang_qc.models.qc_state import QcState as QcStateModel from tests.conftest import compare_dates @@ -141,7 +141,7 @@ def test_inbox_wells_retrieval( mlwh_data = load_data4well_retrieval well = paged_wells.wells[0] - assert isinstance(well, 
PacBioWell) + assert isinstance(well, PacBioWellSummary) assert well.run_name == "TRACTION_RUN_10" assert well.label == "C1" assert well.qc_state is None @@ -154,7 +154,7 @@ def test_inbox_wells_retrieval( compare_dates(well.well_complete_time, well_fixture[5]) well = paged_wells.wells[1] - assert isinstance(well, PacBioWell) + assert isinstance(well, PacBioWellSummary) assert well.run_name == "TRACTION_RUN_12" assert well.label == "A1" assert well.qc_state is None @@ -290,7 +290,7 @@ def test_fully_retrieved_data_for_statuses( paged_wells = factory.create_for_qc_status(QcFlowStatusEnum.QC_COMPLETE) well = paged_wells.wells[0] - assert isinstance(well, PacBioWell) + assert isinstance(well, PacBioWellSummary) assert well.run_name == "TRACTION_RUN_5" assert well.label == "B1" compare_dates(well.run_start_time, "2022-12-14 11:56:33") @@ -312,7 +312,7 @@ def test_fully_retrieved_data_for_statuses( assert qc_state.created_by == "LangQC" well = paged_wells.wells[3] - assert isinstance(well, PacBioWell) + assert isinstance(well, PacBioWellSummary) assert well.run_name == "TRACTION_RUN_2" assert well.label == "D1" compare_dates(well.run_start_time, "2022-12-02 15:11:22") @@ -394,7 +394,7 @@ def test_known_run_names_input( wells = paged_wells_obj.wells assert len(wells) == 4 object_type_set = {type(well) for well in wells} - assert object_type_set == {PacBioWell} + assert object_type_set == {PacBioWellSummary} run_name_set = {well.run_name for well in wells} assert run_name_set == {"TRACTION_RUN_1"} label_list = [well.label for well in wells] @@ -419,7 +419,7 @@ def test_known_run_names_input( wells = paged_wells_obj.wells assert len(wells) == 2 object_type_set = {type(well) for well in wells} - assert object_type_set == {PacBioWell} + assert object_type_set == {PacBioWellSummary} run_names = [well.run_name for well in wells] assert run_names == 2 * ["TRACTION_RUN_3"] label_list = [well.label for well in wells] From d8a42a2ccd6449002e088df3f7178c284fef47e1 Mon Sep 17 
00:00:00 2001 From: mgcam Date: Mon, 11 Mar 2024 18:03:04 +0000 Subject: [PATCH 19/33] Added new test fixtures and tests. Refined the semantics of the 'study_names' field. --- lang_qc/models/pacbio/well.py | 45 +- tests/conftest.py | 11 +- tests/data/mlwh_pb_run_92/100-Study.yml | 43 - .../200-PacBioRunWellMetrics.yml | 253 ----- .../300-PacBioProductMetrics.yml | 175 ---- .../100-Sample.yml | 479 +++++++++ tests/data/mlwh_pb_runs/100-Study.yml | 122 +++ .../200-PacBioRun.yml | 553 +++++++++- .../mlwh_pb_runs/200-PacBioRunWellMetrics.yml | 944 ++++++++++++++++++ .../mlwh_pb_runs/300-PacBioProductMetrics.yml | 462 +++++++++ tests/data/mlwh_pb_runs/README | 19 + .../endpoints/test_single_well_qc_details.py | 2 +- tests/test_pac_bio_experiment.py | 2 +- ..._well_models.py => test_pb_well_models.py} | 70 +- 14 files changed, 2679 insertions(+), 501 deletions(-) delete mode 100644 tests/data/mlwh_pb_run_92/100-Study.yml delete mode 100644 tests/data/mlwh_pb_run_92/200-PacBioRunWellMetrics.yml delete mode 100644 tests/data/mlwh_pb_run_92/300-PacBioProductMetrics.yml rename tests/data/{mlwh_pb_run_92 => mlwh_pb_runs}/100-Sample.yml (59%) create mode 100644 tests/data/mlwh_pb_runs/100-Study.yml rename tests/data/{mlwh_pb_run_92 => mlwh_pb_runs}/200-PacBioRun.yml (68%) create mode 100644 tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml create mode 100644 tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml create mode 100644 tests/data/mlwh_pb_runs/README rename tests/{test_pac_well_models.py => test_pb_well_models.py} (65%) diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 054a3088..84b11f57 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -54,11 +54,25 @@ def get_field_names(cls): def get_experiment_info(db_well: PacBioRunWellMetrics): - return [ - pbr - for pbr in [pm.pac_bio_run for pm in db_well.pac_bio_product_metrics] - if pbr is not None + """Returns a list of PacBioRun mlwh database rows. 
+ + Returns LIMS information about the PacBio experiment + for this well, one pac_bio_run table row per sample + (product) in the well. + + If any or all of the pac_bio_product_metrics rows linked + to this well record are not linked to the pac_bio_run + table, and empty array is returned, thus preventing incomplete + data being supplied to the client. + """ + product_metrics = db_well.pac_bio_product_metrics + experiment_info = [ + pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None ] + if len(experiment_info) != len(product_metrics): + experiment_info = [] + + return experiment_info @dataclass(kw_only=True, frozen=True) @@ -162,10 +176,13 @@ class PacBioWellSummary(PacBioWell): Adds `study_names` to a list of attributes of the parent class `PacBioWell`. Instance creation is described in the documentation of the parent class. + + `get_experiment_info` method in this package is used to retrieve study + information, see its documentation for details. """ study_names: list = Field( - title="A list of study names", + title="An alphabetically sorted list of distinct study names", ) @model_validator(mode="before") @@ -173,9 +190,9 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: assigned = super().pre_root(values) mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] - assigned["study_names"] = [ - row.study.name for row in get_experiment_info(mlwh_db_row) - ] + assigned["study_names"] = sorted( + set([row.study.name for row in get_experiment_info(mlwh_db_row)]) + ) return assigned @@ -197,13 +214,16 @@ class PacBioPagedWells(PagedResponse, extra="forbid"): class PacBioWellFull(PacBioWell): """A full response model for a single PacBio well. 
- The model has teh fields that uniquely define the well (`run_name`, `label`, + The model has the fields that uniquely define the well (`run_name`, `label`, `plate_number`, `id_product`), along with the laboratory experiment and sequence run tracking information, current QC state of this well and QC data for this well. Instance creation is described in the documentation of this class's parent `PacBioWell`. + + `get_experiment_info` method in this package is used to retrieve information + about the experiment, see its documentation for details. """ metrics: QCDataWell = Field( @@ -222,14 +242,9 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: assigned = super().pre_root(values) mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] - assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row) - - product_metrics = mlwh_db_row.pac_bio_product_metrics experiment_info = get_experiment_info(mlwh_db_row) - # Occasionally product rows are not linked to LIMS rows. - # Go for all or nothing, do not supply incomplete data. 
- if len(experiment_info) and (len(experiment_info) == len(product_metrics)): + if len(experiment_info): assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info) return assigned diff --git a/tests/conftest.py b/tests/conftest.py index b4588056..52c20060 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -180,12 +180,11 @@ def insert_from_yaml(session, dir_path, module_name): (head, file_name) = os.path.split(file_path) # File name example: 200-PacBioRun.yml m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name) - if m is None: - raise Exception(f"Unexpected file {file_path} in fixtures.") - class_name = m.group(1) - table_class = getattr(module, class_name) - data = yaml.safe_load(f) - session.execute(insert(table_class), data) + if m is not None: + class_name = m.group(1) + table_class = getattr(module, class_name) + data = yaml.safe_load(f) + session.execute(insert(table_class), data) session.commit() diff --git a/tests/data/mlwh_pb_run_92/100-Study.yml b/tests/data/mlwh_pb_run_92/100-Study.yml deleted file mode 100644 index 86e6fed4..00000000 --- a/tests/data/mlwh_pb_run_92/100-Study.yml +++ /dev/null @@ -1,43 +0,0 @@ -- accession_number: ERP129860 - aligned: 1 - contains_human_dna: 0 - contaminated_human_dna: 0 - id_lims: SQSCP - id_study_lims: 6457 - id_study_tmp: 6287 - name: Tree of Life - ASG - reference_genome: ' ' - remove_x_and_autosomes: 0 - separate_y_chromosome_data: 0 - study_title: Tree of Life - ASG - study_visibility: Public - uuid_study_lims: 8d58238e-a2b5-11eb-84d2-fa163eac3af7 -- accession_number: ERP116890 - aligned: 1 - contains_human_dna: 0 - contaminated_human_dna: 0 - id_lims: SQSCP - id_study_lims: 5901 - id_study_tmp: 5735 - name: DTOL_Darwin Tree of Life - reference_genome: ' ' - remove_x_and_autosomes: 0 - separate_y_chromosome_data: 0 - study_title: Darwin Tree of Life - study_visibility: Public - uuid_study_lims: cf04ea86-ac82-11e9-8998-68b599768938 -- accession_number: ERP141224 - aligned: 1 - 
contains_human_dna: 0 - contaminated_human_dna: 0 - description: Study for release prior to publication of selected datasets - id_lims: SQSCP - id_study_lims: 7069 - id_study_tmp: 6942 - name: Alternative Enzymes 2022 microbial genomes - reference_genome: Clostridium_difficile (Strain_630) - remove_x_and_autosomes: 0 - separate_y_chromosome_data: 0 - study_title: Alternative Enzymes 2022 microbial genomes - study_visibility: Public - uuid_study_lims: 39ba6ae6-3500-11ed-b3f3-fa163eac3af7 diff --git a/tests/data/mlwh_pb_run_92/200-PacBioRunWellMetrics.yml b/tests/data/mlwh_pb_run_92/200-PacBioRunWellMetrics.yml deleted file mode 100644 index f3461522..00000000 --- a/tests/data/mlwh_pb_run_92/200-PacBioRunWellMetrics.yml +++ /dev/null @@ -1,253 +0,0 @@ ---- -- adapter_dimer_percent: 0 - binding_kit: Sequel II Binding Kit 2.2 - ccs_execution_mode: OnInstrument - cell_lot_number: 417079 - chemistry_sw_version: 10.2.0.133424 - chip_type: 8mChip - control_concordance_mean: 0.868282 - control_concordance_mode: 0.91 - control_num_reads: 24837 - control_read_length_mean: 50169 - created_by: eg18 - demultiplex_mode: ~ - heteroduplex_analysis: ~ - hifi_barcoded_reads: ~ - hifi_bases_in_barcoded_reads: ~ - hifi_low_quality_num_reads: 260532 - hifi_low_quality_read_bases: 2670039556 - hifi_low_quality_read_length_mean: 10248 - hifi_low_quality_read_quality_median: 17 - hifi_num_reads: 2877051 - hifi_number_passes_mean: 18 - hifi_only_reads: ~ - hifi_read_bases: 27076668646 - hifi_read_length_mean: 9411 - hifi_read_quality_median: 43 - id_pac_bio_product: cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61 - id_pac_bio_rw_metrics_tmp: 1732 - include_kinetics: 0 - insert_length_mean: 16227 - insert_length_n50: 23250 - instrument_name: 64222E - instrument_sw_version: 10.1.0.119549 - instrument_type: Sequel2e - loading_conc: 80 - local_base_rate: 2.76789 - movie_minutes: 1800 - movie_name: m64222e_220414_130247 - p0_num: 2800130 - p1_num: 5033971 - p2_num: 180570 - 
pac_bio_run_name: TRACTION-RUN-92 - polymerase_num_reads: 5009134 - polymerase_read_bases: 645567171652 - polymerase_read_length_mean: 128878 - polymerase_read_length_n50: 251250 - primary_analysis_sw_version: 10.1.0.119549 - productive_zmws_num: 7989834 - run_complete: 2022-04-20 09:16:53 - run_start: 2022-04-14 12:52:34 - run_status: Complete - run_transfer_complete: 2022-04-20 16:03:18 - sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) - sequencing_kit_lot_number: 123660 - short_insert_percent: 0 - sl_hostname: pacbio01.dnapipelines.sanger.ac.uk - sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da - sl_ccs_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29ro - ts_run_name: r64222e_20220414_125138 - unique_molecular_bases: 68501667840 - well_complete: 2022-04-16 12:36:21 - well_label: A1 - well_start: 2022-04-14 13:02:48 - well_status: Complete -- adapter_dimer_percent: 0 - binding_kit: Sequel II Binding Kit 2.2 - ccs_execution_mode: OnInstrument - cell_lot_number: 417079 - chemistry_sw_version: 10.2.0.133424 - chip_type: 8mChip - control_concordance_mean: 0.865786 - control_concordance_mode: 0.91 - control_num_reads: 42962 - control_read_length_mean: 51810 - created_by: eg18 - demultiplex_mode: ~ - heteroduplex_analysis: ~ - hifi_barcoded_reads: ~ - hifi_bases_in_barcoded_reads: ~ - hifi_low_quality_num_reads: 204155 - hifi_low_quality_read_bases: 1887795985 - hifi_low_quality_read_length_mean: 9246 - hifi_low_quality_read_quality_median: 16 - hifi_num_reads: 2082542 - hifi_number_passes_mean: 18 - hifi_only_reads: ~ - hifi_read_bases: 17706147796 - hifi_read_length_mean: 8502 - hifi_read_quality_median: 40 - id_pac_bio_product: 63fb9a37ff19c248fc7d99bd254a61085226ded540de7c5445daf1398e339833 - id_pac_bio_rw_metrics_tmp: 1733 - include_kinetics: 0 - insert_length_mean: 16407 - insert_length_n50: 28250 - instrument_name: 64222E - instrument_sw_version: 10.1.0.119549 - instrument_type: Sequel2e - loading_conc: 80 - local_base_rate: 2.80566 - movie_minutes: 1800 - 
movie_name: m64222e_220415_235643 - p0_num: 4427179 - p1_num: 3512385 - p2_num: 75107 - pac_bio_run_name: TRACTION-RUN-92 - polymerase_num_reads: 3469423 - polymerase_read_bases: 455958509506 - polymerase_read_length_mean: 131422 - polymerase_read_length_n50: 251750 - primary_analysis_sw_version: 10.1.0.119549 - productive_zmws_num: 7971709 - run_complete: 2022-04-20 09:16:53 - run_start: 2022-04-14 12:52:34 - run_status: Complete - run_transfer_complete: 2022-04-20 16:03:18 - sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) - sequencing_kit_lot_number: 123660 - short_insert_percent: 0 - sl_hostname: pacbio01.dnapipelines.sanger.ac.uk - sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da - sl_ccs_uuid: ~ - ts_run_name: r64222e_20220414_125138 - unique_molecular_bases: 45990051840 - well_complete: 2022-04-17 23:16:35 - well_label: B1 - well_start: 2022-04-15 23:56:43 - well_status: Complete -- adapter_dimer_percent: 0 - binding_kit: Sequel II Binding Kit 2.2 - ccs_execution_mode: OnInstrument - cell_lot_number: 417079 - chemistry_sw_version: 10.2.0.133424 - chip_type: 8mChip - control_concordance_mean: 0.866249 - control_concordance_mode: 0.89 - control_num_reads: 40787 - control_read_length_mean: 49421 - created_by: eg18 - demultiplex_mode: ~ - heteroduplex_analysis: ~ - hifi_low_quality_num_reads: 173039 - hifi_low_quality_read_bases: 1743404181 - hifi_low_quality_read_length_mean: 10075 - hifi_low_quality_read_quality_median: 17 - hifi_num_reads: 1698191 - hifi_number_passes_mean: 17 - hifi_only_reads: ~ - hifi_read_bases: 16029605719 - hifi_read_length_mean: 9439 - hifi_read_quality_median: 40 - hifi_barcoded_reads: ~ - hifi_bases_in_barcoded_reads: ~ - id_pac_bio_product: a65eae06f3048a186aeb9104d0a8d3f46ca59dff7747eec9918fcfa85587a3c2 - id_pac_bio_rw_metrics_tmp: 1734 - include_kinetics: 0 - insert_length_mean: 15260 - insert_length_n50: 15750 - instrument_name: 64222E - instrument_sw_version: 10.1.0.119549 - instrument_type: Sequel2e - loading_conc: 130 - 
local_base_rate: 2.76102 - movie_minutes: 1800 - movie_name: m64222e_220417_105320 - p0_num: 5019384 - p1_num: 2944690 - p2_num: 50597 - pac_bio_run_name: TRACTION-RUN-92 - polymerase_num_reads: 2903903 - polymerase_read_bases: 360656040891 - polymerase_read_length_mean: 124197 - polymerase_read_length_n50: 244750 - primary_analysis_sw_version: 10.1.0.119549 - productive_zmws_num: 7973884 - run_complete: 2022-04-20 09:16:53 - run_start: 2022-04-14 12:52:34 - run_status: Complete - run_transfer_complete: 2022-04-20 16:03:18 - sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) - sequencing_kit_lot_number: 123660 - short_insert_percent: 0 - sl_hostname: pacbio01.dnapipelines.sanger.ac.uk - sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da - sl_ccs_uuid: ~ - ts_run_name: r64222e_20220414_125138 - unique_molecular_bases: 37178580992 - well_complete: 2022-04-19 04:22:35 - well_label: C1 - well_start: 2022-04-17 10:53:21 - well_status: Complete -- adapter_dimer_percent: 0 - binding_kit: Sequel II Binding Kit 2.2 - ccs_execution_mode: OnInstrument - cell_lot_number: 417079 - chemistry_sw_version: 10.2.0.133424 - chip_type: 8mChip - control_concordance_mean: 0.863292 - control_concordance_mode: 0.91 - control_num_reads: 24842 - control_read_length_mean: 44982 - created_by: eg18 - demultiplex_mode: ~ - heteroduplex_analysis: ~ - hifi_barcoded_reads: ~ - hifi_bases_in_barcoded_reads: ~ - hifi_low_quality_num_reads: 193115 - hifi_low_quality_read_bases: 1672216211 - hifi_low_quality_read_length_mean: 8659 - hifi_low_quality_read_quality_median: 16 - hifi_num_reads: 1912626 - hifi_number_passes_mean: 18 - hifi_only_reads: ~ - hifi_read_bases: 14052550494 - hifi_read_length_mean: 7347 - hifi_read_quality_median: 41 - id_pac_bio_product: c5babd5516f7b9faab8415927e5f300d5152bb96b8b922e768d876469a14fa5d - id_pac_bio_rw_metrics_tmp: 1735 - include_kinetics: 0 - insert_length_mean: 15043 - insert_length_n50: 33750 - instrument_name: 64222E - instrument_sw_version: 10.1.0.119549 - 
instrument_type: Sequel2e - loading_conc: 80 - local_base_rate: 2.71738 - movie_minutes: 1800 - movie_name: m64222e_220418_214938 - p0_num: 3784476 - p1_num: 4078117 - p2_num: 152078 - pac_bio_run_name: TRACTION-RUN-92 - polymerase_num_reads: 4053275 - polymerase_read_bases: 405355872925 - polymerase_read_length_mean: 100007 - polymerase_read_length_n50: 234250 - primary_analysis_sw_version: 10.1.0.119549 - productive_zmws_num: 7989829 - run_complete: 2022-04-20 09:16:53 - run_start: 2022-04-14 12:52:34 - run_status: Complete - run_transfer_complete: 2022-04-20 16:03:18 - sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) - sequencing_kit_lot_number: 123660 - short_insert_percent: 0.01 - sl_hostname: pacbio01.dnapipelines.sanger.ac.uk - sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da - sl_ccs_uuid: ~ - ts_run_name: r64222e_20220414_125138 - unique_molecular_bases: 48624848896 - well_complete: 2022-04-20 16:03:18 - well_label: D1 - well_start: 2022-04-18 21:49:38 - well_status: Complete diff --git a/tests/data/mlwh_pb_run_92/300-PacBioProductMetrics.yml b/tests/data/mlwh_pb_run_92/300-PacBioProductMetrics.yml deleted file mode 100644 index b27f0519..00000000 --- a/tests/data/mlwh_pb_run_92/300-PacBioProductMetrics.yml +++ /dev/null @@ -1,175 +0,0 @@ ---- -- id_pac_bio_pr_metrics_tmp: 9675 - id_pac_bio_product: cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61 - id_pac_bio_rw_metrics_tmp: 1732 - id_pac_bio_tmp: 98966 -- id_pac_bio_pr_metrics_tmp: 9676 - id_pac_bio_product: 63fb9a37ff19c248fc7d99bd254a61085226ded540de7c5445daf1398e339833 - id_pac_bio_rw_metrics_tmp: 1733 - id_pac_bio_tmp: 98967 -- id_pac_bio_pr_metrics_tmp: 9677 - id_pac_bio_product: a65eae06f3048a186aeb9104d0a8d3f46ca59dff7747eec9918fcfa85587a3c2 - id_pac_bio_rw_metrics_tmp: 1734 - id_pac_bio_tmp: 98968 -- id_pac_bio_pr_metrics_tmp: 9678 - id_pac_bio_product: 57538925519f7ae568fbd5cd075fd2fb600a2273394f6537a17d97917e224b11 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98969 
-- id_pac_bio_pr_metrics_tmp: 9679 - id_pac_bio_product: 2307cbc4bf6b6917fa6a20d7e077302ad617c4df61cc0f845facfb24548360e4 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98970 -- id_pac_bio_pr_metrics_tmp: 9680 - id_pac_bio_product: 568b595c45b01faa601cab34ac91987b738106e2c10d2343fed1a2823726b036 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98971 -- id_pac_bio_pr_metrics_tmp: 9681 - id_pac_bio_product: 5a7af02c0fa288456f61046c15fcc2f14ae14ef890eea7de2239abdbc06cd1a2 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98972 -- id_pac_bio_pr_metrics_tmp: 9682 - id_pac_bio_product: 6efad9725471425b8574eaa8772cfd077cfca761c6654dbd36fb3bdb3e939ac3 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98973 -- id_pac_bio_pr_metrics_tmp: 9683 - id_pac_bio_product: 40053aa11e7cf4b74c0dc641c42fbe45145f007a6edacd7e1d476d20d3dc8899 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98974 -- id_pac_bio_pr_metrics_tmp: 9684 - id_pac_bio_product: f0993ebd8924e24e202c269a34598972541c3bccbe9778ca38889d8119ef8fbc - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98975 -- id_pac_bio_pr_metrics_tmp: 9685 - id_pac_bio_product: 67a948d085b4be890f02a04779ed44351bf64506f14e1a703cede5b901f7732d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98976 -- id_pac_bio_pr_metrics_tmp: 9686 - id_pac_bio_product: 22be6501a64e534e8bcd4d94222b8b226880d8574b2c2a13b47993d67bf953ed - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98977 -- id_pac_bio_pr_metrics_tmp: 9687 - id_pac_bio_product: a85234a2ce0b894cdde92dd592202b93e973aec86a22584d5d5aad6207936c3a - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98978 -- id_pac_bio_pr_metrics_tmp: 9688 - id_pac_bio_product: 925c97e811bd1320575eb09a9c6208ebceb092f39be9804f13ae6ea57d6ee3f4 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98979 -- id_pac_bio_pr_metrics_tmp: 9689 - id_pac_bio_product: 4cd35d863fb960d2e65b11c3092034272b71c63a7cceb8a4eb82459b442e5649 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98980 -- 
id_pac_bio_pr_metrics_tmp: 9690 - id_pac_bio_product: 13e08185c8f214ddb7fda427eb12de14d2328966dc6dd407e04931540d563764 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98981 -- id_pac_bio_pr_metrics_tmp: 9691 - id_pac_bio_product: b5131d50fc95fcd36b1670a0a200424d8ab1568b4a8c4e3ae267ab78dd482189 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98982 -- id_pac_bio_pr_metrics_tmp: 9692 - id_pac_bio_product: f16140784d4fe024bda8c121e3f9aefb3d8907f11787be71894120048b2cc504 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98983 -- id_pac_bio_pr_metrics_tmp: 9693 - id_pac_bio_product: 5a9fe0fb67614b9fd250dcf711e1fcf3e2ce13f450ecc68af699035490e3e0ec - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98984 -- id_pac_bio_pr_metrics_tmp: 9694 - id_pac_bio_product: 9f045c9356f681c1b452ec6dd92415264f7e9646b91e82bbe9f739d2d97b621c - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98985 -- id_pac_bio_pr_metrics_tmp: 9695 - id_pac_bio_product: c060619f13f862370b4047c4a897aa4d9e907f6252d1f5e2962e09495d32becb - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98986 -- id_pac_bio_pr_metrics_tmp: 9696 - id_pac_bio_product: f997587d6945cc8feaf015e6f74247c6d5d72afeae5e4aac550cb69ebdbd8381 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98987 -- id_pac_bio_pr_metrics_tmp: 9697 - id_pac_bio_product: 044ddae518c4f27a9b4c362098d78dbddbfb3c777eb7b755d64413e4cde4cd3c - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98988 -- id_pac_bio_pr_metrics_tmp: 9698 - id_pac_bio_product: 03e42278867da02c593029170ce087048e62f6f69165a3080e94f21d3de5e6b9 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98989 -- id_pac_bio_pr_metrics_tmp: 9699 - id_pac_bio_product: fdf0c4eb042807b7694842569a907e530fe603ade6470e1b4878beab7de9c055 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98990 -- id_pac_bio_pr_metrics_tmp: 9700 - id_pac_bio_product: a5c8aa41feee6cf782936786d3702c716fac193e5943398662975136fd5e30a1 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98991 -- 
id_pac_bio_pr_metrics_tmp: 9701 - id_pac_bio_product: 11c1424601a9dd783e9eab728934f88205b1c2251ce5b5da64d9ba25a34458fb - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98992 -- id_pac_bio_pr_metrics_tmp: 9702 - id_pac_bio_product: 9708d23c8ea56d4d0680dc6f3f78b102d08708d70f8205e079a0215f9331c27d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98993 -- id_pac_bio_pr_metrics_tmp: 9703 - id_pac_bio_product: e0d3aa0edd255e4bcf14ad2c855a195677f44109ad444e395845a7b82c1d477d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98994 -- id_pac_bio_pr_metrics_tmp: 9704 - id_pac_bio_product: 41887713d6f40e42b97384931254e986635189cc432652ac5d16d11c371b6d71 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98995 -- id_pac_bio_pr_metrics_tmp: 9705 - id_pac_bio_product: 49da248e9147189033f81b7f5f44f036bb26be0a5c3efcf61c484dc8dc50d432 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98996 -- id_pac_bio_pr_metrics_tmp: 9706 - id_pac_bio_product: ccc81d884b5319d7d6dd4beb14944fd9dc7b6063933a57882f53652da435af2d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98997 -- id_pac_bio_pr_metrics_tmp: 9707 - id_pac_bio_product: 6ac18d5b5b63eca3a3773686733be395a58d67213b597111c2b51c6fec4ce814 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98998 -- id_pac_bio_pr_metrics_tmp: 9708 - id_pac_bio_product: dcb13705145a63f6034692b6e5cd568fc34e9c6669f0b0ce37a54160c45a1a04 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 98999 -- id_pac_bio_pr_metrics_tmp: 9709 - id_pac_bio_product: 4e6c23a4627eb23514d03630512548d521f822d44467f951ae61e169d16a868e - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99000 -- id_pac_bio_pr_metrics_tmp: 9710 - id_pac_bio_product: 878b1e205768c5965cdce2b11d779d41ad267f5d84cac8652e64d9c7139fd692 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99001 -- id_pac_bio_pr_metrics_tmp: 9711 - id_pac_bio_product: 75ad3eb849261accf19df9c5acfb97b10f73d658dd1f403a032dab25fdeee64a - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99002 -- 
id_pac_bio_pr_metrics_tmp: 9712 - id_pac_bio_product: b209eb8f689ff8391bf42dece19d8de44ef4ee63158219cc81bd0cb45196b631 - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99003 -- id_pac_bio_pr_metrics_tmp: 9713 - id_pac_bio_product: 18078959e14a3416b428624e6cc200c649a8b3b92fc40a20444e9cdca373137b - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99004 -- id_pac_bio_pr_metrics_tmp: 9714 - id_pac_bio_product: 0a82107d88ee710dcf46139a8710f443da8f4051142c478cb0a99ec5c6d64a2a - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99005 -- id_pac_bio_pr_metrics_tmp: 9715 - id_pac_bio_product: 6a81185dec0b9dcf06412b43fad08ce92cbf5ad492da7fa25dee63723950de5d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99006 -- id_pac_bio_pr_metrics_tmp: 9716 - id_pac_bio_product: 7f3912931b878be0b3c949fc20acd59d51b68a27a04fa89491a74905219f8deb - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99007 -- id_pac_bio_pr_metrics_tmp: 9717 - id_pac_bio_product: 4cf97b14a1c23d892f3b9609642d1978b56ca4a7fca6995897be68c29ed9211d - id_pac_bio_rw_metrics_tmp: 1735 - id_pac_bio_tmp: 99008 - - diff --git a/tests/data/mlwh_pb_run_92/100-Sample.yml b/tests/data/mlwh_pb_runs/100-Sample.yml similarity index 59% rename from tests/data/mlwh_pb_run_92/100-Sample.yml rename to tests/data/mlwh_pb_runs/100-Sample.yml index e0ae6a0b..7b8c233e 100644 --- a/tests/data/mlwh_pb_run_92/100-Sample.yml +++ b/tests/data/mlwh_pb_runs/100-Sample.yml @@ -1,3 +1,4 @@ +--- - accession_number: SAMEA11604903 common_name: Lamellibrachia barhami consent_withdrawn: 0 @@ -815,3 +816,481 @@ supplier_name: promegaB_12 taxon_id: 4932 uuid_sample_lims: b887566c-b3f6-11ec-9caa-fa163eac3af7 +- accession_number: SAMEA10332404 + common_name: Verbascum thapsus + consent_withdrawn: 0 + control: 0 + created: 2022-10-21 08:10:07 + donor_id: SAMEA10332243 + id_lims: SQSCP + id_sample_lims: 8500551 + id_sample_tmp: 8427375 + last_updated: 2022-10-21 08:10:07 + name: DTOL13174735 + public_name: daVerThap1 + recorded_at: 2022-10-21 
08:10:08 + sanger_sample_id: DTOL13174735 + supplier_name: FS63842463 + taxon_id: 39388 + uuid_sample_lims: c64320ec-5117-11ed-9bd8-fa163eac3af7 +- accession_number: SAMEA9065892 + common_name: Thaumetopoea processionea + consent_withdrawn: 0 + control: 0 + created: 2022-10-28 16:01:15 + donor_id: SAMEA9065811 + id_lims: SQSCP + id_sample_lims: 8509377 + id_sample_tmp: 8436094 + last_updated: 2022-10-28 16:01:16 + name: DTOL13161568 + public_name: ilThaProc3 + recorded_at: 2022-10-28 16:01:16 + sanger_sample_id: DTOL13161568 + supplier_name: FD30823809 + taxon_id: 499230 + uuid_sample_lims: c0a80bf8-56d9-11ed-975e-fa163eea3084 +- accession_number: SAMEA10369798 + cohort: 2982900000 + common_name: Agrimonia eupatoria + consent_withdrawn: 0 + created: 2023-03-01 11:55:55 + customer_measured_volume: 60 + description: Plant MagAttract v4 + donor_id: SAMEA10369788 + id_lims: SQSCP + id_sample_lims: 8633375 + id_sample_tmp: 8559246 + last_updated: 2023-03-01 11:55:59 + name: DTOL13630447 + public_name: drAgrEupa1 + recorded_at: 2023-03-01 11:56:01 + sample_type: vouchering + sanger_sample_id: DTOL13630447 + strain: Cryoprep + supplier_name: KDTOL10382 + taxon_id: 57912 + uuid_sample_lims: 05bd2f02-b828-11ed-b0d1-fa163eea3084 +- accession_number: SAMEA7696557 + common_name: Tachypodoiulus niger + consent_withdrawn: 0 + control: 0 + created: 2023-03-17 15:23:41 + donor_id: SAMEA7696482 + id_lims: SQSCP + id_sample_lims: 8654507 + id_sample_tmp: 8579905 + last_updated: 2023-03-17 15:23:41 + name: DTOL13642523 + public_name: qdTacNige1 + recorded_at: 2023-03-17 15:23:42 + sanger_sample_id: DTOL13642523 + supplier_name: SubSam_qdTacNige1_3685 + taxon_id: 433595 + uuid_sample_lims: b2b60038-c4d7-11ed-8019-fa163eac3af7 +- accession_number: SAMEA12813521 + common_name: Clistosaccus paguri + consent_withdrawn: 0 + control: 0 + created: 2023-03-23 08:54:42 + donor_id: SAMEA12813472 + id_lims: SQSCP + id_sample_lims: 8657549 + id_sample_tmp: 8582947 + last_updated: 2023-03-23 
08:54:42 + name: 6771STDY13618009 + public_name: qxCliPagu9 + recorded_at: 2023-03-23 08:54:43 + sanger_sample_id: 6771STDY13618009 + supplier_name: SubSam_qxCliPagu9_3522 + taxon_id: 319673 + uuid_sample_lims: 5a41b3e0-c958-11ed-8ab8-fa163eac3af7 +- accession_number: SAMEA7521974 + common_name: Borago officinalis + consent_withdrawn: 0 + control: 0 + created: 2023-03-30 14:59:21 + donor_id: SAMEA7521932 + id_lims: SQSCP + id_sample_lims: 8663492 + id_sample_tmp: 8588834 + last_updated: 2023-03-30 14:59:21 + name: DTOL13653276 + public_name: daBorOffi1 + recorded_at: 2023-03-30 14:59:21 + sanger_sample_id: DTOL13653276 + supplier_name: FS63807999 + taxon_id: 13363 + uuid_sample_lims: 73aa3736-cf0b-11ed-aa59-fa163eea3084 +- accession_number: SAMEA112963171 + cohort: 306847500 + common_name: Selandria serva + consent_withdrawn: 0 + country_of_origin: not provided + created: 2023-06-07 15:29:56 + customer_measured_volume: 14 + date_of_sample_collection: 2022-07-05 + description: MagAttract + donor_id: SAMEA112963078 + id_lims: SQSCP + id_sample_lims: 8713294 + id_sample_tmp: 8625878 + last_updated: 2023-06-07 15:30:03 + name: DTOL13788282 + public_name: iySelServ2 + recorded_at: 2023-06-07 15:30:06 + sample_type: Vouchering + sanger_sample_id: DTOL13788282 + strain: Powermash + supplier_name: NHMUK015059202 + taxon_id: 410286 + uuid_sample_lims: 2839dbd0-0548-11ee-add0-fa163eea3084 +- accession_number: SAMEA113426911 + cohort: 260392500 + common_name: Chamaemyia aridella + consent_withdrawn: 0 + country_of_origin: not provided + created: 2023-06-14 13:22:28 + customer_measured_volume: 3 + date_of_sample_collection: not provided + description: MagAttract + donor_id: SAMEA113425591 + id_lims: SQSCP + id_sample_lims: 8718071 + id_sample_tmp: 8630655 + last_updated: 2023-06-14 13:22:32 + name: DTOL13795633 + public_name: idChaArid1 + recorded_at: 2023-06-14 13:22:35 + sample_type: Vouchering + sanger_sample_id: DTOL13795633 + strain: Powermash + supplier_name: Ox002879 + 
taxon_id: 2881674 + uuid_sample_lims: 8260135e-0ab6-11ee-b5ba-fa163eac3af7 +- accession_number: SAMEA113425916 + cohort: 260392500 + common_name: Chlorops ringens + consent_withdrawn: 0 + country_of_origin: not provided + created: 2023-06-14 13:22:28 + customer_measured_volume: 1 + date_of_sample_collection: not provided + description: MagAttract + donor_id: SAMEA113425730 + id_lims: SQSCP + id_sample_lims: 8718072 + id_sample_tmp: 8630656 + last_updated: 2023-06-14 13:22:32 + name: DTOL13795634 + public_name: idChlRing1 + recorded_at: 2023-06-14 13:22:35 + sample_type: Vouchering + sanger_sample_id: DTOL13795634 + strain: Powermash + supplier_name: Ox003155 + taxon_id: 3039479 + uuid_sample_lims: 8262fee8-0ab6-11ee-b5ba-fa163eac3af7 +- accession_number: SAMEA113425922 + cohort: 260392500 + common_name: Diastata fuscula + consent_withdrawn: 0 + country_of_origin: not provided + created: 2023-06-14 13:22:28 + customer_measured_volume: 1 + date_of_sample_collection: not provided + description: MagAttract + donor_id: SAMEA113425735 + id_lims: SQSCP + id_sample_lims: 8718076 + id_sample_tmp: 8630660 + last_updated: 2023-06-14 13:22:32 + name: DTOL13795638 + public_name: idDiaFusc1 + recorded_at: 2023-06-14 13:22:35 + sample_type: Vouchering + sanger_sample_id: DTOL13795638 + strain: Powermash + supplier_name: Ox003161 + taxon_id: 1262299 + uuid_sample_lims: 826e9b7c-0ab6-11ee-b5ba-fa163eac3af7 +- common_name: Procloeon bifidum + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-11-15 11:46:28 + date_of_sample_collection: not provided + description: SAMEA112222208 + donor_id: DTOL14162027 + id_lims: SQSCP + id_sample_lims: 9444455 + id_sample_tmp: 9302420 + last_updated: 2023-11-15 11:46:28 + name: DTOL14162027 + public_name: ieProBifi2 + recorded_at: 2023-11-15 11:46:28 + sanger_sample_id: DTOL14162027 + strain: SAMEA112222162 + supplier_name: SubSam_ieProBifi2_5062 + taxon_id: 1592913 + uuid_sample_lims: 
9ca7c5f8-83ac-11ee-9e2f-024293460e78 +- accession_number: SAMEA111431652 + common_name: Linnaea borealis + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-11-17 08:36:09 + date_of_sample_collection: not provided + donor_id: SAMEA7535987 + id_lims: SQSCP + id_sample_lims: 9451544 + id_sample_tmp: 9309564 + last_updated: 2023-11-17 08:36:10 + name: DTOL13810175 + public_name: daLinBore1 + recorded_at: 2023-11-17 08:36:10 + sanger_sample_id: DTOL13810175 + supplier_name: SubSam_daLinBore1_4724 + taxon_id: 77623 + uuid_sample_lims: 5bc32040-8524-11ee-a665-024224dd57f4 +- accession_number: SAMEA7522163 + common_name: Chaenorhinum minus + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-11-24 09:47:11 + date_of_sample_collection: not provided + donor_id: SAMEA7522063 + id_lims: SQSCP + id_sample_lims: 9463590 + id_sample_tmp: 9321705 + last_updated: 2023-11-24 09:47:12 + name: DTOL14291044 + public_name: daChaMinu2 + recorded_at: 2023-11-24 09:47:12 + sanger_sample_id: DTOL14291044 + supplier_name: SubSam_daChaMinu2_5222 + taxon_id: 105913 + uuid_sample_lims: 70e98a88-8aae-11ee-becc-024224dd57f4 +- accession_number: SAMEA111431250 + common_name: Rosa spinosissima + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-11-24 10:53:53 + date_of_sample_collection: not provided + donor_id: SAMEA111431184 + id_lims: SQSCP + id_sample_lims: 9463632 + id_sample_tmp: 9321748 + last_updated: 2023-11-24 10:53:54 + name: DTOL14452869 + public_name: drRosSpin1 + recorded_at: 2023-11-24 10:53:54 + sanger_sample_id: DTOL14452869 + supplier_name: SubSam_drRosSpin1_5278 + taxon_id: 74630 + uuid_sample_lims: c239f784-8ab7-11ee-ae26-024293460e78 +- accession_number: SAMEA9335449 + common_name: Cephalanthera damasonium + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-11-24 11:10:32 + date_of_sample_collection: not provided + donor_id: SAMEA9335281 + 
id_lims: SQSCP + id_sample_lims: 9463663 + id_sample_tmp: 9321777 + last_updated: 2023-11-24 11:10:33 + name: DTOL14290946 + public_name: lsCepDama1 + recorded_at: 2023-11-24 11:10:33 + sanger_sample_id: DTOL14290946 + supplier_name: SubSam_lsCepDama1_5149 + taxon_id: 78719 + uuid_sample_lims: 15aef692-8aba-11ee-ae26-024293460e78 +- accession_number: SAMEA110187304 + common_name: Caenorhabditis sp. BRC20456 + concentration_determined_by: Spectrophotometer + consent_withdrawn: 0 + control: 0 + country_of_origin: Solomon Islands + created: 2023-11-27 09:25:21 + customer_measured_concentration: 14.7 + customer_measured_volume: 45 + date_of_sample_collection: 2015-01-01 + date_of_sample_extraction: 11/23 + dna_source: Tissue + donor_id: SAMEA110187300 + extraction_method: Monarch Extraction + gender: Mixed + id_lims: SQSCP + id_sample_lims: 9463876 + id_sample_tmp: 9321998 + last_updated: 2023-11-27 09:25:23 + name: BlaxGeNe14563575 + public_name: nxCaeSpeh1 + purification_method: Ethanol + purified: Yes + recorded_at: 2023-11-27 09:25:23 + sanger_sample_id: BlaxGeNe14563575 + storage_conditions: +4C + supplier_name: BRC20456 + taxon_id: 2883082 + uuid_sample_lims: e36343f6-8d06-11ee-890d-024293460e78 +- accession_number: SAMEA10369857 + common_name: Halimione portulacoides + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-01 12:08:58 + date_of_sample_collection: not provided + donor_id: SAMEA10369847 + id_lims: SQSCP + id_sample_lims: 9478716 + id_sample_tmp: 9336860 + last_updated: 2023-12-01 12:08:59 + name: DTOL14523233 + public_name: dcHalPort1 + recorded_at: 2023-12-01 12:08:59 + sample_type: LI + sanger_sample_id: DTOL14523233 + supplier_name: SubSam_5444 + taxon_id: 376799 + uuid_sample_lims: 68647e92-9042-11ee-a0e6-024293460e78 +- accession_number: SAMEA10369760 + common_name: Origanum vulgare + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-01 12:08:59 + date_of_sample_collection: 
not provided + donor_id: SAMEA10369670 + id_lims: SQSCP + id_sample_lims: 9478726 + id_sample_tmp: 9336869 + last_updated: 2023-12-01 12:08:59 + name: DTOL14523243 + public_name: daOriVulg1 + recorded_at: 2023-12-01 12:08:59 + sample_type: LI + sanger_sample_id: DTOL14523243 + supplier_name: SubSam_5438 + taxon_id: 39352 + uuid_sample_lims: 688268d0-9042-11ee-a0e6-024293460e78 +- accession_number: SAMEA9335181 + common_name: Hippuris vulgaris + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-04 13:03:46 + date_of_sample_collection: not provided + donor_id: SAMEA9335108 + id_lims: SQSCP + id_sample_lims: 9478845 + id_sample_tmp: 9336988 + last_updated: 2023-12-04 13:03:47 + name: DTOL14523333 + public_name: daHipVulg1 + recorded_at: 2023-12-04 13:03:47 + sanger_sample_id: DTOL14523333 + supplier_name: SubSam_5456 + taxon_id: 39321 + uuid_sample_lims: 8f4b9580-92a5-11ee-a0e6-024293460e78 +- accession_number: SAMEA9335379 + common_name: Linum bienne + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-13 08:24:26 + date_of_sample_collection: not provided + donor_id: SAMEA9335256 + id_lims: SQSCP + id_sample_lims: 9506156 + id_sample_tmp: 9358950 + last_updated: 2023-12-13 08:24:27 + name: DTOL14200517 + public_name: ddLinBien1 + recorded_at: 2023-12-13 08:24:28 + sanger_sample_id: DTOL14200517 + supplier_name: SubSam_ddLinBien1_5026 + taxon_id: 347648 + uuid_sample_lims: 074a7f78-9991-11ee-8911-024224dd57f4 +- accession_number: SAMEA113970418 + common_name: Arabis scabra + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-13 08:24:27 + date_of_sample_collection: not provided + donor_id: SAMEA113970357 + id_lims: SQSCP + id_sample_lims: 9506178 + id_sample_tmp: 9358974 + last_updated: 2023-12-13 08:24:27 + name: DTOL14200539 + public_name: ddAraScab1 + recorded_at: 2023-12-13 08:24:28 + sanger_sample_id: DTOL14200539 + supplier_name: SubSam_ddAraScab1_5048 + 
taxon_id: 50454 + uuid_sample_lims: 0788e31c-9991-11ee-8911-024224dd57f4 +- accession_number: SAMEA9143697 + common_name: Veronica hederifolia + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-14 12:28:19 + date_of_sample_collection: not provided + donor_id: SAMEA9143033 + id_lims: SQSCP + id_sample_lims: 9511440 + id_sample_tmp: 9364225 + last_updated: 2023-12-14 12:28:19 + name: DTOL_RD14581308 + public_name: daVerHede1 + recorded_at: 2023-12-14 12:28:20 + sanger_sample_id: DTOL_RD14581308 + supplier_name: SubSam_5606 + taxon_id: 202477 + uuid_sample_lims: 438aed2e-9a7c-11ee-a4e9-024224dd57f4 +- accession_number: SAMEA113425515 + common_name: Graphomya maculata + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2023-12-18 12:39:16 + date_of_sample_collection: not provided + donor_id: SAMEA113425419 + id_lims: SQSCP + id_sample_lims: 9515159 + id_sample_tmp: 9367590 + last_updated: 2023-12-18 12:39:16 + name: DTOL14026427 + public_name: idGraMacu2 + recorded_at: 2023-12-18 12:39:17 + sanger_sample_id: DTOL14026427 + supplier_name: SubSam_idGraMacu2_4846 + taxon_id: 1230139 + uuid_sample_lims: 74d0724a-9da2-11ee-8314-024293460e78 +- accession_number: SAMEA110451460 + common_name: Brachypodium pinnatum + consent_withdrawn: 0 + control: 0 + country_of_origin: not provided + created: 2024-01-03 12:32:18 + date_of_sample_collection: not provided + donor_id: SAMEA110451258 + id_lims: SQSCP + id_sample_lims: 9518398 + id_sample_tmp: 9371025 + last_updated: 2024-01-03 12:32:18 + name: DTOL14180244 + public_name: lpBraPinn1 + recorded_at: 2024-01-03 12:32:18 + sanger_sample_id: DTOL14180244 + supplier_name: SubSam_lpBraPinn1_4987 + taxon_id: 29663 + uuid_sample_lims: 22008a52-aa34-11ee-bf16-024224dd57f4 + diff --git a/tests/data/mlwh_pb_runs/100-Study.yml b/tests/data/mlwh_pb_runs/100-Study.yml new file mode 100644 index 00000000..453d6bae --- /dev/null +++ b/tests/data/mlwh_pb_runs/100-Study.yml @@ -0,0 
+1,122 @@ +- accession_number: ERP129860 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + id_lims: SQSCP + id_study_lims: 6457 + id_study_tmp: 6287 + name: Tree of Life - ASG + reference_genome: ' ' + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + study_title: Tree of Life - ASG + study_visibility: Public + uuid_study_lims: 8d58238e-a2b5-11eb-84d2-fa163eac3af7 +- accession_number: ERP116890 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + id_lims: SQSCP + id_study_lims: 5901 + id_study_tmp: 5735 + name: DTOL_Darwin Tree of Life + reference_genome: ' ' + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + study_title: Darwin Tree of Life + study_visibility: Public + uuid_study_lims: cf04ea86-ac82-11e9-8998-68b599768938 +- accession_number: ERP141224 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + description: Study for release prior to publication of selected datasets + id_lims: SQSCP + id_study_lims: 7069 + id_study_tmp: 6942 + name: Alternative Enzymes 2022 microbial genomes + reference_genome: Clostridium_difficile (Strain_630) + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + study_title: Alternative Enzymes 2022 microbial genomes + study_visibility: Public + uuid_study_lims: 39ba6ae6-3500-11ed-b3f3-fa163eac3af7 +- abbreviation: DTOL_RD + abstract: Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project + accession_number: ERP115230 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + created: 2019-05-10 08:36:03 + data_release_sort_of_study: genotyping or cytogenetics + data_release_strategy: not applicable + data_release_timing: never + description: "Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project" + faculty_sponsor: MB + id_lims: SQSCP + id_study_lims: 5822 + id_study_tmp: 5656 + last_updated: 2023-07-24 15:18:11 + name: DTOL_Darwin R&D + recorded_at: 2023-07-24 15:18:11 + 
reference_genome: ' ' + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + state: active + study_title: Darwin R&D + study_type: Whole Genome Sequencing + study_visibility: Hold + uuid_study_lims: a54462b6-72fe-11e9-9cbc-68b599768938 +- abbreviation: BlaxGeNe + abstract: 'High quality reference genome sequences and formal species descriptions' + accession_number: ERP120055 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + created: 2020-02-17 14:34:51 + data_release_sort_of_study: genomic sequencing + data_release_strategy: open + data_release_timing: standard + description: 'other description' + faculty_sponsor: MB + id_lims: SQSCP + id_study_lims: 6137 + id_study_tmp: 5967 + last_updated: 2023-07-24 15:18:11 + name: 959 Nematode Genomes + recorded_at: 2023-07-24 15:18:12 + reference_genome: ' ' + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + state: active + study_title: 959 Nematode Genomes + study_type: Whole Genome Sequencing + study_visibility: Public + uuid_study_lims: a845b2e4-5192-11ea-91ec-fa163e9d6485 +- abbreviation: 6771STDY + abstract: 'some abstract' + accession_number: ERP135012 + aligned: 1 + contains_human_dna: 0 + contaminated_human_dna: 0 + created: 2022-01-19 16:07:11 + data_release_sort_of_study: genomic sequencing + data_release_strategy: open + data_release_timing: standard + description: 'some description' + faculty_sponsor: MB + id_lims: SQSCP + id_study_lims: 6771 + id_study_tmp: 6615 + last_updated: 2024-01-05 10:36:10 + name: ToL_Blaxter_ Reference Genomes_ DNA + recorded_at: 2024-01-05 10:36:10 + reference_genome: ' ' + remove_x_and_autosomes: 0 + separate_y_chromosome_data: 0 + state: active + study_title: Reference Genome + study_type: Whole Genome Sequencing + study_visibility: Hold + uuid_study_lims: dbfc7680-7941-11ec-bf7f-fa163eea3084 + diff --git a/tests/data/mlwh_pb_run_92/200-PacBioRun.yml b/tests/data/mlwh_pb_runs/200-PacBioRun.yml similarity index 68% rename from 
tests/data/mlwh_pb_run_92/200-PacBioRun.yml rename to tests/data/mlwh_pb_runs/200-PacBioRun.yml index c403aff7..b87a5e5f 100644 --- a/tests/data/mlwh_pb_run_92/200-PacBioRun.yml +++ b/tests/data/mlwh_pb_runs/200-PacBioRun.yml @@ -1246,5 +1246,556 @@ tag_set_name: Sequel_48_Microbial_Barcoded_OHA_v1 well_label: D1 well_uuid_lims: 1cc67f96-ec1d-47da-a991-2fcfd53d09be - +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120626 + id_sample_tmp: 9336988 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7673 + pac_bio_library_tube_id_lims: 5980 + pac_bio_library_tube_name: DTOL14523333 + pac_bio_library_tube_uuid: 6e4ff201-b417-4041-a0e2-c24a7a5c7f06 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2020 + tag_sequence: ACTATGCGTCGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: A1 + well_uuid_lims: 2ae339ab-ddf7-4520-9c33-dde1c1055158 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120627 + id_sample_tmp: 9321748 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7673 + pac_bio_library_tube_id_lims: 5981 + pac_bio_library_tube_name: DTOL14452869 + pac_bio_library_tube_uuid: 32f08465-d1db-4c7f-bd16-f78c13a7587e + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2011 + tag_sequence: CTGACAGTACGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: A1 + well_uuid_lims: 2ae339ab-ddf7-4520-9c33-dde1c1055158 +- cost_code: 
S4773 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120628 + id_sample_tmp: 8436094 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7674 + pac_bio_library_tube_id_lims: 5982 + pac_bio_library_tube_name: DTOL13161568 + pac_bio_library_tube_uuid: df7b6f48-83e3-4c79-a2c2-ac7b88263bf1 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc1011_BAK8A_OA + tag_sequence: CTATACGTATATCTATT + tag_set_id_lims: 1 + tag_set_name: Sequel_16_barcodes_v3 + well_label: B1 + well_uuid_lims: dc4995b1-62aa-446e-9524-74be301d85ea +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120629 + id_sample_tmp: 8579905 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7674 + pac_bio_library_tube_id_lims: 5983 + pac_bio_library_tube_name: DTOL13642523 + pac_bio_library_tube_uuid: 424e1d84-1d07-4883-9b15-90ce3318e987 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc1022_BAK8B_OA + tag_sequence: CACTCACGTGTGATATT + tag_set_id_lims: 1 + tag_set_name: Sequel_16_barcodes_v3 + well_label: B1 + well_uuid_lims: dc4995b1-62aa-446e-9524-74be301d85ea +- cost_code: S4773 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120630 + id_sample_tmp: 8427375 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7674 + pac_bio_library_tube_id_lims: 5984 + pac_bio_library_tube_name: DTOL13174735 + pac_bio_library_tube_uuid: 
1f54742d-c690-4b60-ba75-b610796cd419 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc1001_BAK8A_OA + tag_sequence: CACATATCAGAGTGCGT + tag_set_id_lims: 1 + tag_set_name: Sequel_16_barcodes_v3 + well_label: B1 + well_uuid_lims: dc4995b1-62aa-446e-9524-74be301d85ea +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120631 + id_sample_tmp: 9302420 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7675 + pac_bio_library_tube_id_lims: 5985 + pac_bio_library_tube_name: DTOL14162027 + pac_bio_library_tube_uuid: 772c136a-d5b4-481e-9b3b-975d5e655eff + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2035 + tag_sequence: CAGCTGACATGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: a083ef9a-730c-45bb-a4b9-8e2c31b510bb +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120632 + id_sample_tmp: 9309564 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7675 + pac_bio_library_tube_id_lims: 5986 + pac_bio_library_tube_name: DTOL13810175 + pac_bio_library_tube_uuid: a37f15e5-4971-4f61-b34f-6ca48e759a9a + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2052 + tag_sequence: CAGACTAGTCGAGTAT + tag_set_id_lims: 9 + tag_set_name: 
Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: a083ef9a-730c-45bb-a4b9-8e2c31b510bb +- cost_code: S4773 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120633 + id_sample_tmp: 8559246 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7676 + pac_bio_library_tube_id_lims: 5987 + pac_bio_library_tube_name: DTOL13630447 + pac_bio_library_tube_uuid: ecbc19bd-be7c-41e9-8e4e-fba4da36099b + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2036 + tag_sequence: ACGTCGCTGCGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: a0a9450d-68df-41c1-9241-eb117f3f9210 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120634 + id_sample_tmp: 8588834 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7676 + pac_bio_library_tube_id_lims: 5988 + pac_bio_library_tube_name: DTOL13653276 + pac_bio_library_tube_uuid: ad99a84b-6a73-4847-a18e-fc31edbb42d1 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2040 + tag_sequence: TGCTGCGACTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: a0a9450d-68df-41c1-9241-eb117f3f9210 +- cost_code: S10513 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120635 + id_sample_tmp: 8582947 + id_study_tmp: 6615 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7676 + pac_bio_library_tube_id_lims: 5989 + 
pac_bio_library_tube_name: 6771STDY13618009 + pac_bio_library_tube_uuid: 8e3f3c37-7db1-4f35-85f1-b39b96d5cd53 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2054 + tag_sequence: CTGCGATCACGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: a0a9450d-68df-41c1-9241-eb117f3f9210 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120636 + id_sample_tmp: 9321705 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7676 + pac_bio_library_tube_id_lims: 5990 + pac_bio_library_tube_name: DTOL14291044 + pac_bio_library_tube_uuid: 820294b7-2a66-492f-a9a3-81e2cfa8c4f9 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 90d9a51b-0a57-4580-a449-7928b51cffe7 + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2063 + tag_sequence: TCTGCATCATGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: a0a9450d-68df-41c1-9241-eb117f3f9210 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120637 + id_sample_tmp: 9336869 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7677 + pac_bio_library_tube_id_lims: 5991 + pac_bio_library_tube_name: DTOL14523243 + pac_bio_library_tube_uuid: b51938ee-533c-4fc3-b4dd-32db93a44518 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2016 + 
tag_sequence: ATCTGCACGTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: A1 + well_uuid_lims: ad154a80-2e40-4f83-bb9d-9e5cda87cf70 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120638 + id_sample_tmp: 9371025 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7677 + pac_bio_library_tube_id_lims: 5992 + pac_bio_library_tube_name: DTOL14180244 + pac_bio_library_tube_uuid: e2c918e2-2401-4c48-8e74-05cdee1945f6 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2096 + tag_sequence: ATGTACTAGTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: A1 + well_uuid_lims: ad154a80-2e40-4f83-bb9d-9e5cda87cf70 +- cost_code: s4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120639 + id_sample_tmp: 9367590 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7678 + pac_bio_library_tube_id_lims: 5993 + pac_bio_library_tube_name: DTOL14026427 + pac_bio_library_tube_uuid: 8cfefce6-f87b-41d8-b3ef-133fd36f8871 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2056 + tag_sequence: TCGCAGCGTCGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: B1 + well_uuid_lims: d3988e8a-4284-4011-9f60-f905cd8f8f45 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120640 + id_sample_tmp: 9358950 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + 
pac_bio_library_tube_barcode: TRAC-2-7678 + pac_bio_library_tube_id_lims: 5994 + pac_bio_library_tube_name: DTOL14200517 + pac_bio_library_tube_uuid: 8d316f10-7433-4989-b456-624309dbff49 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2072 + tag_sequence: TATCAGTAGTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: B1 + well_uuid_lims: d3988e8a-4284-4011-9f60-f905cd8f8f45 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120641 + id_sample_tmp: 9358974 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7679 + pac_bio_library_tube_id_lims: 5996 + pac_bio_library_tube_name: DTOL14200539 + pac_bio_library_tube_uuid: aed9d9da-feb1-4cdf-aaf8-e6c38b66d938 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2021 + tag_sequence: CGTACATGCTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: 60aa3fb3-5277-41b3-b269-ac0343e3cfe0 +- cost_code: S4698 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120642 + id_sample_tmp: 9321998 + id_study_tmp: 5967 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7679 + pac_bio_library_tube_id_lims: 6112 + pac_bio_library_tube_name: BlaxGeNe14563575 + pac_bio_library_tube_uuid: 15dd954a-1422-4058-a691-a13350350e86 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 
15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2011 + tag_sequence: CTGACAGTACGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: 60aa3fb3-5277-41b3-b269-ac0343e3cfe0 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120643 + id_sample_tmp: 9336860 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7679 + pac_bio_library_tube_id_lims: 6113 + pac_bio_library_tube_name: DTOL14523233 + pac_bio_library_tube_uuid: 9b763255-4f11-4a22-99d8-4f02be5b3493 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: Pacbio_HiFi + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2015 + tag_sequence: CGACATAGATGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: 60aa3fb3-5277-41b3-b269-ac0343e3cfe0 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120644 + id_sample_tmp: 8625878 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7680 + pac_bio_library_tube_id_lims: 5997 + pac_bio_library_tube_name: DTOL13788282 + pac_bio_library_tube_uuid: c5f1229c-3182-473a-a9f6-0f2dc696213f + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2083 + tag_sequence: ACTCATCAGTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: d33e27cc-1f31-4189-b8b5-bc2a03bbcc16 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120645 + 
id_sample_tmp: 8630655 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7680 + pac_bio_library_tube_id_lims: 5998 + pac_bio_library_tube_name: DTOL13795633 + pac_bio_library_tube_uuid: 935cfd51-e36c-49d8-bf30-3a5e616eb663 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2084 + tag_sequence: CTGAGCACTCGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: d33e27cc-1f31-4189-b8b5-bc2a03bbcc16 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120646 + id_sample_tmp: 8630656 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7680 + pac_bio_library_tube_id_lims: 5999 + pac_bio_library_tube_name: DTOL13795634 + pac_bio_library_tube_uuid: 88461b2d-7abe-45cc-98d0-640205f01c42 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 + pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2085 + tag_sequence: ATCATCTACTGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: d33e27cc-1f31-4189-b8b5-bc2a03bbcc16 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1140 + id_pac_bio_tmp: 120647 + id_sample_tmp: 8630660 + id_study_tmp: 5735 + last_updated: 2024-02-23 10:05:37 + pac_bio_library_tube_barcode: TRAC-2-7680 + pac_bio_library_tube_id_lims: 6000 + pac_bio_library_tube_name: DTOL13795638 + pac_bio_library_tube_uuid: 69733884-664d-427a-bcbb-9b24dfe1ff06 + pac_bio_run_name: TRACTION-RUN-1140 + pac_bio_run_uuid: c0fc009f-1f86-4a61-8869-5d1f76b90ea9 
+ pipeline_id_lims: PacBio_Ultra_Low_Input + plate_number: 2 + plate_uuid_lims: 15e4d545-76b3-44e3-98f4-4eef01ef437e + recorded_at: 2024-02-23 10:05:37 + tag_identifier: bc2094 + tag_sequence: TAGATACAGCGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: d33e27cc-1f31-4189-b8b5-bc2a03bbcc16 +- cost_code: s10492 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1162 + id_pac_bio_tmp: 120767 + id_sample_tmp: 9364225 + id_study_tmp: 5656 + last_updated: 2024-03-05 16:15:13 + pac_bio_library_tube_barcode: TRAC-2-7576 + pac_bio_library_tube_id_lims: 11158 + pac_bio_library_tube_name: DTOL_RD14581308 + pac_bio_library_tube_uuid: 7edf3079-d7cf-4a14-9587-1a0312a75518 + pac_bio_run_name: TRACTION-RUN-1162 + pac_bio_run_uuid: 0a11c900-22ce-402b-82b4-3fb9fe7c65b6 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 99f3e81b-3857-4c5a-a2b1-0e427bb6d4f4 + recorded_at: 2024-03-05 16:15:13 + tag_identifier: bc2070 + tag_sequence: TACTGCTCACGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: C1 + well_uuid_lims: c18becff-8538-4820-a3b3-7e1a9c0d1f17 +- cost_code: S4699 + id_lims: Traction + id_pac_bio_run_lims: TRACTION-RUN-1162 + id_pac_bio_tmp: 120768 + id_sample_tmp: 9321777 + id_study_tmp: 5735 + last_updated: 2024-03-05 16:15:13 + pac_bio_library_tube_barcode: TRAC-2-7128 + pac_bio_library_tube_id_lims: 10799 + pac_bio_library_tube_name: DTOL14290946 + pac_bio_library_tube_uuid: 806baad7-11c9-4375-ab3c-ac408d78129f + pac_bio_run_name: TRACTION-RUN-1162 + pac_bio_run_uuid: 0a11c900-22ce-402b-82b4-3fb9fe7c65b6 + pipeline_id_lims: Pacbio_HiFi + plate_number: 1 + plate_uuid_lims: 99f3e81b-3857-4c5a-a2b1-0e427bb6d4f4 + recorded_at: 2024-03-05 16:15:13 + tag_identifier: bc2055 + tag_sequence: CTCAGCATACGAGTAT + tag_set_id_lims: 9 + tag_set_name: Pacbio_96_barcode_plate_v3 + well_label: D1 + well_uuid_lims: 6bbf1bf3-bde6-4894-beb4-87dcab110000 diff --git 
a/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml b/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml new file mode 100644 index 00000000..db7d1211 --- /dev/null +++ b/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml @@ -0,0 +1,944 @@ +--- +- adapter_dimer_percent: 0 + binding_kit: Sequel II Binding Kit 2.2 + ccs_execution_mode: OnInstrument + cell_lot_number: 417079 + chemistry_sw_version: 10.2.0.133424 + chip_type: 8mChip + control_concordance_mean: 0.868282 + control_concordance_mode: 0.91 + control_num_reads: 24837 + control_read_length_mean: 50169 + created_by: eg18 + demultiplex_mode: ~ + heteroduplex_analysis: ~ + hifi_barcoded_reads: ~ + hifi_bases_in_barcoded_reads: ~ + hifi_low_quality_num_reads: 260532 + hifi_low_quality_read_bases: 2670039556 + hifi_low_quality_read_length_mean: 10248 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 2877051 + hifi_number_passes_mean: 18 + hifi_only_reads: ~ + hifi_read_bases: 27076668646 + hifi_read_length_mean: 9411 + hifi_read_quality_median: 43 + id_pac_bio_product: cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61 + id_pac_bio_rw_metrics_tmp: 1732 + include_kinetics: 0 + insert_length_mean: 16227 + insert_length_n50: 23250 + instrument_name: 64222E + instrument_sw_version: 10.1.0.119549 + instrument_type: Sequel2e + loading_conc: 80 + local_base_rate: 2.76789 + movie_minutes: 1800 + movie_name: m64222e_220414_130247 + p0_num: 2800130 + p1_num: 5033971 + p2_num: 180570 + pac_bio_run_name: TRACTION-RUN-92 + polymerase_num_reads: 5009134 + polymerase_read_bases: 645567171652 + polymerase_read_length_mean: 128878 + polymerase_read_length_n50: 251250 + primary_analysis_sw_version: 10.1.0.119549 + productive_zmws_num: 7989834 + run_complete: 2022-04-20 09:16:53 + run_start: 2022-04-14 12:52:34 + run_status: Complete + run_transfer_complete: 2022-04-20 16:03:18 + sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) + sequencing_kit_lot_number: 123660 + short_insert_percent: 0 + 
sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da + sl_ccs_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29ro + ts_run_name: r64222e_20220414_125138 + unique_molecular_bases: 68501667840 + well_complete: 2022-04-16 12:36:21 + well_label: A1 + well_start: 2022-04-14 13:02:48 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Sequel II Binding Kit 2.2 + ccs_execution_mode: OnInstrument + cell_lot_number: 417079 + chemistry_sw_version: 10.2.0.133424 + chip_type: 8mChip + control_concordance_mean: 0.865786 + control_concordance_mode: 0.91 + control_num_reads: 42962 + control_read_length_mean: 51810 + created_by: eg18 + demultiplex_mode: ~ + heteroduplex_analysis: ~ + hifi_barcoded_reads: ~ + hifi_bases_in_barcoded_reads: ~ + hifi_low_quality_num_reads: 204155 + hifi_low_quality_read_bases: 1887795985 + hifi_low_quality_read_length_mean: 9246 + hifi_low_quality_read_quality_median: 16 + hifi_num_reads: 2082542 + hifi_number_passes_mean: 18 + hifi_only_reads: ~ + hifi_read_bases: 17706147796 + hifi_read_length_mean: 8502 + hifi_read_quality_median: 40 + id_pac_bio_product: 63fb9a37ff19c248fc7d99bd254a61085226ded540de7c5445daf1398e339833 + id_pac_bio_rw_metrics_tmp: 1733 + include_kinetics: 0 + insert_length_mean: 16407 + insert_length_n50: 28250 + instrument_name: 64222E + instrument_sw_version: 10.1.0.119549 + instrument_type: Sequel2e + loading_conc: 80 + local_base_rate: 2.80566 + movie_minutes: 1800 + movie_name: m64222e_220415_235643 + p0_num: 4427179 + p1_num: 3512385 + p2_num: 75107 + pac_bio_run_name: TRACTION-RUN-92 + polymerase_num_reads: 3469423 + polymerase_read_bases: 455958509506 + polymerase_read_length_mean: 131422 + polymerase_read_length_n50: 251750 + primary_analysis_sw_version: 10.1.0.119549 + productive_zmws_num: 7971709 + run_complete: 2022-04-20 09:16:53 + run_start: 2022-04-14 12:52:34 + run_status: Complete + run_transfer_complete: 2022-04-20 16:03:18 + sequencing_kit: Sequel II Sequencing 
Plate 2.0 (4 rxn) + sequencing_kit_lot_number: 123660 + short_insert_percent: 0 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da + sl_ccs_uuid: ~ + ts_run_name: r64222e_20220414_125138 + unique_molecular_bases: 45990051840 + well_complete: 2022-04-17 23:16:35 + well_label: B1 + well_start: 2022-04-15 23:56:43 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Sequel II Binding Kit 2.2 + ccs_execution_mode: OnInstrument + cell_lot_number: 417079 + chemistry_sw_version: 10.2.0.133424 + chip_type: 8mChip + control_concordance_mean: 0.866249 + control_concordance_mode: 0.89 + control_num_reads: 40787 + control_read_length_mean: 49421 + created_by: eg18 + demultiplex_mode: ~ + heteroduplex_analysis: ~ + hifi_low_quality_num_reads: 173039 + hifi_low_quality_read_bases: 1743404181 + hifi_low_quality_read_length_mean: 10075 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 1698191 + hifi_number_passes_mean: 17 + hifi_only_reads: ~ + hifi_read_bases: 16029605719 + hifi_read_length_mean: 9439 + hifi_read_quality_median: 40 + hifi_barcoded_reads: ~ + hifi_bases_in_barcoded_reads: ~ + id_pac_bio_product: a65eae06f3048a186aeb9104d0a8d3f46ca59dff7747eec9918fcfa85587a3c2 + id_pac_bio_rw_metrics_tmp: 1734 + include_kinetics: 0 + insert_length_mean: 15260 + insert_length_n50: 15750 + instrument_name: 64222E + instrument_sw_version: 10.1.0.119549 + instrument_type: Sequel2e + loading_conc: 130 + local_base_rate: 2.76102 + movie_minutes: 1800 + movie_name: m64222e_220417_105320 + p0_num: 5019384 + p1_num: 2944690 + p2_num: 50597 + pac_bio_run_name: TRACTION-RUN-92 + polymerase_num_reads: 2903903 + polymerase_read_bases: 360656040891 + polymerase_read_length_mean: 124197 + polymerase_read_length_n50: 244750 + primary_analysis_sw_version: 10.1.0.119549 + productive_zmws_num: 7973884 + run_complete: 2022-04-20 09:16:53 + run_start: 2022-04-14 12:52:34 + run_status: Complete + run_transfer_complete: 
2022-04-20 16:03:18 + sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) + sequencing_kit_lot_number: 123660 + short_insert_percent: 0 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da + sl_ccs_uuid: ~ + ts_run_name: r64222e_20220414_125138 + unique_molecular_bases: 37178580992 + well_complete: 2022-04-19 04:22:35 + well_label: C1 + well_start: 2022-04-17 10:53:21 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Sequel II Binding Kit 2.2 + ccs_execution_mode: OnInstrument + cell_lot_number: 417079 + chemistry_sw_version: 10.2.0.133424 + chip_type: 8mChip + control_concordance_mean: 0.863292 + control_concordance_mode: 0.91 + control_num_reads: 24842 + control_read_length_mean: 44982 + created_by: eg18 + demultiplex_mode: ~ + heteroduplex_analysis: ~ + hifi_barcoded_reads: ~ + hifi_bases_in_barcoded_reads: ~ + hifi_low_quality_num_reads: 193115 + hifi_low_quality_read_bases: 1672216211 + hifi_low_quality_read_length_mean: 8659 + hifi_low_quality_read_quality_median: 16 + hifi_num_reads: 1912626 + hifi_number_passes_mean: 18 + hifi_only_reads: ~ + hifi_read_bases: 14052550494 + hifi_read_length_mean: 7347 + hifi_read_quality_median: 41 + id_pac_bio_product: c5babd5516f7b9faab8415927e5f300d5152bb96b8b922e768d876469a14fa5d + id_pac_bio_rw_metrics_tmp: 1735 + include_kinetics: 0 + insert_length_mean: 15043 + insert_length_n50: 33750 + instrument_name: 64222E + instrument_sw_version: 10.1.0.119549 + instrument_type: Sequel2e + loading_conc: 80 + local_base_rate: 2.71738 + movie_minutes: 1800 + movie_name: m64222e_220418_214938 + p0_num: 3784476 + p1_num: 4078117 + p2_num: 152078 + pac_bio_run_name: TRACTION-RUN-92 + polymerase_num_reads: 4053275 + polymerase_read_bases: 405355872925 + polymerase_read_length_mean: 100007 + polymerase_read_length_n50: 234250 + primary_analysis_sw_version: 10.1.0.119549 + productive_zmws_num: 7989829 + run_complete: 2022-04-20 09:16:53 + run_start: 2022-04-14 12:52:34 
+ run_status: Complete + run_transfer_complete: 2022-04-20 16:03:18 + sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) + sequencing_kit_lot_number: 123660 + short_insert_percent: 0.01 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da + sl_ccs_uuid: ~ + ts_run_name: r64222e_20220414_125138 + unique_molecular_bases: 48624848896 + well_complete: 2022-04-20 16:03:18 + well_label: D1 + well_start: 2022-04-18 21:49:38 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.907739 + control_concordance_mode: 0.91 + control_num_reads: 2756 + control_read_length_mean: 78413 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 3528067 + hifi_bases_in_barcoded_reads: 46846424758 + hifi_low_quality_num_reads: 84204 + hifi_low_quality_read_bases: 1260401933 + hifi_low_quality_read_length_mean: 14968 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 3541519 + hifi_number_passes_mean: 14 + hifi_only_reads: 1 + hifi_read_bases: 47020035735 + hifi_read_length_mean: 13307 + hifi_read_quality_median: 37 + id_pac_bio_product: 51094a0e0ca9d839d0ffa4fcf08bb963cbdfd3e9ed0edb3045b8f68f3db5e3af + id_pac_bio_rw_metrics_tmp: 6206 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 14:10:14 + loading_conc: 132 + local_base_rate: 2.6208 + movie_minutes: 1440 + movie_name: m84093_240223_140830_s1 + p0_num: 16987657 + p1_num: 8139802 + p2_num: 38365 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 1 + polymerase_num_reads: 8137046 + polymerase_read_bases: 661743638541 + polymerase_read_length_mean: 81325 + polymerase_read_length_n50: ~ + 
primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 13:54:34 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: 963d008f-3e0c-4c6c-b506-1ff04dffccdd + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 115761872896 + well_complete: 2024-02-24 23:29:30 + well_label: A1 + well_start: 2024-02-23 14:08:30 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.905139 + control_concordance_mode: 0.91 + control_num_reads: 1952 + control_read_length_mean: 74119 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 4478577 + hifi_bases_in_barcoded_reads: 51165353149 + hifi_low_quality_num_reads: 100793 + hifi_low_quality_read_bases: 1244549883 + hifi_low_quality_read_length_mean: 12347 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 4501489 + hifi_number_passes_mean: 15 + hifi_only_reads: 1 + hifi_read_bases: 51421432183 + hifi_read_length_mean: 11453 + hifi_read_quality_median: 38 + id_pac_bio_product: 513c674f489b106c6af716dd0d210826ff03b7648d50888839c3722ca1b10dbf + id_pac_bio_rw_metrics_tmp: 6210 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 203 + local_base_rate: 2.6256 + movie_minutes: 1440 + movie_name: m84093_240224_142512_s1 + p0_num: 
15323739 + p1_num: 9783967 + p2_num: 58118 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 2 + polymerase_num_reads: 9782015 + polymerase_read_bases: 773527509148 + polymerase_read_length_mean: 79076 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:57:13 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: 75d2a390-9840-460e-a6b2-1de7d1bd75d5 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 126338891776 + well_complete: 2024-02-26 00:27:52 + well_label: A1 + well_start: 2024-02-24 14:25:12 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.904677 + control_concordance_mode: 0.91 + control_num_reads: 3035 + control_read_length_mean: 43102 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 3448648 + hifi_bases_in_barcoded_reads: 31779577424 + hifi_low_quality_num_reads: 266811 + hifi_low_quality_read_bases: 2539344050 + hifi_low_quality_read_length_mean: 9517 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 4787230 + hifi_number_passes_mean: 11 + hifi_only_reads: 1 + hifi_read_bases: 45578791158 + hifi_read_length_mean: 9559 + hifi_read_quality_median: 37 + id_pac_bio_product: 26b18bab742e5b1d0dae2f5462b7e42ee5f40bd5e505c8dec69b11591b8884c8 + id_pac_bio_rw_metrics_tmp: 6207 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + 
instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-03-05 15:10:36 + loading_conc: 207 + local_base_rate: 2.21877 + movie_minutes: 1440 + movie_name: m84093_240223_144004_s2 + p0_num: 13791241 + p1_num: 11298110 + p2_num: 76473 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 1 + polymerase_num_reads: 11295075 + polymerase_read_bases: 690231867682 + polymerase_read_length_mean: 61109 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 0 + qc_seq_date: 2024-03-05 14:30:59 + qc_seq_state: 'Failed, SMRT cell' + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0.01 + sl_ccs_uuid: e72398a9-cc9b-4c98-a304-e1ba6b6c30ba + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 123412045824 + well_complete: 2024-02-25 00:07:54 + well_label: B1 + well_start: 2024-02-23 14:40:04 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.911228 + control_concordance_mode: 0.91 + control_num_reads: 2308 + control_read_length_mean: 69661 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 2899704 + hifi_bases_in_barcoded_reads: 36621724417 + hifi_low_quality_num_reads: 121486 + hifi_low_quality_read_bases: 1479247536 + hifi_low_quality_read_length_mean: 12176 + hifi_low_quality_read_quality_median: 16 + hifi_num_reads: 2913451 + hifi_number_passes_mean: 13 + hifi_only_reads: 1 + hifi_read_bases: 36760645135 + 
hifi_read_length_mean: 12648 + hifi_read_quality_median: 38 + id_pac_bio_product: 9aa3536a661d9c80ef0e962ef7cbd795c5893ed0600b2e331824f4d17cee6f70 + id_pac_bio_rw_metrics_tmp: 6211 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 147 + local_base_rate: 2.38396 + movie_minutes: 1440 + movie_name: m84093_240224_145729_s2 + p0_num: 17389564 + p1_num: 7735267 + p2_num: 40993 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 2 + polymerase_num_reads: 7732959 + polymerase_read_bases: 635399455224 + polymerase_read_length_mean: 82168 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:50:29 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: abab78b7-889a-473d-ae1d-0957071c9c1f + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 131024281600 + well_complete: 2024-02-26 04:53:26 + well_label: B1 + well_start: 2024-02-24 14:57:29 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.908116 + control_concordance_mode: 0.91 + control_num_reads: 3508 + control_read_length_mean: 77822 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 4785745 + hifi_bases_in_barcoded_reads: 55176093961 + hifi_low_quality_num_reads: 98395 + 
hifi_low_quality_read_bases: 1210135020 + hifi_low_quality_read_length_mean: 12298 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 4807206 + hifi_number_passes_mean: 15 + hifi_only_reads: 1 + hifi_read_bases: 55421297045 + hifi_read_length_mean: 11559 + hifi_read_quality_median: 39 + id_pac_bio_product: c1fa777f6154e38df42aa01f2243d54e50d71c1b0279e17d48f928412b13098e + id_pac_bio_rw_metrics_tmp: 6208 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 251 + local_base_rate: 2.57069 + movie_minutes: 1440 + movie_name: m84093_240223_151221_s3 + p0_num: 15698926 + p1_num: 9425055 + p2_num: 41843 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 1 + polymerase_num_reads: 9421547 + polymerase_read_bases: 832856275408 + polymerase_read_length_mean: 88399 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:58:28 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: 0cc14fc3-09fe-449a-9836-836c020868d1 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 123970002944 + well_complete: 2024-02-25 04:56:03 + well_label: C1 + well_start: 2024-02-23 15:12:21 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.908994 + control_concordance_mode: 0.91 + 
control_num_reads: 2911 + control_read_length_mean: 66109 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 5591332 + hifi_bases_in_barcoded_reads: 78369321152 + hifi_low_quality_num_reads: 167825 + hifi_low_quality_read_bases: 2497650060 + hifi_low_quality_read_length_mean: 14882 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 5614217 + hifi_number_passes_mean: 11 + hifi_only_reads: 1 + hifi_read_bases: 78684120092 + hifi_read_length_mean: 14045 + hifi_read_quality_median: 35 + id_pac_bio_product: 26928ba6ec2a00c04dd6c7c68008ec9436e3979a384b9f708dc371c99f272e17 + id_pac_bio_rw_metrics_tmp: 6212 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 193 + local_base_rate: 2.52023 + movie_minutes: 1440 + movie_name: m84093_240224_152946_s3 + p0_num: 11834823 + p1_num: 13242033 + p2_num: 88968 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 2 + polymerase_num_reads: 13239122 + polymerase_read_bases: 1008552342223 + polymerase_read_length_mean: 76180 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:49:43 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: a8e25fa2-cc02-4900-8726-d4596fc420a8 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 203873124352 + well_complete: 2024-02-26 08:09:31 + well_label: C1 + well_start: 2024-02-24 15:29:46 + well_status: Complete +- 
adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.904516 + control_concordance_mode: 0.91 + control_num_reads: 2354 + control_read_length_mean: 69681 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 6834801 + hifi_bases_in_barcoded_reads: 77850197661 + hifi_low_quality_num_reads: 276005 + hifi_low_quality_read_bases: 3291108067 + hifi_low_quality_read_length_mean: 11924 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 6868920 + hifi_number_passes_mean: 13 + hifi_only_reads: 1 + hifi_read_bases: 78217597101 + hifi_read_length_mean: 11417 + hifi_read_quality_median: 37 + id_pac_bio_product: 842022fd31778158517b3e3e5bdccfbaac3e0b874a395cfc7912bc17b303edf9 + id_pac_bio_rw_metrics_tmp: 6209 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 230 + local_base_rate: 2.35137 + movie_minutes: 1440 + movie_name: m84093_240223_154438_s4 + p0_num: 8854913 + p1_num: 16179228 + p2_num: 131683 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 1 + polymerase_num_reads: 16176874 + polymerase_read_bases: 1155795587428 + polymerase_read_length_mean: 71447 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:57:37 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0 + sl_ccs_uuid: fe20e437-8962-4d30-9db6-9719d6e23bc0 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + 
sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 213622784000 + well_complete: 2024-02-25 09:59:06 + well_label: D1 + well_start: 2024-02-23 15:44:38 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.906096 + control_concordance_mode: 0.91 + control_num_reads: 3293 + control_read_length_mean: 66482 + created_by: su3 + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 7749814 + hifi_bases_in_barcoded_reads: 81448324274 + hifi_low_quality_num_reads: 222688 + hifi_low_quality_read_bases: 2321289472 + hifi_low_quality_read_length_mean: 10423 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 7767562 + hifi_number_passes_mean: 13 + hifi_only_reads: 1 + hifi_read_bases: 81629248758 + hifi_read_length_mean: 10540 + hifi_read_quality_median: 38 + id_pac_bio_product: f101c62463c6fdbf07635f3848f5b60d4c4ba137ba0cf3fbe0df886aff1cdeeb + id_pac_bio_rw_metrics_tmp: 6213 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84093 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-02-28 11:10:15 + loading_conc: 249 + local_base_rate: 2.59623 + movie_minutes: 1440 + movie_name: m84093_240224_160202_s4 + p0_num: 10939806 + p1_num: 14128436 + p2_num: 97582 + pac_bio_run_name: TRACTION-RUN-1140 + plate_number: 2 + polymerase_num_reads: 14125143 + polymerase_read_bases: 1279541616312 + polymerase_read_length_mean: 90586 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-02-28 10:49:03 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-02-25 20:53:05 + run_start: 2024-02-23 10:28:12 + run_status: Complete + 
run_transfer_complete: 2024-02-26 13:41:02 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033482 + short_insert_percent: 0.06 + sl_ccs_uuid: 2607cccf-674b-4bb8-9186-19ecaab35e02 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: 89539688-31c7-49e5-af17-db4735e0a0c1 + ts_run_name: r84093_20240223_102558 + unique_molecular_bases: 175556214784 + well_complete: 2024-02-26 13:41:02 + well_label: D1 + well_start: 2024-02-24 16:02:02 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.912356 + control_concordance_mode: 0.93 + control_num_reads: 1999 + control_read_length_mean: 62463 + created_by: mls + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 6173350 + hifi_bases_in_barcoded_reads: 78726403707 + hifi_low_quality_num_reads: 177743 + hifi_low_quality_read_bases: 2378270804 + hifi_low_quality_read_length_mean: 13380 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 6203304 + hifi_number_passes_mean: 11 + hifi_only_reads: 1 + hifi_read_bases: 79073748871 + hifi_read_length_mean: 12777 + hifi_read_quality_median: 37 + id_pac_bio_product: 763de43c3b5a025bb4b6a6b1f17d921a9ddc92795f21b2156270c62ddbe2292d + id_pac_bio_rw_metrics_tmp: 6306 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84098 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-03-08 12:10:14 + loading_conc: 222 + local_base_rate: 2.28141 + movie_minutes: 1440 + movie_name: m84098_240306_114240_s3 + p0_num: 11809042 + p1_num: 13243515 + p2_num: 113267 + pac_bio_run_name: TRACTION-RUN-1162 + plate_number: 1 + polymerase_num_reads: 13241516 + polymerase_read_bases: 1027325804889 + polymerase_read_length_mean: 77584 + polymerase_read_length_n50: ~ + 
primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-03-08 11:58:01 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-03-07 16:57:46 + run_start: 2024-03-06 10:20:53 + run_status: Complete + run_transfer_complete: 2024-03-08 00:51:51 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033676 + short_insert_percent: 0 + sl_ccs_uuid: 8f43ff90-3791-4c91-b353-b281ea9e05a2 + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: e5ae50f3-d68b-4292-84f3-3fd10c1397ef + ts_run_name: r84098_20240306_101912 + unique_molecular_bases: 194611986432 + well_complete: 2024-03-08 00:51:51 + well_label: C1 + well_start: 2024-03-06 11:42:40 + well_status: Complete +- adapter_dimer_percent: 0 + binding_kit: Revio polymerase kit + ccs_execution_mode: OnInstrument + cell_lot_number: 1000002284 + chemistry_sw_version: 13.0.0.205983 + chip_type: 25mChip + control_concordance_mean: 0.910574 + control_concordance_mode: 0.91 + control_num_reads: 2648 + control_read_length_mean: 64785 + created_by: mls + demultiplex_mode: OnInstrument + heteroduplex_analysis: 0 + hifi_barcoded_reads: 5669031 + hifi_bases_in_barcoded_reads: 60299550012 + hifi_low_quality_num_reads: 129962 + hifi_low_quality_read_bases: 1451269786 + hifi_low_quality_read_length_mean: 11166 + hifi_low_quality_read_quality_median: 17 + hifi_num_reads: 5695377 + hifi_number_passes_mean: 14 + hifi_only_reads: 1 + hifi_read_bases: 60589827836 + hifi_read_length_mean: 10668 + hifi_read_quality_median: 38 + id_pac_bio_product: f26bda789770f7029867a84ad5175818fde63fc45af79754d46cfccfd3e437b0 + id_pac_bio_rw_metrics_tmp: 6307 + include_kinetics: 0 + insert_length_mean: ~ + insert_length_n50: ~ + instrument_name: 84098 + instrument_sw_version: 13.0.0.212033 + instrument_type: Revio + last_changed: 2024-03-08 12:10:14 + loading_conc: 253 + local_base_rate: 2.44579 + movie_minutes: 1440 + movie_name: m84098_240306_121456_s4 + p0_num: 
13167825 + p1_num: 11795718 + p2_num: 202281 + pac_bio_run_name: TRACTION-RUN-1162 + plate_number: 1 + polymerase_num_reads: 11793070 + polymerase_read_bases: 898707409648 + polymerase_read_length_mean: 76206 + polymerase_read_length_n50: ~ + primary_analysis_sw_version: 13.0.0.212033 + productive_zmws_num: 25165824 + qc_seq: 1 + qc_seq_date: 2024-03-08 11:56:48 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + run_complete: 2024-03-07 16:57:46 + run_start: 2024-03-06 10:20:53 + run_status: Complete + run_transfer_complete: 2024-03-08 00:51:51 + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 033676 + short_insert_percent: 0 + sl_ccs_uuid: 8463bd15-b2f2-4780-a9e6-a4547e64d9de + sl_hostname: pacbio01.dnapipelines.sanger.ac.uk + sl_run_uuid: e5ae50f3-d68b-4292-84f3-3fd10c1397ef + ts_run_name: r84098_20240306_101912 + unique_molecular_bases: 152831311872 + well_complete: 2024-03-07 23:26:11 + well_label: D1 + well_start: 2024-03-06 12:14:56 + well_status: Complete + diff --git a/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml new file mode 100644 index 00000000..0b6de2e2 --- /dev/null +++ b/tests/data/mlwh_pb_runs/300-PacBioProductMetrics.yml @@ -0,0 +1,462 @@ +--- +- id_pac_bio_pr_metrics_tmp: 9675 + id_pac_bio_product: cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61 + id_pac_bio_rw_metrics_tmp: 1732 + id_pac_bio_tmp: 98966 +- id_pac_bio_pr_metrics_tmp: 9676 + id_pac_bio_product: 63fb9a37ff19c248fc7d99bd254a61085226ded540de7c5445daf1398e339833 + id_pac_bio_rw_metrics_tmp: 1733 + id_pac_bio_tmp: 98967 +- id_pac_bio_pr_metrics_tmp: 9677 + id_pac_bio_product: a65eae06f3048a186aeb9104d0a8d3f46ca59dff7747eec9918fcfa85587a3c2 + id_pac_bio_rw_metrics_tmp: 1734 + id_pac_bio_tmp: 98968 +- id_pac_bio_pr_metrics_tmp: 9678 + id_pac_bio_product: 57538925519f7ae568fbd5cd075fd2fb600a2273394f6537a17d97917e224b11 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98969 +- 
id_pac_bio_pr_metrics_tmp: 9679 + id_pac_bio_product: 2307cbc4bf6b6917fa6a20d7e077302ad617c4df61cc0f845facfb24548360e4 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98970 +- id_pac_bio_pr_metrics_tmp: 9680 + id_pac_bio_product: 568b595c45b01faa601cab34ac91987b738106e2c10d2343fed1a2823726b036 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98971 +- id_pac_bio_pr_metrics_tmp: 9681 + id_pac_bio_product: 5a7af02c0fa288456f61046c15fcc2f14ae14ef890eea7de2239abdbc06cd1a2 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98972 +- id_pac_bio_pr_metrics_tmp: 9682 + id_pac_bio_product: 6efad9725471425b8574eaa8772cfd077cfca761c6654dbd36fb3bdb3e939ac3 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98973 +- id_pac_bio_pr_metrics_tmp: 9683 + id_pac_bio_product: 40053aa11e7cf4b74c0dc641c42fbe45145f007a6edacd7e1d476d20d3dc8899 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98974 +- id_pac_bio_pr_metrics_tmp: 9684 + id_pac_bio_product: f0993ebd8924e24e202c269a34598972541c3bccbe9778ca38889d8119ef8fbc + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98975 +- id_pac_bio_pr_metrics_tmp: 9685 + id_pac_bio_product: 67a948d085b4be890f02a04779ed44351bf64506f14e1a703cede5b901f7732d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98976 +- id_pac_bio_pr_metrics_tmp: 9686 + id_pac_bio_product: 22be6501a64e534e8bcd4d94222b8b226880d8574b2c2a13b47993d67bf953ed + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98977 +- id_pac_bio_pr_metrics_tmp: 9687 + id_pac_bio_product: a85234a2ce0b894cdde92dd592202b93e973aec86a22584d5d5aad6207936c3a + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98978 +- id_pac_bio_pr_metrics_tmp: 9688 + id_pac_bio_product: 925c97e811bd1320575eb09a9c6208ebceb092f39be9804f13ae6ea57d6ee3f4 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98979 +- id_pac_bio_pr_metrics_tmp: 9689 + id_pac_bio_product: 4cd35d863fb960d2e65b11c3092034272b71c63a7cceb8a4eb82459b442e5649 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98980 +- 
id_pac_bio_pr_metrics_tmp: 9690 + id_pac_bio_product: 13e08185c8f214ddb7fda427eb12de14d2328966dc6dd407e04931540d563764 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98981 +- id_pac_bio_pr_metrics_tmp: 9691 + id_pac_bio_product: b5131d50fc95fcd36b1670a0a200424d8ab1568b4a8c4e3ae267ab78dd482189 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98982 +- id_pac_bio_pr_metrics_tmp: 9692 + id_pac_bio_product: f16140784d4fe024bda8c121e3f9aefb3d8907f11787be71894120048b2cc504 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98983 +- id_pac_bio_pr_metrics_tmp: 9693 + id_pac_bio_product: 5a9fe0fb67614b9fd250dcf711e1fcf3e2ce13f450ecc68af699035490e3e0ec + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98984 +- id_pac_bio_pr_metrics_tmp: 9694 + id_pac_bio_product: 9f045c9356f681c1b452ec6dd92415264f7e9646b91e82bbe9f739d2d97b621c + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98985 +- id_pac_bio_pr_metrics_tmp: 9695 + id_pac_bio_product: c060619f13f862370b4047c4a897aa4d9e907f6252d1f5e2962e09495d32becb + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98986 +- id_pac_bio_pr_metrics_tmp: 9696 + id_pac_bio_product: f997587d6945cc8feaf015e6f74247c6d5d72afeae5e4aac550cb69ebdbd8381 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98987 +- id_pac_bio_pr_metrics_tmp: 9697 + id_pac_bio_product: 044ddae518c4f27a9b4c362098d78dbddbfb3c777eb7b755d64413e4cde4cd3c + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98988 +- id_pac_bio_pr_metrics_tmp: 9698 + id_pac_bio_product: 03e42278867da02c593029170ce087048e62f6f69165a3080e94f21d3de5e6b9 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98989 +- id_pac_bio_pr_metrics_tmp: 9699 + id_pac_bio_product: fdf0c4eb042807b7694842569a907e530fe603ade6470e1b4878beab7de9c055 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98990 +- id_pac_bio_pr_metrics_tmp: 9700 + id_pac_bio_product: a5c8aa41feee6cf782936786d3702c716fac193e5943398662975136fd5e30a1 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98991 +- 
id_pac_bio_pr_metrics_tmp: 9701 + id_pac_bio_product: 11c1424601a9dd783e9eab728934f88205b1c2251ce5b5da64d9ba25a34458fb + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98992 +- id_pac_bio_pr_metrics_tmp: 9702 + id_pac_bio_product: 9708d23c8ea56d4d0680dc6f3f78b102d08708d70f8205e079a0215f9331c27d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98993 +- id_pac_bio_pr_metrics_tmp: 9703 + id_pac_bio_product: e0d3aa0edd255e4bcf14ad2c855a195677f44109ad444e395845a7b82c1d477d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98994 +- id_pac_bio_pr_metrics_tmp: 9704 + id_pac_bio_product: 41887713d6f40e42b97384931254e986635189cc432652ac5d16d11c371b6d71 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98995 +- id_pac_bio_pr_metrics_tmp: 9705 + id_pac_bio_product: 49da248e9147189033f81b7f5f44f036bb26be0a5c3efcf61c484dc8dc50d432 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98996 +- id_pac_bio_pr_metrics_tmp: 9706 + id_pac_bio_product: ccc81d884b5319d7d6dd4beb14944fd9dc7b6063933a57882f53652da435af2d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98997 +- id_pac_bio_pr_metrics_tmp: 9707 + id_pac_bio_product: 6ac18d5b5b63eca3a3773686733be395a58d67213b597111c2b51c6fec4ce814 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98998 +- id_pac_bio_pr_metrics_tmp: 9708 + id_pac_bio_product: dcb13705145a63f6034692b6e5cd568fc34e9c6669f0b0ce37a54160c45a1a04 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 98999 +- id_pac_bio_pr_metrics_tmp: 9709 + id_pac_bio_product: 4e6c23a4627eb23514d03630512548d521f822d44467f951ae61e169d16a868e + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99000 +- id_pac_bio_pr_metrics_tmp: 9710 + id_pac_bio_product: 878b1e205768c5965cdce2b11d779d41ad267f5d84cac8652e64d9c7139fd692 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99001 +- id_pac_bio_pr_metrics_tmp: 9711 + id_pac_bio_product: 75ad3eb849261accf19df9c5acfb97b10f73d658dd1f403a032dab25fdeee64a + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99002 +- 
id_pac_bio_pr_metrics_tmp: 9712 + id_pac_bio_product: b209eb8f689ff8391bf42dece19d8de44ef4ee63158219cc81bd0cb45196b631 + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99003 +- id_pac_bio_pr_metrics_tmp: 9713 + id_pac_bio_product: 18078959e14a3416b428624e6cc200c649a8b3b92fc40a20444e9cdca373137b + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99004 +- id_pac_bio_pr_metrics_tmp: 9714 + id_pac_bio_product: 0a82107d88ee710dcf46139a8710f443da8f4051142c478cb0a99ec5c6d64a2a + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99005 +- id_pac_bio_pr_metrics_tmp: 9715 + id_pac_bio_product: 6a81185dec0b9dcf06412b43fad08ce92cbf5ad492da7fa25dee63723950de5d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99006 +- id_pac_bio_pr_metrics_tmp: 9716 + id_pac_bio_product: 7f3912931b878be0b3c949fc20acd59d51b68a27a04fa89491a74905219f8deb + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99007 +- id_pac_bio_pr_metrics_tmp: 9717 + id_pac_bio_product: 4cf97b14a1c23d892f3b9609642d1978b56ca4a7fca6995897be68c29ed9211d + id_pac_bio_rw_metrics_tmp: 1735 + id_pac_bio_tmp: 99008 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30016 + id_pac_bio_product: 3b37d8c1a317f229a3aae182f160f8e4f4856607fb15f1ab0588dde66640afda + id_pac_bio_rw_metrics_tmp: 6206 + id_pac_bio_tmp: ~ + last_changed: 2024-02-28 14:10:14 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30017 + id_pac_bio_product: 2b9048414306eb7683056bd91f6ec81f0b2dbf69484b3dd2dbe39932b52bedbb + id_pac_bio_rw_metrics_tmp: 6206 + id_pac_bio_tmp: ~ + last_changed: 2024-02-28 14:10:14 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + 
id_pac_bio_pr_metrics_tmp: 30018 + id_pac_bio_product: f50319c97e28f2e0a67ebbc736080c4e98f23cdf6e5b7cec964349ffb13ae797 + id_pac_bio_rw_metrics_tmp: 6207 + id_pac_bio_tmp: 120628 + last_changed: 2024-03-05 15:10:36 + qc: 0 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30019 + id_pac_bio_product: 080733cab28898fcd69d1a418c7675cba38a548c9c20ac2da48a84c5658ee6b2 + id_pac_bio_rw_metrics_tmp: 6207 + id_pac_bio_tmp: 120629 + last_changed: 2024-03-05 15:10:36 + qc: 0 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30020 + id_pac_bio_product: 14be4b6a6bb857c0967d56c90d2b57edc1401cdb5f95379312fb8e5ca71e09fa + id_pac_bio_rw_metrics_tmp: 6207 + id_pac_bio_tmp: 120630 + last_changed: 2024-03-05 15:10:36 + qc: 0 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30021 + id_pac_bio_product: 4153f3a64e39588bf626c4dda42e5ee74b424bba67d69bb74bb029adda2e642c + id_pac_bio_rw_metrics_tmp: 6208 + id_pac_bio_tmp: 120631 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30022 + id_pac_bio_product: fbbcd5cac5d086ce64b3a37646e261b4c784fce6755fd65d6d41f048d2267c61 + id_pac_bio_rw_metrics_tmp: 6208 + id_pac_bio_tmp: 120632 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30023 + id_pac_bio_product: 
74af5a311e15af654336aea65826a2c4974842d752e25875b0303ad5a3556167 + id_pac_bio_rw_metrics_tmp: 6209 + id_pac_bio_tmp: 120633 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30024 + id_pac_bio_product: 11022006a649937c570d100ccb382dddadf9a7174ee303903c8d2b7cd7efb328 + id_pac_bio_rw_metrics_tmp: 6209 + id_pac_bio_tmp: 120634 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30025 + id_pac_bio_product: e6a2157d0fda8faae1288025e99ce5f8133f1466b752a67809668e5b9b16d5b1 + id_pac_bio_rw_metrics_tmp: 6209 + id_pac_bio_tmp: 120635 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30026 + id_pac_bio_product: 9840280d97c98ff3ddda36ac95cf3b87f5810cc3be73a64c27d6ab92cfaab0ac + id_pac_bio_rw_metrics_tmp: 6209 + id_pac_bio_tmp: 120636 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30027 + id_pac_bio_product: 81141cdff1f57c0fc0fc5f88856fa7c6d2945acc5fa6e53e7d1214d17a00c410 + id_pac_bio_rw_metrics_tmp: 6210 + id_pac_bio_tmp: 120637 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30028 + id_pac_bio_product: 4145bf889c130ecaadcd4d757d0a3ca98d68629556427a27ebc08840ffdd0e0f + id_pac_bio_rw_metrics_tmp: 6210 + 
id_pac_bio_tmp: 120638 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30029 + id_pac_bio_product: 5b99ad09c31afd4917da39d44fc6cc40e1915572e80c20acbfda6d6c031e74c5 + id_pac_bio_rw_metrics_tmp: 6211 + id_pac_bio_tmp: 120639 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30030 + id_pac_bio_product: 0152d7945c4f74fac3ff828012ad2c01a95574df213d7664e7989e1039727cb5 + id_pac_bio_rw_metrics_tmp: 6211 + id_pac_bio_tmp: 120640 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30031 + id_pac_bio_product: 110e4562a6d28dd96973a98fcc1464d6c82dc413296b95d0c71727d21fa2a193 + id_pac_bio_rw_metrics_tmp: 6212 + id_pac_bio_tmp: 120641 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30032 + id_pac_bio_product: af65875cfecca04ee585c67525661f57a07d7f1427aa15ca39e158c791d63aa5 + id_pac_bio_rw_metrics_tmp: 6212 + id_pac_bio_tmp: ~ + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30033 + id_pac_bio_product: c24d50afb4c048f38dca230a03fb4880912713adf7db7a3ec4d5f57ee3c4cdec + id_pac_bio_rw_metrics_tmp: 6212 + id_pac_bio_tmp: 120643 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + 
hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30034 + id_pac_bio_product: baa1e87601ca9c16d95b7fda9d9346557de4aaf4adb5c15383d0f8d9366692bf + id_pac_bio_rw_metrics_tmp: 6213 + id_pac_bio_tmp: 120644 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30035 + id_pac_bio_product: f88bcfb888f075442a005368c070ba83d895b07c013c68e1cb292fce4aaa40f2 + id_pac_bio_rw_metrics_tmp: 6213 + id_pac_bio_tmp: 120645 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30036 + id_pac_bio_product: 61d2c6fc72d593949cf7b60812a0076c9af57b0fa71b394f0669e410e040458e + id_pac_bio_rw_metrics_tmp: 6213 + id_pac_bio_tmp: 120646 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30037 + id_pac_bio_product: 252c8d3dc0b4c81e6d7359b0808ba962013e7b320eb9b979da526cecf5fdd019 + id_pac_bio_rw_metrics_tmp: 6213 + id_pac_bio_tmp: 120647 + last_changed: 2024-02-28 11:10:15 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30153 + id_pac_bio_product: 2135bf0b32c6b987042e67e062647aa21ac956c1d3385627b7a1d4cd670c355f + id_pac_bio_rw_metrics_tmp: 6306 + id_pac_bio_tmp: 120767 + last_changed: 2024-03-08 12:10:14 + qc: 1 +- barcode_quality_score_mean: ~ + hifi_bases_percent: ~ + hifi_num_reads: ~ + hifi_read_bases: ~ + hifi_read_length_mean: ~ + 
hifi_read_quality_mean: ~ + id_pac_bio_pr_metrics_tmp: 30154 + id_pac_bio_product: 790e8882c97615d79ebe27b782eefa87eede2cecda8ebd960cdd88300059f196 + id_pac_bio_rw_metrics_tmp: 6307 + id_pac_bio_tmp: 120768 + last_changed: 2024-03-08 12:10:14 + qc: 1 + diff --git a/tests/data/mlwh_pb_runs/README b/tests/data/mlwh_pb_runs/README new file mode 100644 index 00000000..c61aee8a --- /dev/null +++ b/tests/data/mlwh_pb_runs/README @@ -0,0 +1,19 @@ +This set of mlwh fixtures contains data for runs TRACTION-RUN-{92,1140,1162} + +TRACTION-RUN-1140 + +plate 1 +A1 2 samples, LIMS links deleted +B1 3 samples +C1 2 samples +D1 4 samples, belong to two studies + +plate2 +A1 2 samples +B1 2 samples +C1 3 samples, belong to two studies, LIMS link to one samples is deleted, +D1 4 sample + +TRACTION-RUN-1162 +plate1 +C1 and D1, both with 1 sample, different studies diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py index b54c4b93..43e43a9f 100644 --- a/tests/endpoints/test_single_well_qc_details.py +++ b/tests/endpoints/test_single_well_qc_details.py @@ -9,7 +9,7 @@ def test_get_well_info( ): insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" + mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" ) id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa67" diff --git a/tests/test_pac_bio_experiment.py b/tests/test_pac_bio_experiment.py index a9556b27..b0392ca9 100644 --- a/tests/test_pac_bio_experiment.py +++ b/tests/test_pac_bio_experiment.py @@ -12,7 +12,7 @@ def test_creating_experiment_object(mlwhdb_test_session): # Four wells, D1 has 40 samples, the rest have one sample each. 
insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" + mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" ) query = ( diff --git a/tests/test_pac_well_models.py b/tests/test_pb_well_models.py similarity index 65% rename from tests/test_pac_well_models.py rename to tests/test_pb_well_models.py index c8f69715..c062ccb5 100644 --- a/tests/test_pac_well_models.py +++ b/tests/test_pb_well_models.py @@ -1,7 +1,9 @@ from npg_id_generation.pac_bio import PacBioEntity +from sqlalchemy.orm import Session from lang_qc.db.helper.qc import get_qc_states_by_id_product_list from lang_qc.db.helper.wells import WellWh +from lang_qc.db.mlwh_schema import PacBioRunWellMetrics from lang_qc.models.pacbio.well import PacBioWellFull, PacBioWellSummary from tests.conftest import compare_dates, insert_from_yaml from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users @@ -9,7 +11,13 @@ yaml_is_loaded: bool = False -def _prepare_data(mlwhdb_session, qcdb_session, run_name, well_label): +def _prepare_data( + mlwhdb_session: Session, + qcdb_session: Session, + run_name: str, + well_label: str, + plate_number: int = None, +): """Loads LIMS data for one well. Returns a tuple of an mlwh db row and QC state model for that well. 
@@ -19,12 +27,12 @@ def _prepare_data(mlwhdb_session, qcdb_session, run_name, well_label): if yaml_is_loaded is False: insert_from_yaml( - mlwhdb_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" + mlwhdb_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" ) yaml_is_loaded = True id_product = PacBioEntity( - run_name=run_name, well_label=well_label + run_name=run_name, well_label=well_label, plate_number=plate_number ).hash_product_id() well_row = WellWh(session=mlwhdb_session).get_mlwh_well_by_product_id(id_product) @@ -40,7 +48,7 @@ def _prepare_data(mlwhdb_session, qcdb_session, run_name, well_label): return (well_row, qc_state) -def _examine_well_model_a1(pb_well, id_product): +def _examine_well_model_a1(pb_well: PacBioRunWellMetrics, id_product: str): assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION-RUN-92" @@ -57,7 +65,7 @@ def _examine_well_model_a1(pb_well, id_product): assert pb_well.instrument_type == "Sequel2e" -def _examine_well_model_b1(pb_well, id_product): +def _examine_well_model_b1(pb_well: PacBioRunWellMetrics, id_product: str): assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION_RUN_1" @@ -70,7 +78,7 @@ def _examine_well_model_b1(pb_well, id_product): assert pb_well.instrument_type == "Sequel2" -def _examine_well_model_c1(pb_well, id_product): +def _examine_well_model_c1(pb_well: PacBioRunWellMetrics, id_product: str): assert pb_well.id_product == id_product assert pb_well.run_name == "TRACTION_RUN_10" @@ -138,3 +146,53 @@ def test_create_summary_model( ) pb_well = PacBioWellFull(db_well=well_row, qc_state=None) _examine_well_model_c1(pb_well, well_row.id_pac_bio_product) + + +def test_create_summary_model_study_info( + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval +): + # Well with two samples, none is linked to LIMS + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "A1", 1 + ) + pb_well = 
PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == [] + + # Fully linked wells with one sample + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1162", "C1" + ) + pb_well = PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == ["DTOL_Darwin R&D"] + + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1162", "D1", 1 + ) + pb_well = PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == ["DTOL_Darwin Tree of Life"] + + # A fully linked well with multiple samples, all belonging to the same study + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "B1", 1 + ) + pb_well = PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == ["DTOL_Darwin Tree of Life"] + + # A fully linked well with multiple samples, which belong to two studies + (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "D1", 1 + ) + pb_well = PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == [ + "DTOL_Darwin Tree of Life", + "ToL_Blaxter_ Reference Genomes_ DNA", + ] + + # A partially linked well with three samples, which belong to two studies. + # The LIMS link for one of the samples is deleted so that two other samples + # belong to the same study. 
+ (well_row, qc_state) = _prepare_data( + mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "C1", 2 + ) + pb_well = PacBioWellSummary(db_well=well_row) + assert pb_well.study_names == [] From 9e78a0666b92dde5431d2f1345782f5339abfca4 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 12 Mar 2024 12:00:05 +0000 Subject: [PATCH 20/33] Updated CHANGELOG.md --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9458940..b4143d76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Changed + +* To simplify object instantiation and fields' assignment for some + of the response models, converted `PacBioWell` and `PacBioWellFull` + models to pydantic dataclasses. + +### Added + +* A new response model `PacBioWellSummary`, which replaces `PacBioWell` + in the latest's capacity of the response model for a PacBio well + summary. +* A new field, `study_names`, a potentially empty sorted array of + unique study names, is added to the response model for a PacBio + well summary. + ## [2.0.0] - 2024-02-20 ### Changed From 82493a04d8b6b9a28b9e6fdb512277945a1f85ce Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 18 Mar 2024 08:58:34 +0000 Subject: [PATCH 21/33] Moved a utility function into the table class. The function does a search on a table row, does not take any other arguments. The table class of this row is a natural home for this type of function. 
--- lang_qc/db/mlwh_schema.py | 21 +++++++++++++++++++++ lang_qc/models/pacbio/well.py | 26 ++------------------------ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py index 533c80ea..2a98011a 100644 --- a/lang_qc/db/mlwh_schema.py +++ b/lang_qc/db/mlwh_schema.py @@ -538,6 +538,27 @@ class PacBioRunWellMetrics(Base): "PacBioProductMetrics", back_populates="pac_bio_run_well_metrics" ) + def get_experiment_info(self): + """Returns a list of PacBioRun mlwh database rows. + + Returns LIMS information about the PacBio experiment + for this well, one pac_bio_run table row per sample + (product) in the well. + + If any or all of the pac_bio_product_metrics rows linked + to this well record are not linked to the pac_bio_run + table, and empty array is returned, thus preventing incomplete + data being supplied to the client. + """ + product_metrics = self.pac_bio_product_metrics + experiment_info = [ + pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None + ] + if len(experiment_info) != len(product_metrics): + experiment_info = [] + + return experiment_info + class PacBioProductMetrics(Base): __tablename__ = "pac_bio_product_metrics" diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py index 84b11f57..d2047a59 100644 --- a/lang_qc/models/pacbio/well.py +++ b/lang_qc/models/pacbio/well.py @@ -53,28 +53,6 @@ def get_field_names(cls): return field_names -def get_experiment_info(db_well: PacBioRunWellMetrics): - """Returns a list of PacBioRun mlwh database rows. - - Returns LIMS information about the PacBio experiment - for this well, one pac_bio_run table row per sample - (product) in the well. - - If any or all of the pac_bio_product_metrics rows linked - to this well record are not linked to the pac_bio_run - table, and empty array is returned, thus preventing incomplete - data being supplied to the client. 
- """ - product_metrics = db_well.pac_bio_product_metrics - experiment_info = [ - pbr for pbr in [pm.pac_bio_run for pm in product_metrics] if pbr is not None - ] - if len(experiment_info) != len(product_metrics): - experiment_info = [] - - return experiment_info - - @dataclass(kw_only=True, frozen=True) class PacBioWell: """A basic response model for a single PacBio well. @@ -191,7 +169,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: assigned = super().pre_root(values) mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] assigned["study_names"] = sorted( - set([row.study.name for row in get_experiment_info(mlwh_db_row)]) + set([row.study.name for row in mlwh_db_row.get_experiment_info()]) ) return assigned @@ -243,7 +221,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]: assigned = super().pre_root(values) mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"] assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row) - experiment_info = get_experiment_info(mlwh_db_row) + experiment_info = mlwh_db_row.get_experiment_info() if len(experiment_info): assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info) From 201b7d4236f08f8298e151401900b646a68f3bf1 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 18 Mar 2024 10:07:32 +0000 Subject: [PATCH 22/33] Merged two sets of mlwh db JSON fixtures --- .../200-PacBioRunWellMetrics.yml | 132 ------------------ .../mlwh_pb_runs/200-PacBioRunWellMetrics.yml | 67 +++++++++ tests/data/mlwh_pb_runs/README | 8 +- tests/test_pac_bio_qc_data_well.py | 2 +- 4 files changed, 75 insertions(+), 134 deletions(-) delete mode 100644 tests/data/mlwh_pb_demux_525/200-PacBioRunWellMetrics.yml diff --git a/tests/data/mlwh_pb_demux_525/200-PacBioRunWellMetrics.yml b/tests/data/mlwh_pb_demux_525/200-PacBioRunWellMetrics.yml deleted file mode 100644 index 9c24aedd..00000000 --- a/tests/data/mlwh_pb_demux_525/200-PacBioRunWellMetrics.yml +++ /dev/null @@ -1,132 +0,0 @@ ---- -# Two runs, 
one with a well with no deplexing, the other with -- adapter_dimer_percent: 0 - binding_kit: Sequel II Binding Kit 2.2 - ccs_execution_mode: OnInstrument - cell_lot_number: 417079 - chemistry_sw_version: 10.2.0.133424 - chip_type: 8mChip - control_concordance_mean: 0.868282 - control_concordance_mode: 0.91 - control_num_reads: 24837 - control_read_length_mean: 50169 - created_by: eg18 - demultiplex_mode: OnInstrument - heteroduplex_analysis: ~ - hifi_barcoded_reads: ~ - hifi_bases_in_barcoded_reads: ~ - hifi_low_quality_num_reads: 260532 - hifi_low_quality_read_bases: 2670039556 - hifi_low_quality_read_length_mean: 10248 - hifi_low_quality_read_quality_median: 17 - hifi_num_reads: 2877051 - hifi_number_passes_mean: 18 - hifi_only_reads: ~ - hifi_read_bases: 27076668646 - hifi_read_length_mean: 9411 - hifi_read_quality_median: 43 - id_pac_bio_product: cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61 - id_pac_bio_rw_metrics_tmp: 1732 - include_kinetics: 0 - insert_length_mean: 16227 - insert_length_n50: 23250 - instrument_name: 64222E - instrument_sw_version: 10.1.0.119549 - instrument_type: Sequel2e - loading_conc: 80 - local_base_rate: 2.76789 - movie_minutes: 1800 - movie_name: m64222e_220414_130247 - p0_num: 2800130 - p1_num: 5033971 - p2_num: 180570 - pac_bio_run_name: TRACTION-RUN-92 - polymerase_num_reads: 5009134 - polymerase_read_bases: 645567171652 - polymerase_read_length_mean: 128878 - polymerase_read_length_n50: 251250 - primary_analysis_sw_version: 10.1.0.119549 - productive_zmws_num: 7989834 - run_complete: 2022-04-20 09:16:53 - run_start: 2022-04-14 12:52:34 - run_status: Complete - run_transfer_complete: 2022-04-20 16:03:18 - sequencing_kit: Sequel II Sequencing Plate 2.0 (4 rxn) - sequencing_kit_lot_number: 123660 - short_insert_percent: 0 - sl_hostname: pacbio01.dnapipelines.sanger.ac.uk - sl_run_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29da - sl_ccs_uuid: 7f5d45ed-aa93-46a6-92b2-4b11d4bf29ro - ts_run_name: r64222e_20220414_125138 - 
unique_molecular_bases: 68501667840 - well_complete: 2022-04-16 12:36:21 - well_label: A1 - well_start: 2022-04-14 13:02:48 - well_status: Complete -- id_pac_bio_rw_metrics_tmp: 3725 - id_pac_bio_product: 7454d3e822388a257437d1736361fb00c7493ab8b24ecd8ce3506ff45deb6716 - pac_bio_run_name: TRACTION-RUN-525 - well_label: A1 - qc_seq_state: Passed - qc_seq_state_is_final: 1 - qc_seq_date: 2023-04-12 16:49:52 - qc_seq: 1 - instrument_type: Revio - instrument_name: 84047 - chip_type: 25mChip - sl_hostname: pacbio02.dnapipelines.sanger.ac.uk - sl_run_uuid: f1490bb9-7a99-45b2-9d79-24582881742d - sl_ccs_uuid: a9ad9f86-04c2-4194-ba69-48240cb745f9 - ts_run_name: r84047_20230404_164149 - movie_name: m84047_230404_164952_s1 - movie_minutes: 1440 - created_by: chc - binding_kit: Revio polymerase kit - sequencing_kit: Revio sequencing plate - sequencing_kit_lot_number: 030157 - cell_lot_number: 1000000291 - ccs_execution_mode: OnInstrument - demultiplex_mode: OnInstrument - include_kinetics: 1 - hifi_only_reads: 1 - heteroduplex_analysis: 0 - loading_conc: 227 - run_start: 2023-04-04 16:42:51 - run_complete: 2023-04-05 23:16:26 - run_transfer_complete: 2023-04-06 16:28:42 - run_status: Complete - well_start: 2023-04-04 16:49:53 - well_complete: 2023-04-06 10:15:28 - well_status: Complete - chemistry_sw_version: 12.0.0.172289 - instrument_sw_version: 12.0.0.178953 - primary_analysis_sw_version: 12.0.0.1 - control_num_reads: 2724 - control_concordance_mean: 0.899633 - control_concordance_mode: 0.91 - control_read_length_mean: 59917 - local_base_rate: 2.182480 - polymerase_read_bases: 1324865295449 - polymerase_num_reads: 18759739 - polymerase_read_length_mean: 70623 - polymerase_read_length_n50: 132750 - insert_length_mean: 14885 - insert_length_n50: 20250 - unique_molecular_bases: 250195427328 - productive_zmws_num: 16777216 - p0_num: 5846918 - p1_num: 18762463 - p2_num: 556443 - adapter_dimer_percent: 0.00 - short_insert_percent: 0.00 - hifi_read_bases: 92922481974 - 
hifi_num_reads: 8297069 - hifi_read_length_mean: 11199 - hifi_read_quality_median: 34 - hifi_number_passes_mean: 14 - hifi_low_quality_read_bases: NULL - hifi_low_quality_num_reads: NULL - hifi_low_quality_read_length_mean: NULL - hifi_low_quality_read_quality_median: NULL - hifi_barcoded_reads: 8255091 - hifi_bases_in_barcoded_reads: 92439476821 diff --git a/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml b/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml index db7d1211..e3a76268 100644 --- a/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml +++ b/tests/data/mlwh_pb_runs/200-PacBioRunWellMetrics.yml @@ -942,3 +942,70 @@ well_start: 2024-03-06 12:14:56 well_status: Complete +- id_pac_bio_rw_metrics_tmp: 3725 + id_pac_bio_product: 7454d3e822388a257437d1736361fb00c7493ab8b24ecd8ce3506ff45deb6716 + pac_bio_run_name: TRACTION-RUN-525 + well_label: A1 + qc_seq_state: Passed + qc_seq_state_is_final: 1 + qc_seq_date: 2023-04-12 16:49:52 + qc_seq: 1 + instrument_type: Revio + instrument_name: 84047 + chip_type: 25mChip + sl_hostname: pacbio02.dnapipelines.sanger.ac.uk + sl_run_uuid: f1490bb9-7a99-45b2-9d79-24582881742d + sl_ccs_uuid: a9ad9f86-04c2-4194-ba69-48240cb745f9 + ts_run_name: r84047_20230404_164149 + movie_name: m84047_230404_164952_s1 + movie_minutes: 1440 + created_by: chc + binding_kit: Revio polymerase kit + sequencing_kit: Revio sequencing plate + sequencing_kit_lot_number: 030157 + cell_lot_number: 1000000291 + ccs_execution_mode: OnInstrument + demultiplex_mode: OnInstrument + include_kinetics: 1 + hifi_only_reads: 1 + heteroduplex_analysis: 0 + loading_conc: 227 + run_start: 2023-04-04 16:42:51 + run_complete: 2023-04-05 23:16:26 + run_transfer_complete: 2023-04-06 16:28:42 + run_status: Complete + well_start: 2023-04-04 16:49:53 + well_complete: 2023-04-06 10:15:28 + well_status: Complete + chemistry_sw_version: 12.0.0.172289 + instrument_sw_version: 12.0.0.178953 + primary_analysis_sw_version: 12.0.0.1 + control_num_reads: 2724 + 
control_concordance_mean: 0.899633 + control_concordance_mode: 0.91 + control_read_length_mean: 59917 + local_base_rate: 2.182480 + polymerase_read_bases: 1324865295449 + polymerase_num_reads: 18759739 + polymerase_read_length_mean: 70623 + polymerase_read_length_n50: 132750 + insert_length_mean: 14885 + insert_length_n50: 20250 + unique_molecular_bases: 250195427328 + productive_zmws_num: 16777216 + p0_num: 5846918 + p1_num: 18762463 + p2_num: 556443 + adapter_dimer_percent: 0.00 + short_insert_percent: 0.00 + hifi_read_bases: 92922481974 + hifi_num_reads: 8297069 + hifi_read_length_mean: 11199 + hifi_read_quality_median: 34 + hifi_number_passes_mean: 14 + hifi_low_quality_read_bases: NULL + hifi_low_quality_num_reads: NULL + hifi_low_quality_read_length_mean: NULL + hifi_low_quality_read_quality_median: NULL + hifi_barcoded_reads: 8255091 + hifi_bases_in_barcoded_reads: 92439476821 diff --git a/tests/data/mlwh_pb_runs/README b/tests/data/mlwh_pb_runs/README index c61aee8a..ba357637 100644 --- a/tests/data/mlwh_pb_runs/README +++ b/tests/data/mlwh_pb_runs/README @@ -1,4 +1,10 @@ -This set of mlwh fixtures contains data for runs TRACTION-RUN-{92,1140,1162} +This set of mlwh fixtures contains data for runs TRACTION-RUN-{92,525,1140,1162} + +TRACTION-RUN-92 +Wells {A,B,C,D}1, plate_number undefined + +TRACTION-RUN-525 +Well A1 only, demultiplexed, pac_bio_run_well_metrics data only TRACTION-RUN-1140 diff --git a/tests/test_pac_bio_qc_data_well.py b/tests/test_pac_bio_qc_data_well.py index ccc6557a..24432eba 100644 --- a/tests/test_pac_bio_qc_data_well.py +++ b/tests/test_pac_bio_qc_data_well.py @@ -11,7 +11,7 @@ def test_creating_qc_data_well(mlwhdb_test_session): """ insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_demux_525", "lang_qc.db.mlwh_schema" + mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" ) helper = WellWh(session=mlwhdb_test_session) From 3094d9e134a1d56cedf30647a2c3d9c63eaad751 Mon Sep 17 00:00:00 2001 From: mgcam 
Date: Mon, 18 Mar 2024 10:52:52 +0000 Subject: [PATCH 23/33] Used pytest fixture to load run data from YAML. --- tests/conftest.py | 77 ++++++++++--------- tests/data/mlwh_pb_runs/README | 1 + .../endpoints/test_single_well_qc_details.py | 10 +-- tests/test_pac_bio_experiment.py | 7 +- tests/test_pac_bio_qc_data_well.py | 7 +- tests/test_pb_well_models.py | 20 ++--- 6 files changed, 55 insertions(+), 67 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 52c20060..0178e469 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,15 +20,6 @@ test_ini = os.path.join(os.path.dirname(__file__), "testdb.ini") -@pytest.fixture(scope="package") -def config() -> configparser.ConfigParser: - # Database credentials for the test MySQL instance are stored here. This - # should be an instance in a container, discarded after each test run. - test_config = configparser.ConfigParser() - test_config.read(test_ini) - return test_config - - def mysql_url( config: configparser.ConfigParser, section: str, @@ -70,6 +61,43 @@ def mysql_url( ) +def insert_from_yaml(session, dir_path, module_name): + + # Load the schema module where the table ORM classes are defined. + module = importlib.import_module(module_name) + + # Find all files in a given directory. 
+ dir_obj = pathlib.Path(dir_path) + file_paths = list(str(f) for f in dir_obj.iterdir()) + file_paths.sort() + + for file_path in file_paths: + with open(file_path, "r") as f: + (head, file_name) = os.path.split(file_path) + # File name example: 200-PacBioRun.yml + m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name) + if m is not None: + class_name = m.group(1) + table_class = getattr(module, class_name) + data = yaml.safe_load(f) + session.execute(insert(table_class), data) + + session.commit() + + +def compare_dates(date_obj, date_string): + assert date_obj.isoformat(sep=" ", timespec="seconds") == date_string + + +@pytest.fixture(scope="package") +def config() -> configparser.ConfigParser: + # Database credentials for the test MySQL instance are stored here. This + # should be an instance in a container, discarded after each test run. + test_config = configparser.ConfigParser() + test_config.read(test_ini) + return test_config + + @pytest.fixture(scope="module", name="mlwhdb_test_sessionfactory") def create_mlwhdb_test_sessionfactory(config): """Create a MLWH SQLAlchemy session factory, using credentials from config. @@ -165,29 +193,8 @@ def override_get_qc_db(): return client -def insert_from_yaml(session, dir_path, module_name): - - # Load the schema module where the table ORM classes are defined. - module = importlib.import_module(module_name) - - # Find all files in a given directory. 
- dir_obj = pathlib.Path(dir_path) - file_paths = list(str(f) for f in dir_obj.iterdir()) - file_paths.sort() - - for file_path in file_paths: - with open(file_path, "r") as f: - (head, file_name) = os.path.split(file_path) - # File name example: 200-PacBioRun.yml - m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name) - if m is not None: - class_name = m.group(1) - table_class = getattr(module, class_name) - data = yaml.safe_load(f) - session.execute(insert(table_class), data) - - session.commit() - - -def compare_dates(date_obj, date_string): - assert date_obj.isoformat(sep=" ", timespec="seconds") == date_string +@pytest.fixture(scope="module", name="mlwhdb_load_runs") +def mlwhdb_load_from_yaml(mlwhdb_test_session): + insert_from_yaml( + mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" + ) diff --git a/tests/data/mlwh_pb_runs/README b/tests/data/mlwh_pb_runs/README index ba357637..e2559a82 100644 --- a/tests/data/mlwh_pb_runs/README +++ b/tests/data/mlwh_pb_runs/README @@ -2,6 +2,7 @@ This set of mlwh fixtures contains data for runs TRACTION-RUN-{92,525,1140,1162} TRACTION-RUN-92 Wells {A,B,C,D}1, plate_number undefined +D1 has 40 samples, the rest have one sample each TRACTION-RUN-525 Well A1 only, demultiplexed, pac_bio_run_well_metrics data only diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py index 43e43a9f..7931e425 100644 --- a/tests/endpoints/test_single_well_qc_details.py +++ b/tests/endpoints/test_single_well_qc_details.py @@ -1,17 +1,15 @@ from fastapi.testclient import TestClient -from tests.conftest import insert_from_yaml from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users def test_get_well_info( - test_client: TestClient, mlwhdb_test_session, load_data4well_retrieval + test_client: TestClient, + mlwhdb_test_session, + load_data4well_retrieval, + mlwhdb_load_runs, ): - insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_runs", 
"lang_qc.db.mlwh_schema" - ) - id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa67" response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 404 diff --git a/tests/test_pac_bio_experiment.py b/tests/test_pac_bio_experiment.py index b0392ca9..ff979541 100644 --- a/tests/test_pac_bio_experiment.py +++ b/tests/test_pac_bio_experiment.py @@ -3,18 +3,13 @@ from lang_qc.db.mlwh_schema import PacBioRun from lang_qc.models.pacbio.experiment import PacBioExperiment -from tests.conftest import insert_from_yaml -def test_creating_experiment_object(mlwhdb_test_session): +def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs): run_name = "TRACTION-RUN-92" # Four wells, D1 has 40 samples, the rest have one sample each. - insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" - ) - query = ( select(PacBioRun) .where(PacBioRun.pac_bio_run_name == run_name) diff --git a/tests/test_pac_bio_qc_data_well.py b/tests/test_pac_bio_qc_data_well.py index 24432eba..701cce85 100644 --- a/tests/test_pac_bio_qc_data_well.py +++ b/tests/test_pac_bio_qc_data_well.py @@ -2,18 +2,13 @@ from lang_qc.db.helper.wells import WellWh from lang_qc.models.pacbio.qc_data import QCDataWell -from tests.conftest import insert_from_yaml -def test_creating_qc_data_well(mlwhdb_test_session): +def test_creating_qc_data_well(mlwhdb_test_session, mlwhdb_load_runs): """ Check that run-well metrics are correctly transformed for client rendering """ - insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" - ) - helper = WellWh(session=mlwhdb_test_session) id_product = PacBioEntity( diff --git a/tests/test_pb_well_models.py b/tests/test_pb_well_models.py index c062ccb5..12d64a44 100644 --- a/tests/test_pb_well_models.py +++ b/tests/test_pb_well_models.py @@ -5,7 +5,7 @@ from lang_qc.db.helper.wells import WellWh from lang_qc.db.mlwh_schema import 
PacBioRunWellMetrics from lang_qc.models.pacbio.well import PacBioWellFull, PacBioWellSummary -from tests.conftest import compare_dates, insert_from_yaml +from tests.conftest import compare_dates from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users yaml_is_loaded: bool = False @@ -18,19 +18,11 @@ def _prepare_data( well_label: str, plate_number: int = None, ): - """Loads LIMS data for one well. + """Returns mlwh data for one well. - Returns a tuple of an mlwh db row and QC state model for that well. + Returns a tuple of an mlwh db row and QC state model for one well. """ - global yaml_is_loaded - - if yaml_is_loaded is False: - insert_from_yaml( - mlwhdb_session, "tests/data/mlwh_pb_runs", "lang_qc.db.mlwh_schema" - ) - yaml_is_loaded = True - id_product = PacBioEntity( run_name=run_name, well_label=well_label, plate_number=plate_number ).hash_product_id() @@ -92,7 +84,7 @@ def _examine_well_model_c1(pb_well: PacBioRunWellMetrics, id_product: str): def test_create_full_model( - mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs ): # Full mlwh data, no data in the lang_qc database. 
(well_row, qc_state) = _prepare_data( @@ -125,7 +117,7 @@ def test_create_full_model( def test_create_summary_model( - mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs ): (well_row, qc_state) = _prepare_data( mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-92", "A1" @@ -149,7 +141,7 @@ def test_create_summary_model( def test_create_summary_model_study_info( - mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval + mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs ): # Well with two samples, none is linked to LIMS (well_row, qc_state) = _prepare_data( From a43b88a16ae660dc9e3e997f59af3e216f7d9450 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 18 Mar 2024 12:24:38 +0000 Subject: [PATCH 24/33] Dropped explicit db tables clean-up. The whole db is dropped and recreated at the point of creating a session factory. --- tests/fixtures/inbox_data.py | 6 ----- tests/fixtures/utils.py | 46 ------------------------------------ 2 files changed, 52 deletions(-) delete mode 100644 tests/fixtures/utils.py diff --git a/tests/fixtures/inbox_data.py b/tests/fixtures/inbox_data.py index b8fb04e8..64379b18 100644 --- a/tests/fixtures/inbox_data.py +++ b/tests/fixtures/inbox_data.py @@ -20,7 +20,6 @@ SubProductAttr, User, ) -from tests.fixtures.utils import clean_mlwhdb, clean_qcdb @pytest.fixture @@ -99,8 +98,6 @@ def inbox_data(mlwhdb_test_session): yield True - clean_mlwhdb(mlwhdb_test_session) - @pytest.fixture() def test_data_factory(mlwhdb_test_session, qcdb_test_session): @@ -201,6 +198,3 @@ def setup_data(desired_wells): return desired_wells yield setup_data - - clean_mlwhdb(mlwhdb_test_session) - clean_qcdb(qcdb_test_session) diff --git a/tests/fixtures/utils.py b/tests/fixtures/utils.py deleted file mode 100644 index 0a4b0e9a..00000000 --- a/tests/fixtures/utils.py +++ /dev/null @@ -1,46 +0,0 @@ -from sqlalchemy import delete - 
-from lang_qc.db.mlwh_schema import ( - PacBioProductMetrics, - PacBioRun, - PacBioRunWellMetrics, - Sample, - Study, -) -from lang_qc.db.qc_schema import ( - ProductLayout, - QcState, - QcStateDict, - QcType, - SeqPlatform, - SeqProduct, - SubProduct, - SubProductAttr, - User, -) - - -def clean_mlwhdb(session): - print("\nCLEAN mlwh schema") - with session.begin(): - session.execute(delete(PacBioProductMetrics)) - session.execute(delete(PacBioRun)) - session.execute(delete(Study)) - session.execute(delete(Sample)) - session.execute(delete(PacBioRunWellMetrics)) - session.commit() - - -def clean_qcdb(session): - with session.begin(): - print("\nCLEAN QC DB") - session.execute(delete(QcState)) - session.execute(delete(ProductLayout)) - session.execute(delete(SeqProduct)) - session.execute(delete(SubProduct)) - session.execute(delete(QcType)) - session.execute(delete(SubProductAttr)) - session.execute(delete(SeqPlatform)) - session.execute(delete(User)) - session.execute(delete(QcStateDict)) - session.commit() From fa7df49bf1951c0af46b76ade617009fd941cacd Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 12 Mar 2024 17:37:46 +0000 Subject: [PATCH 25/33] Added study names tooltip on hover and highlighted the BIOSCAN study. mouseOver event for the well button displays a tooltip with names of studies. Study-aware well button colour scheme. 
--- frontend/src/components/QcView.vue | 2 +- frontend/src/components/WellTable.vue | 20 ++++++++-- .../components/__tests__/WellTable.spec.js | 39 +++++++++++++++++-- frontend/src/utils/__tests__/text.spec.js | 19 ++++++++- frontend/src/utils/text.js | 9 ++++- .../src/views/__tests__/WellsByRun.spec.js | 3 +- .../src/views/__tests__/WellsByStatus.spec.js | 4 +- 7 files changed, 82 insertions(+), 14 deletions(-) diff --git a/frontend/src/components/QcView.vue b/frontend/src/components/QcView.vue index d1783be6..0ab4c6aa 100644 --- a/frontend/src/components/QcView.vue +++ b/frontend/src/components/QcView.vue @@ -5,7 +5,7 @@ import { computed } from "vue"; import groupMetrics from "../utils/metrics.js"; - import combineLabelWithPlate from "../utils/text.js" + import { combineLabelWithPlate } from "../utils/text.js" const props = defineProps({ // Well object representing one prepared input for the instrument diff --git a/frontend/src/components/WellTable.vue b/frontend/src/components/WellTable.vue index d0522d2e..3d6eb6ea 100644 --- a/frontend/src/components/WellTable.vue +++ b/frontend/src/components/WellTable.vue @@ -2,7 +2,11 @@ /* * Renders a table for a list of wells and generates buttons for selecting wells */ -import combineLabelWithPlate from "../utils/text.js" +import { combineLabelWithPlate, listStudiesForTooltip } from "../utils/text.js" +import { ElTooltip, ElButton } from "element-plus"; + +const tooltipDelay = 500 +const studyNameHighlight = 'BIOSCAN UK for flying insects' defineProps({ wellCollection: Object @@ -28,9 +32,17 @@ defineEmits(['wellSelected']) {{ wellObj.run_name }} - + + + {{ combineLabelWithPlate(wellObj.label, wellObj.plate_number) }} + + {{ wellObj.instrument_type }} {{ wellObj.instrument_name }} {{ wellObj.qc_state ? 
wellObj.qc_state.qc_state : ' ' }} diff --git a/frontend/src/components/__tests__/WellTable.spec.js b/frontend/src/components/__tests__/WellTable.spec.js index b7dcd97a..13749671 100644 --- a/frontend/src/components/__tests__/WellTable.spec.js +++ b/frontend/src/components/__tests__/WellTable.spec.js @@ -7,9 +7,33 @@ describe('Rows of data give rows in the table', () => { const table = mount(WellTable, { props: { wellCollection: [ - {run_name: 'TEST1', label: 'A1', plate_number: null, instrument_name: '1234', instrument_type: 'Revio', id_product: 'ABCDEF'}, - {run_name: 'TEST1', label: 'B1', plate_number: null, instrument_name: '1234', instrument_type: 'Revio', id_product: '123456'}, - {run_name: 'TEST2', label: 'A1', plate_number: 1, instrument_name: '1234', instrument_type: 'Revio', id_product: '123457'}, + { + run_name: 'TEST1', + label: 'A1', + plate_number: null, + instrument_name: '1234', + instrument_type: 'Revio', + id_product: 'ABCDEF', + study_names: [] + }, + { + run_name: 'TEST1', + label: 'B1', + plate_number: null, + instrument_name: '1234', + instrument_type: 'Revio', + id_product: '123456', + study_names: ['Study name 1', 'Study name 2'] + }, + { + run_name: 'TEST2', + label: 'A1', + plate_number: 1, + instrument_name: '1234', + instrument_type: 'Revio', + id_product: '123457', + study_names: ['BIOSCAN UK for flying insects'] + }, ] } }) @@ -40,7 +64,14 @@ describe('Rows of data give rows in the table', () => { expect(table.emitted().wellSelected[0][0]).toHaveProperty('idProduct') expect(table.emitted().wellSelected[0][0].idProduct).toEqual('ABCDEF') - await rows[2].find('button').trigger('click') + let wellButton = rows[2].find('button') + await wellButton.trigger('click') expect(table.emitted().wellSelected[1][0].idProduct).toEqual('123456') + expect(wellButton.classes('el-tooltip__trigger')).toBeTruthy() + expect(wellButton.classes('el-button--info')).toBeTruthy() + + wellButton = rows[3].find('button') + 
expect(wellButton.classes('el-tooltip__trigger')).toBeTruthy() + expect(wellButton.classes('el-button--warning')).toBeTruthy() }) }) diff --git a/frontend/src/utils/__tests__/text.spec.js b/frontend/src/utils/__tests__/text.spec.js index 1d0be431..4d573097 100644 --- a/frontend/src/utils/__tests__/text.spec.js +++ b/frontend/src/utils/__tests__/text.spec.js @@ -1,7 +1,7 @@ import { describe, test, expect } from 'vitest' -import combineLabelWithPlate from '../text' +import { combineLabelWithPlate, listStudiesForTooltip } from '../text' -describe('Text processing', () => { +describe('Well label and plate display', () => { test('Print well label without plate number', () => { expect(combineLabelWithPlate('A1', undefined)).toEqual('A1') expect(combineLabelWithPlate('A1', null)).toEqual('A1') @@ -11,3 +11,18 @@ describe('Text processing', () => { expect(combineLabelWithPlate('A1', 1)).toEqual('1-A1') }) }) + +describe('Study names tooltip', () => { + test('Display a warning in case of an empty study array', () => { + expect(listStudiesForTooltip([])).toEqual('No study info') + }) + + test('Display a single study name for an array of one study', () => { + expect(listStudiesForTooltip(['My single study'])).toEqual('My single study') + }) + + test('Display multiple lines for multiple studies', () => { + expect(listStudiesForTooltip(['Study One', 'Study two', 'Study 3'])).toEqual( + 'Study One
Study two
Study 3') + }) +}) diff --git a/frontend/src/utils/text.js b/frontend/src/utils/text.js index 34f6130a..1721f4e6 100644 --- a/frontend/src/utils/text.js +++ b/frontend/src/utils/text.js @@ -1,9 +1,16 @@ // Reusable text-mangling for the interface -export default function combineLabelWithPlate(well, plate) { +export { combineLabelWithPlate, listStudiesForTooltip } + +function combineLabelWithPlate(well, plate) { if (!plate) { return well } else { return `${plate}-${well}` } } + +function listStudiesForTooltip(study_names) { + let names = study_names.length == 0 ? ['No study info'] : study_names + return names.join('
') +} diff --git a/frontend/src/views/__tests__/WellsByRun.spec.js b/frontend/src/views/__tests__/WellsByRun.spec.js index 7f645310..fa1991e8 100644 --- a/frontend/src/views/__tests__/WellsByRun.spec.js +++ b/frontend/src/views/__tests__/WellsByRun.spec.js @@ -38,6 +38,7 @@ for (let index = 0; index < 2; index++) { instrument_name: "1234", instrument_type: "Revio", id_product: `${index}23456`, + study_names: [`Study ${index}`, 'Another study'], ...someLinkGeneration }) } @@ -48,6 +49,7 @@ const secondaryRun = { instrument_name: '1234', instrument_type: 'Revio', id_product: 'ABCDEF', + study_names: [], ...someLinkGeneration } @@ -141,7 +143,6 @@ describe('Does it work?', async () => { let buttons = wrapper.findAll('button') buttons[1].trigger('click') await flushPromises() - expect(wrapper.get('#well_summary').exists()).toBe(true) }) diff --git a/frontend/src/views/__tests__/WellsByStatus.spec.js b/frontend/src/views/__tests__/WellsByStatus.spec.js index 654fb80a..6d61ed84 100644 --- a/frontend/src/views/__tests__/WellsByStatus.spec.js +++ b/frontend/src/views/__tests__/WellsByStatus.spec.js @@ -31,7 +31,8 @@ for (let index = 0; index < 10; index++) { well_status: "Complete", qc_state: null, instrument_name: 1234, - instrument_type: 'Revio' + instrument_type: 'Revio', + study_names: ['Some study'], }) } @@ -140,6 +141,7 @@ describe('View loads configuration on mount', async () => { qc_state: null, instrument_name: '1234', instrument_type: 'Revio', + study_names: [], }] }) ) From 0577d559dbbe103234a0bd905d09b7c50d88f835 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 19 Mar 2024 11:40:11 +0000 Subject: [PATCH 26/33] Introduced 'On hold external' QC state. This state will be assigned to wells when the outcome of the QC assessment depends on liaising with a third party. The existing 'On hold' state will be used for flagging internal investigations. As far as QC statuses and workflow are concerned, the new state is handled in the same way as the existing 'On hold' state. 
--- .../versions/2.1.0_extend_qc_state_dict.py | 22 +++++++++++++++++++ lang_qc/db/helper/wells.py | 4 ++-- lang_qc/endpoints/config.py | 4 +++- tests/endpoints/test_config.py | 1 + tests/endpoints/test_dump_qc_states.py | 2 +- tests/endpoints/test_filtered_wells.py | 5 ++++- tests/endpoints/test_wells4run.py | 2 +- tests/fixtures/well_data.py | 3 ++- tests/test_pb_wells_factory.py | 11 +++++++--- tests/test_qc_state_retrieval.py | 9 ++++---- 10 files changed, 49 insertions(+), 14 deletions(-) create mode 100644 alembic/versions/2.1.0_extend_qc_state_dict.py diff --git a/alembic/versions/2.1.0_extend_qc_state_dict.py b/alembic/versions/2.1.0_extend_qc_state_dict.py new file mode 100644 index 00000000..8844d8dd --- /dev/null +++ b/alembic/versions/2.1.0_extend_qc_state_dict.py @@ -0,0 +1,22 @@ +"""extend_qc_state_dict + +Revision ID: 2.1.0 +Revises: 2.0.0 +Create Date: 2024-03-19 12:31:26.359652 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = "2.1.0" +down_revision = "2.0.0" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute("INSERT INTO qc_state_dict VALUES ('On hold external', NULL)") + + +def downgrade() -> None: + op.execute("DELETE FROM qc_state_dict WHERE state='On hold external'") diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py index 8e56a0a8..976dd632 100644 --- a/lang_qc/db/helper/wells.py +++ b/lang_qc/db/helper/wells.py @@ -169,10 +169,10 @@ class PacBioPagedWellsFactory(WellWh, PagedResponse): # For MySQL it's OK to use case-sensitive comparison operators since # its string comparisons for the collation we use are case-insensitive. 
FILTERS: ClassVar = { - QcFlowStatusEnum.ON_HOLD.name: (QcStateDict.state == "On hold"), + QcFlowStatusEnum.ON_HOLD.name: (QcStateDict.state.ilike("On hold%")), QcFlowStatusEnum.QC_COMPLETE.name: (QcState.is_preliminary == 0), QcFlowStatusEnum.IN_PROGRESS.name: and_( - QcState.is_preliminary == 1, QcStateDict.state != "On hold" + QcState.is_preliminary == 1, QcStateDict.state.notilike("On hold%") ), QcFlowStatusEnum.ABORTED.name: or_( PacBioRunWellMetrics.well_status.like("Abort%"), diff --git a/lang_qc/endpoints/config.py b/lang_qc/endpoints/config.py index 2436ab16..0f8dbb19 100644 --- a/lang_qc/endpoints/config.py +++ b/lang_qc/endpoints/config.py @@ -65,6 +65,8 @@ def _states_for_update(session) -> List: states = [] for (name, row) in qc_state_dict(session).items(): if name not in ["Aborted", "Claimed"]: - states.append({"description": name, "only_prelim": row.state == "On hold"}) + states.append( + {"description": name, "only_prelim": "on hold" in row.state.lower()} + ) return states diff --git a/tests/endpoints/test_config.py b/tests/endpoints/test_config.py index 85f63557..97ab9e76 100644 --- a/tests/endpoints/test_config.py +++ b/tests/endpoints/test_config.py @@ -23,6 +23,7 @@ def test_get_config(test_client: TestClient, load_dicts_and_users): {"description": "Failed, Instrument", "only_prelim": False}, {"description": "Failed, SMRT cell", "only_prelim": False}, {"description": "On hold", "only_prelim": True}, + {"description": "On hold external", "only_prelim": True}, {"description": "Undecided", "only_prelim": False}, ], } diff --git a/tests/endpoints/test_dump_qc_states.py b/tests/endpoints/test_dump_qc_states.py index 057fe3d5..49247ee0 100644 --- a/tests/endpoints/test_dump_qc_states.py +++ b/tests/endpoints/test_dump_qc_states.py @@ -41,7 +41,7 @@ def test_get_qc_by_product_id(test_client: TestClient, load_data4well_retrieval) assert SECOND_GOOD_CHECKSUM in response_data list_1 = response_data[FIRST_GOOD_CHECKSUM] list_2 = 
response_data[SECOND_GOOD_CHECKSUM] - qc_states = ["On hold", "Failed, Instrument"] + qc_states = ["On hold external", "Failed, Instrument"] for index, l in enumerate([list_1, list_2]): assert len(l) == 2 # The list of QC state objects contains QC states diff --git a/tests/endpoints/test_filtered_wells.py b/tests/endpoints/test_filtered_wells.py index c65bd71f..4ea86edb 100644 --- a/tests/endpoints/test_filtered_wells.py +++ b/tests/endpoints/test_filtered_wells.py @@ -81,7 +81,10 @@ def test_on_hold_filter(test_client: TestClient, load_data4well_retrieval): """Test passing `on_hold` filter.""" status = "on_hold" - expected_data = [{"TRACTION_RUN_1:D1": "On hold"}, {"TRACTION_RUN_1:B1": "On hold"}] + expected_data = [ + {"TRACTION_RUN_1:D1": "On hold external"}, + {"TRACTION_RUN_1:B1": "On hold"}, + ] num_total = len(expected_data) response = test_client.get( diff --git a/tests/endpoints/test_wells4run.py b/tests/endpoints/test_wells4run.py index 518e1c20..1347fce7 100644 --- a/tests/endpoints/test_wells4run.py +++ b/tests/endpoints/test_wells4run.py @@ -26,7 +26,7 @@ def test_existing_run(test_client: TestClient, load_data4well_retrieval): label_list = [well["label"] for well in resp["wells"]] assert label_list == ["A1", "B1", "C1", "D1"] qc_states = [well["qc_state"]["qc_state"] for well in resp["wells"]] - assert qc_states == ["Claimed", "On hold", "Claimed", "On hold"] + assert qc_states == ["Claimed", "On hold", "Claimed", "On hold external"] assert ( resp["wells"][0]["instrument_name"] == "64016" diff --git a/tests/fixtures/well_data.py b/tests/fixtures/well_data.py index abb99a36..3754c927 100644 --- a/tests/fixtures/well_data.py +++ b/tests/fixtures/well_data.py @@ -30,6 +30,7 @@ {"state": "Passed", "outcome": 1}, {"state": "Claimed", "outcome": None}, {"state": "On hold", "outcome": None}, + {"state": "On hold external", "outcome": None}, ] PLATFORMS = [ @@ -66,7 +67,7 @@ ["TRACTION_RUN_1", "A1", "Claimed", True, "2022-12-07 07:15:19", None], 
["TRACTION_RUN_1", "B1", "On hold", True, "2022-12-08 07:15:19", None], ["TRACTION_RUN_1", "C1", "Claimed", True, "2022-12-08 08:15:19", None], - ["TRACTION_RUN_1", "D1", "On hold", True, "2022-12-08 09:15:19", None], + ["TRACTION_RUN_1", "D1", "On hold external", True, "2022-12-08 09:15:19", None], ["TRACTION_RUN_1", "E1", "Claimed", True, "2022-12-07 09:15:19", None], ["TRACTION_RUN_2", "A1", "Failed, Instrument", True, "2022-12-07 15:13:56", 1], ["TRACTION_RUN_2", "B1", "Failed, Instrument", False, "2022-12-08 15:18:56", 1], diff --git a/tests/test_pb_wells_factory.py b/tests/test_pb_wells_factory.py index c9bbc92d..dc02f612 100644 --- a/tests/test_pb_wells_factory.py +++ b/tests/test_pb_wells_factory.py @@ -31,7 +31,7 @@ def test_query_for_status( assert isinstance(state, QcState) assert state.is_preliminary == 1 assert state.qc_type.qc_type == "sequencing" - assert state.qc_state_dict.state == "On hold" + assert state.qc_state_dict.state in ("On hold", "On hold external") compare_dates(state.date_updated, update_dates[index]) factory = PacBioPagedWellsFactory( @@ -400,8 +400,13 @@ def test_known_run_names_input( label_list = [well.label for well in wells] assert label_list == ["A1", "B1", "C1", "D1"] - qc_states = [well.qc_state.qc_state for well in wells] - expected_qc_states = ["Claimed", "On hold", "Claimed", "On hold"] + qc_states = sorted([well.qc_state.qc_state for well in wells]) + expected_qc_states = [ + "Claimed", + "Claimed", + "On hold", + "On hold external", + ] assert qc_states == expected_qc_states factory = PacBioPagedWellsFactory( diff --git a/tests/test_qc_state_retrieval.py b/tests/test_qc_state_retrieval.py index 7babe28d..6d5813e4 100644 --- a/tests/test_qc_state_retrieval.py +++ b/tests/test_qc_state_retrieval.py @@ -30,7 +30,7 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): # product IDs is performed. 
assert get_qc_states_by_id_product_list(qcdb_test_session, ["dodo"]) == {} - qc_state_descriptions = ["On hold", "Failed, Instrument"] + qc_state_descriptions = ["On hold external", "Failed, Instrument"] qc_states = get_qc_states_by_id_product_list(qcdb_test_session, two_good_ids_list) assert len(qc_states) == 2 @@ -140,7 +140,7 @@ def test_product_qc_state_retrieval(qcdb_test_session, load_data4well_retrieval) assert qc_state is not None assert qc_state.seq_product.id_product == FIRST_GOOD_CHECKSUM assert qc_state.qc_type.qc_type == "sequencing" - assert qc_state.qc_state_dict.state == "On hold" + assert qc_state.qc_state_dict.state == "On hold external" qc_state = get_qc_state_for_product( session=qcdb_test_session, id_product=FIRST_GOOD_CHECKSUM, qc_type="sequencing" @@ -148,7 +148,7 @@ def test_product_qc_state_retrieval(qcdb_test_session, load_data4well_retrieval) assert qc_state is not None assert qc_state.seq_product.id_product == FIRST_GOOD_CHECKSUM assert qc_state.qc_type.qc_type == "sequencing" - assert qc_state.qc_state_dict.state == "On hold" + assert qc_state.qc_state_dict.state == "On hold external" qc_state = get_qc_state_for_product( session=qcdb_test_session, id_product=FIRST_GOOD_CHECKSUM, qc_type="library" @@ -156,7 +156,7 @@ def test_product_qc_state_retrieval(qcdb_test_session, load_data4well_retrieval) assert qc_state is not None assert qc_state.seq_product.id_product == FIRST_GOOD_CHECKSUM assert qc_state.qc_type.qc_type == "library" - assert qc_state.qc_state_dict.state == "On hold" + assert qc_state.qc_state_dict.state == "On hold external" qc_state = get_qc_state_for_product(qcdb_test_session, SECOND_GOOD_CHECKSUM) assert qc_state is not None @@ -183,6 +183,7 @@ def test_dict_helper(qcdb_test_session, load_dicts_and_users): "Failed, SMRT cell", "Claimed", "On hold", + "On hold external", "Undecided", ] assert list(qc_state_dict(qcdb_test_session).keys()) == expected_sorted_states From e65370d5db3b3d108fc794617f6228208f3696a0 Mon Sep 17 
00:00:00 2001 From: mgcam Date: Fri, 22 Mar 2024 13:25:56 +0000 Subject: [PATCH 27/33] Described recent changes. --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4143d76..92e18c3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/). * To simplify object instantiation and fields' assignment for some of the response models, converted `PacBioWell` and `PacBioWellFull` models to pydantic dataclasses. +* Changed the response model for wells filtered by either QC status or run from + `PacBioWell` to `PacBioWellSummary`, the latter initially being identical to + the former. In order to propagate information about a study to the tabbed + well summary view, added a new field, study_names, to the `PacBioWellSummary` + model. +* Added a new event to the tabbed well summary view, to the button with the well + name. Mouse hover over this button displays study names associated with the + well. +* Changed the colour scheme of the above mentioned button from grey to orange + if one of the studies associated with the well is the BIOSCAN study, which + the QC team needs to deal with slightly differently. +* Added a new QC state 'On hold external'. Semantically the new state is similar + to the existing 'On hold' state. The intended purpose of the new QC state - to + highlight the wells, which are waiting for a completion of some off-site + process (example - deplexing at http://mbrave.net/). 
### Added From f2474123028f7220967240e7aba066cb232f946b Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 26 Mar 2024 16:34:28 +0000 Subject: [PATCH 28/33] Silence warnings from JS toolchain --- frontend/Dockerfile | 2 +- frontend/jsconfig.json | 3 +++ frontend/package.json | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 frontend/jsconfig.json diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 1e777afc..fbcdb61c 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18-alpine as base-stage +FROM node:20-alpine as base-stage COPY package.json /code/longue_vue/ diff --git a/frontend/jsconfig.json b/frontend/jsconfig.json new file mode 100644 index 00000000..92d8d97f --- /dev/null +++ b/frontend/jsconfig.json @@ -0,0 +1,3 @@ +{ + "exclude": ["node_modules"] +} \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json index 1e1f1172..5847f854 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -8,6 +8,7 @@ "node": ">=16.16.0", "npm": ">=8.11.0" }, + "type": "module", "scripts": { "dev": "vite --port 3000", "build": "vite build", From ba1b752cbed33e0b818a19a3210929ec5467b22b Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 3 Apr 2024 16:28:20 +0000 Subject: [PATCH 29/33] Update github test pipeline to use newer node versions in tests and actions --- .github/workflows/test.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f52c71c6..9e719153 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,9 +11,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: '3.10' architecture: 'x64' @@ -59,13 +59,13 @@ jobs: MYSQL_DATABASE: "langqc" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - 
name: Install Poetry run: | pipx install poetry - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: '3.10' architecture: 'x64' @@ -82,10 +82,10 @@ jobs: test-frontend: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 with: - node-version: '18' + node-version: '20' - run: npm install working-directory: ./frontend - run: npm run test From 8535d013964f82a8d37c750c0046a29db356348c Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 3 Apr 2024 16:34:35 +0000 Subject: [PATCH 30/33] Update action versions in other github pipelines --- .github/workflows/deploy-pages.yml | 9 ++++----- .github/workflows/lint.yml | 6 +++--- .github/workflows/publish-docker.yml | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml index 35578d6c..bee638b2 100644 --- a/.github/workflows/deploy-pages.yml +++ b/.github/workflows/deploy-pages.yml @@ -11,13 +11,13 @@ jobs: DB_URL: "mysql+pymysql://q:q@q/q" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Poetry run: | pipx install poetry - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: '3.10' architecture: 'x64' @@ -27,14 +27,13 @@ jobs: run: | poetry env use '3.10' poetry install - + - name: Build openapi.json run: | mkdir gh_pages poetry run python -c "from lang_qc.main import app; import json; output = open('gh-pages/openapi.json', 'w'); json.dump(app.openapi(), output); output.close()" - name: Deploy github pages - uses: JamesIves/github-pages-deploy-action@v4.3.3 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: - branch: gh-pages folder: gh-pages diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a2f86555..9c82b7f3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,13 +12,13 @@ 
jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Poetry run: | pipx install poetry - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: '3.10' architecture: 'x64' @@ -41,7 +41,7 @@ jobs: run: | poetry run isort --check --diff --color . - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 with: node-version: 18.x diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml index 41ba6b5e..3d5376f4 100644 --- a/.github/workflows/publish-docker.yml +++ b/.github/workflows/publish-docker.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Check out repository code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Log in to registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin From a2ed83844a29b97fe77519911b77d04a817508eb Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 5 Apr 2024 17:15:12 +0100 Subject: [PATCH 31/33] Switched to light theme for the well button --- frontend/src/components/WellTable.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/WellTable.vue b/frontend/src/components/WellTable.vue index 3d6eb6ea..2fcc6ff8 100644 --- a/frontend/src/components/WellTable.vue +++ b/frontend/src/components/WellTable.vue @@ -36,7 +36,7 @@ defineEmits(['wellSelected']) :content="''.concat(listStudiesForTooltip(wellObj.study_names)).concat('')" raw-content > - From 5c4e79c1f29fcd9891e3b8040542ef3635898c88 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 9 Apr 2024 09:57:35 +0100 Subject: [PATCH 32/33] Fixed an error in the db migration script --- alembic/versions/2.1.0_extend_qc_state_dict.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/alembic/versions/2.1.0_extend_qc_state_dict.py b/alembic/versions/2.1.0_extend_qc_state_dict.py index 8844d8dd..6687af0f 100644 --- 
a/alembic/versions/2.1.0_extend_qc_state_dict.py +++ b/alembic/versions/2.1.0_extend_qc_state_dict.py @@ -15,7 +15,9 @@ def upgrade() -> None: - op.execute("INSERT INTO qc_state_dict VALUES ('On hold external', NULL)") + op.execute( + "INSERT INTO qc_state_dict (state, outcome) VALUES ('On hold external', NULL)" + ) def downgrade() -> None: From cb9279406e569c0592b30bf88052ef150e974e1d Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 12 Apr 2024 10:47:56 +0100 Subject: [PATCH 33/33] Updated app version to 2.1.0 Updated the Copyright year. Added release instructions to README. Updated the CHANGELOG. --- CHANGELOG.md | 2 ++ README.md | 11 +++++++++++ frontend/package.json | 2 +- frontend/src/App.vue | 2 +- lang_qc/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e18c3f..dda3f31f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +## [2.1.0] - 2024-04-15 + ### Changed * To simplify object instantiation and fields' assignment for some diff --git a/README.md b/README.md index 9850d42f..292ec96c 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,17 @@ subjects other than development, testing and deployment. - [The QC process](docs/qc_process.md) - [The QC database schema](docs/qc_schema_explained.md) +## Prepare for release + +Prior to merging to the master branch: + +- Update CHANGELOG.md +- Update the hardcoded version in + - frontend/package.json + - lang_qc/__init__.py + - pyproject.toml +- If needed, update the Copyright year in frontend/src/App.vue + ## Install and run locally You can install the package with `pip install .` from the repository's root. 
diff --git a/frontend/package.json b/frontend/package.json index 5847f854..dabf760d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "npg-longue-vue", - "version": "2.0.0", + "version": "2.1.0", "description": "UI for LangQC", "author": "Kieron Taylor ", "license": "GPL-3.0-or-later", diff --git a/frontend/src/App.vue b/frontend/src/App.vue index d0d30300..ca6dfc25 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -122,7 +122,7 @@ function notInWellsByRun() { - Copyright Genome Research Ltd 2023 - client version: {{ VERSION.replace(/['"]+/g) + (DEVMODE ? "+DEV" : "") + Copyright Genome Research Ltd 2023, 2024 - client version: {{ VERSION.replace(/['"]+/g) + (DEVMODE ? "+DEV" : "") }} diff --git a/lang_qc/__init__.py b/lang_qc/__init__.py index 8c0d5d5b..9aa3f903 100644 --- a/lang_qc/__init__.py +++ b/lang_qc/__init__.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "2.1.0" diff --git a/pyproject.toml b/pyproject.toml index 029c3974..95273614 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "npg_langqc" packages = [ { include = "lang_qc" }, ] -version = "2.0.0" +version = "2.1.0" description = "FastAPI application for Long Read QC" authors = ["Adam Blanchet", "Marina Gourtovaia ", "Kieron Taylor "] license = "GPL-3.0-or-later"