From 7d72c994c66dd9f87ba61ac07311dc71f8528b92 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 18 Dec 2024 10:08:01 -0500 Subject: [PATCH] feat!: add support for moa prognostic assertions (#411) close #408 Note: This work revealed a bug with uniqueness constraints (#409). This will be addressed in a separate PR. This PR focuses on changes to the harvester + transformer: `python3 -m pytest tests/unit/harvesters tests/unit/transformers` * Harvester output changed * Therapy fields are now nested inside `therapy` key * Remove `clinical_significance` and retain original values from MOA * `source_ids` -> `source_id` since we only store one ID * `MoaTransformer` now supports MOA prognostic assertions --- src/metakb/harvesters/moa.py | 28 +- src/metakb/transformers/moa.py | 232 ++++++------ tests/data/harvesters/moa/assertions.json | 58 +-- .../prognostic/moa_harvester.json | 144 ++++++++ .../{ => therapeutic}/moa_harvester.json | 82 +++-- .../harvesters/moa/test_moa_assertions.py | 25 +- tests/unit/harvesters/moa/test_moa_source.py | 12 +- .../test_moa_transformer_prognostic.py | 339 ++++++++++++++++++ ...py => test_moa_transformer_therapeutic.py} | 33 +- 9 files changed, 730 insertions(+), 223 deletions(-) create mode 100644 tests/data/transformers/prognostic/moa_harvester.json rename tests/data/transformers/{ => therapeutic}/moa_harvester.json (67%) create mode 100644 tests/unit/transformers/test_moa_transformer_prognostic.py rename tests/unit/transformers/{test_moa_transformer.py => test_moa_transformer_therapeutic.py} (88%) diff --git a/src/metakb/harvesters/moa.py b/src/metakb/harvesters/moa.py index 11e2e60c..a8cc109a 100644 --- a/src/metakb/harvesters/moa.py +++ b/src/metakb/harvesters/moa.py @@ -151,24 +151,27 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict assertion_record = { "id": assertion["assertion_id"], "context": assertion["context"], + "deprecated": assertion["deprecated"], "description": assertion["description"], 
"disease": { "name": assertion["disease"], "oncotree_code": assertion["oncotree_code"], "oncotree_term": assertion["oncotree_term"], }, - "therapy_name": assertion["therapy_name"], - "therapy_type": assertion["therapy_type"], - "clinical_significance": self._get_therapy( - assertion["therapy_resistance"], assertion["therapy_sensitivity"] - ), + "therapy": { + "name": assertion["therapy_name"], + "type": assertion["therapy_type"], + "strategy": assertion["therapy_strategy"], + "resistance": assertion["therapy_resistance"], + "sensitivity": assertion["therapy_sensitivity"], + }, "predictive_implication": assertion["predictive_implication"], "favorable_prognosis": assertion["favorable_prognosis"], "created_on": assertion["created_on"], "last_updated": assertion["last_updated"], "submitted_by": assertion["submitted_by"], "validated": assertion["validated"], - "source_ids": assertion["sources"][0]["source_id"], + "source_id": assertion["sources"][0]["source_id"], } for v in variants_list: @@ -177,19 +180,6 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict return assertion_record - def _get_therapy(self, resistance: bool, sensitivity: bool) -> str | None: - """Get therapy response data. - - :param resistance: `True` if Therapy Resistance. `False` if not Therapy Resistance - :param sensitivity: `True` if Therapy Sensitivity. 
`False` if not Therapy Sensitivity - :return: whether the therapy response is resistance or sensitivity - """ - if resistance: - return "resistance" - if sensitivity: - return "sensitivity" - return None - def _get_feature(self, v: dict) -> dict: """Get feature name from the harvested variants diff --git a/src/metakb/transformers/moa.py b/src/metakb/transformers/moa.py index ce918a83..45df5501 100644 --- a/src/metakb/transformers/moa.py +++ b/src/metakb/transformers/moa.py @@ -8,9 +8,11 @@ from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint from ga4gh.core import sha512t24u from ga4gh.core.domain_models import ( + CombinationTherapy, Disease, Gene, TherapeuticAgent, + TherapeuticSubstituteGroup, ) from ga4gh.core.entity_models import ( Coding, @@ -21,7 +23,9 @@ ) from ga4gh.va_spec.profiles.var_study_stmt import ( AlleleOriginQualifier, + PrognosticPredicate, TherapeuticResponsePredicate, + VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) from ga4gh.vrs.models import Variation @@ -85,133 +89,97 @@ async def transform(self, harvested_data: MoaHarvestedData) -> None: self._add_documents(harvested_data.sources) # Add variant therapeutic response study statement data. Will update `statements` - await self._add_variant_tr_study_stmts(harvested_data.assertions) + for assertion in harvested_data.assertions: + await self._add_variant_study_stmt(assertion) - async def _add_variant_tr_study_stmts(self, assertions: list[dict]) -> None: - """Create Variant Therapeutic Response Study Statements from MOA assertions. + async def _add_variant_study_stmt(self, assertion: dict) -> None: + """Create Variant Study Statements from MOA assertions. Will add associated values to ``processed_data`` instance variable (``therapeutic_procedures``, ``conditions``, and ``statements``). ``able_to_normalize`` and ``unable_to_normalize`` will also be mutated for associated therapeutic_procedures and conditions. 
- :param assertions: A list of MOA assertion records + :param assertion: MOA assertion record """ - for record in assertions: - assertion_id = f"moa.assertion:{record['id']}" - variant_id = record["variant"]["id"] + assertion_id = f"moa.assertion:{assertion['id']}" + variant_id = assertion["variant"]["id"] - # Check cache for variation record (which contains gene information) - variation_gene_map = self.able_to_normalize["variations"].get(variant_id) - if not variation_gene_map: - logger.debug( - "%s has no variation for variant_id %s", assertion_id, variant_id - ) - continue - - # Get predicate. We only support therapeutic resistance/sensitivity - if record["clinical_significance"] == "resistance": - predicate = TherapeuticResponsePredicate.RESISTANCE - elif record["clinical_significance"] == "sensitivity": - predicate = TherapeuticResponsePredicate.SENSITIVITY - else: - logger.debug( - "clinical_significance not supported: %s", - record["clinical_significance"], - ) - continue + # Check cache for variation record (which contains gene information) + variation_gene_map = self.able_to_normalize["variations"].get(variant_id) + if not variation_gene_map: + logger.debug( + "%s has no variation for variant_id %s", assertion_id, variant_id + ) + return + + # Get strength + predictive_implication = ( + assertion["predictive_implication"] + .strip() + .replace(" ", "_") + .replace("-", "_") + .upper() + ) + moa_evidence_level = MoaEvidenceLevel[predictive_implication] + strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level] - # Get strength - predictive_implication = ( - record["predictive_implication"] - .strip() - .replace(" ", "_") - .replace("-", "_") - .upper() + # Add disease + moa_disease = self._add_disease(assertion["disease"]) + if not moa_disease: + logger.debug( + "%s has no disease for disease %s", assertion_id, assertion["disease"] ) - moa_evidence_level = MoaEvidenceLevel[predictive_implication] - strength =
self.evidence_level_to_vicc_concept_mapping[moa_evidence_level] + return - # Add therapeutic agent. We only support one therapy, so we will skip others - therapy_name = record["therapy_name"] - if not therapy_name: - logger.debug("%s has no therapy_name", assertion_id) - continue + # Add document + document = self.able_to_normalize["documents"].get(assertion["source_id"]) - therapy_interaction_type = record["therapy_type"] - - if "+" in therapy_name: - # Indicates multiple therapies - if therapy_interaction_type.upper() in { - "COMBINATION THERAPY", - "IMMUNOTHERAPY", - "RADIATION THERAPY", - "TARGETED THERAPY", - }: - therapeutic_procedure_type = ( - TherapeuticProcedureType.COMBINATION_THERAPY - ) - else: - # skipping HORMONE and CHEMOTHERAPY for now - continue + feature_type = assertion["variant"]["feature_type"] + if feature_type == "somatic_variant": + allele_origin_qualifier = AlleleOriginQualifier.SOMATIC + elif feature_type == "germline_variant": + allele_origin_qualifier = AlleleOriginQualifier.GERMLINE + else: + allele_origin_qualifier = None + + params = { + "id": assertion_id, + "description": assertion["description"], + "strength": strength, + "subjectVariant": variation_gene_map["cv"], + "alleleOriginQualifier": allele_origin_qualifier, + "geneContextQualifier": variation_gene_map["moa_gene"], + "specifiedBy": self.processed_data.methods[0], + "reportedIn": [document], + } - therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")] - therapeutic_digest = self._get_digest_for_str_lists( - [f"moa.therapy:{tn}" for tn in therapies] - ) - therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}" - else: - therapeutic_procedure_id = f"moa.therapy:{therapy_name}" - therapies = [{"label": therapy_name}] - therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT - - moa_therapeutic = self._add_therapeutic_procedure( - therapeutic_procedure_id, - therapies, - therapeutic_procedure_type, - therapy_interaction_type, + if 
assertion["favorable_prognosis"] == "": + params["conditionQualifier"] = moa_disease + params["predicate"] = ( + TherapeuticResponsePredicate.RESISTANCE + if assertion["therapy"]["resistance"] + else TherapeuticResponsePredicate.SENSITIVITY ) + params["objectTherapeutic"] = self._get_therapeutic_procedure(assertion) - if not moa_therapeutic: + if not params["objectTherapeutic"]: logger.debug( - "%s has no therapeutic agent for therapy_name %s", + "%s has no therapeutic procedure for therapy_name %s", assertion_id, - therapy_name, - ) - continue - - # Add disease - moa_disease = self._add_disease(record["disease"]) - if not moa_disease: - logger.debug( - "%s has no disease for disease %s", assertion_id, record["disease"] + assertion["therapy"]["name"], ) - continue - - # Add document - document = self.able_to_normalize["documents"].get(record["source_ids"]) - - feature_type = record["variant"]["feature_type"] - if feature_type == "somatic_variant": - allele_origin_qualifier = AlleleOriginQualifier.SOMATIC - elif feature_type == "germline_variant": - allele_origin_qualifier = AlleleOriginQualifier.GERMLINE - else: - allele_origin_qualifier = None - - statement = VariantTherapeuticResponseStudyStatement( - id=assertion_id, - description=record["description"], - strength=strength, - predicate=predicate, - subjectVariant=variation_gene_map["cv"], - objectTherapeutic=moa_therapeutic, - conditionQualifier=moa_disease, - alleleOriginQualifier=allele_origin_qualifier, - geneContextQualifier=variation_gene_map["moa_gene"], - specifiedBy=self.processed_data.methods[0], - reportedIn=[document], + return + statement = VariantTherapeuticResponseStudyStatement(**params) + else: + params["objectCondition"] = moa_disease + params["predicate"] = ( + PrognosticPredicate.BETTER_OUTCOME + if assertion["favorable_prognosis"] + else PrognosticPredicate.WORSE_OUTCOME ) - self.processed_data.statements.append(statement) + statement = VariantPrognosticStudyStatement(**params) + + 
self.processed_data.statements.append(statement) async def _add_categorical_variants(self, variants: list[dict]) -> None: """Create Categorical Variant objects for all MOA variant records. @@ -437,6 +405,54 @@ def _add_documents(self, sources: list) -> None: self.able_to_normalize["documents"][source_id] = document self.processed_data.documents.append(document) + def _get_therapeutic_procedure( + self, assertion: dict + ) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None: + """Get therapeutic procedure object + + :param assertion: MOA assertion record + :return: Therapeutic procedure object, if found and able to be normalized + """ + therapy = assertion["therapy"] + therapy_name = therapy["name"] + if not therapy_name: + logger.debug("%s has no therapy_name", assertion["id"]) + return None + + therapy_interaction_type = therapy["type"] + + if "+" in therapy_name: + # Indicates multiple therapies + if therapy_interaction_type.upper() in { + "COMBINATION THERAPY", + "IMMUNOTHERAPY", + "RADIATION THERAPY", + "TARGETED THERAPY", + }: + therapeutic_procedure_type = ( + TherapeuticProcedureType.COMBINATION_THERAPY + ) + else: + # skipping HORMONE and CHEMOTHERAPY for now + return None + + therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")] + therapeutic_digest = self._get_digest_for_str_lists( + [f"moa.therapy:{tn}" for tn in therapies] + ) + therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}" + else: + therapeutic_procedure_id = f"moa.therapy:{therapy_name}" + therapies = [{"label": therapy_name}] + therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT + + return self._add_therapeutic_procedure( + therapeutic_procedure_id, + therapies, + therapeutic_procedure_type, + therapy_interaction_type, + ) + def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, diff --git a/tests/data/harvesters/moa/assertions.json b/tests/data/harvesters/moa/assertions.json index 960aef85..69b7602b 
100644 --- a/tests/data/harvesters/moa/assertions.json +++ b/tests/data/harvesters/moa/assertions.json @@ -1,9 +1,10 @@ [ { - "assertion_id": 165, - "context": "Resistance to BRAFi monotherapy", - "created_on": "12/07/23", - "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", + "assertion_id": 163, + "context": "", + "created_on": "12/05/24", + "deprecated": false, + "description": "The combination of ipilimumab and vemurafenib in a sequencing strategy showed limited efficacy in a phase II study.", "disease": "Melanoma", "favorable_prognosis": "", "features": [ @@ -24,38 +25,39 @@ "variant_annotation": "Missense" } ], - "feature_id": 165, + "feature_id": 163, "feature_type": "somatic_variant" } ], "last_updated": "2019-06-13", "oncotree_code": "MEL", "oncotree_term": "Melanoma", - "predictive_implication": "Preclinical", + "predictive_implication": "Clinical trial", "sources": [ { - "citation": "Caporali S, Alvino E, Lacal PM, et al. Targeting the PI3K/AKT/mTOR pathway overcomes the stimulating effect of dabrafenib on the invasive behavior of melanoma cells with acquired resistance to the BRAF inhibitor. Int J Oncol. 2016;49(3):1164-74.", - "doi": "10.3892/ijo.2016.3594", - "nct": "", - "pmid": 27572607, + "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. J Immunother Cancer. 
2016;4:44.", + "doi": "10.1186/s40425-016-0148-7", + "nct": "NCT01673854", + "pmid": 27532019, "source_id": 69, "source_type": "Journal", - "url": "https://doi.org/10.3892/ijo.2016.3594" + "url": "https://doi.org/10.1186/s40425-016-0148-7" } ], "submitted_by": "breardon@broadinstitute.org", - "therapy_name": "Dabrafenib + Bevacizumab", + "therapy_name": "Ipilimumab + Vemurafenib", "therapy_resistance": "", "therapy_sensitivity": 1, - "therapy_strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", - "therapy_type": "Targeted therapy", + "therapy_strategy": "CTLA-4 inhibition + B-RAF inhibition", + "therapy_type": "Combination therapy", "validated": true }, { "assertion_id": 164, - "context": "", - "created_on": "12/07/23", - "description": "The combination of ipilimumab and vemurafenib in a sequencing strategy showed limited efficacy in a phase II study.", + "context": "Resistance to BRAFi monotherapy", + "created_on": "12/05/24", + "deprecated": false, + "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", "disease": "Melanoma", "favorable_prognosis": "", "features": [ @@ -83,24 +85,24 @@ "last_updated": "2019-06-13", "oncotree_code": "MEL", "oncotree_term": "Melanoma", - "predictive_implication": "Clinical trial", + "predictive_implication": "Preclinical", "sources": [ { - "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. J Immunother Cancer. 2016;4:44.", - "doi": "10.1186/s40425-016-0148-7", - "nct": "NCT01673854", - "pmid": 27532019, - "source_id": 68, + "citation": "Caporali S, Alvino E, Lacal PM, et al. 
Targeting the PI3K/AKT/mTOR pathway overcomes the stimulating effect of dabrafenib on the invasive behavior of melanoma cells with acquired resistance to the BRAF inhibitor. Int J Oncol. 2016;49(3):1164-74.", + "doi": "10.3892/ijo.2016.3594", + "nct": "", + "pmid": 27572607, + "source_id": 70, "source_type": "Journal", - "url": "https://doi.org/10.1186/s40425-016-0148-7" + "url": "https://doi.org/10.3892/ijo.2016.3594" } ], "submitted_by": "breardon@broadinstitute.org", - "therapy_name": "Ipilimumab + Vemurafenib", + "therapy_name": "Dabrafenib + Bevacizumab", "therapy_resistance": "", "therapy_sensitivity": 1, - "therapy_strategy": "CTLA-4 inhibition + B-RAF inhibition", - "therapy_type": "Combination therapy", + "therapy_strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", + "therapy_type": "Targeted therapy", "validated": true } -] \ No newline at end of file +] diff --git a/tests/data/transformers/prognostic/moa_harvester.json b/tests/data/transformers/prognostic/moa_harvester.json new file mode 100644 index 00000000..0c5eb5de --- /dev/null +++ b/tests/data/transformers/prognostic/moa_harvester.json @@ -0,0 +1,144 @@ +{ + "assertions": [ + { + "id": 141, + "context": "", + "deprecated": false, + "description": "More frequent in Chronic Myelomonocytic Leukemia.", + "disease": { + "name": "Myelodysplasia", + "oncotree_code": "MDS", + "oncotree_term": "Myelodysplasia" + }, + "therapy": { + "name": "", + "type": "", + "strategy": "", + "resistance": "", + "sensitivity": "" + }, + "predictive_implication": "Clinical evidence", + "favorable_prognosis": 0, + "created_on": "12/05/24", + "last_updated": "2019-06-13", + "submitted_by": "breardon@broadinstitute.org", + "validated": true, + "source_id": 60, + "variant": { + "id": 141, + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "chromosome": "X", + "end_position": "39921444", + "exon": "10", + "feature_type": "somatic_variant", + "gene": "BCOR", + "protein_change": "p.N1425S", + "reference_allele": "T", + 
"rsid": null, + "start_position": "39921444", + "variant_annotation": "Missense", + "feature": "BCOR p.N1425S (Missense)" + } + }, + { + "id": 532, + "context": "", + "deprecated": false, + "description": "The National Comprehensive Cancer Network\u00ae (NCCN\u00ae) highlights SF3B1 E622, Y623, R625, N626, H662, T663, K666, K700E, I704, G740, G742, and D781 missense variants as being associated with a favorable prognosis in patients with myelodysplastic syndromes.", + "disease": { + "name": "Myelodysplasia", + "oncotree_code": "MDS", + "oncotree_term": "Myelodysplasia" + }, + "therapy": { + "name": "", + "type": "", + "strategy": "", + "resistance": "", + "sensitivity": "" + }, + "predictive_implication": "Guideline", + "favorable_prognosis": 1, + "created_on": "12/05/24", + "last_updated": "2023-11-02", + "submitted_by": "breardon@broadinstitute.org", + "validated": true, + "source_id": 33, + "variant": { + "id": 532, + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "chromosome": "2", + "end_position": "198267491", + "exon": "14", + "feature_type": "somatic_variant", + "gene": "SF3B1", + "protein_change": "p.E622D", + "reference_allele": "C", + "rsid": "rs763149798", + "start_position": "198267491", + "variant_annotation": "Missense", + "feature": "SF3B1 p.E622D (Missense)" + } + } + ], + "sources": [ + { + "id": 33, + "type": "Guideline", + "doi": "", + "nct": "", + "pmid": "", + "url": "https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf", + "citation": "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines\u00ae) for Myelodysplastic Syndromes V.2.2023. \u00a9 National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." 
+ }, + { + "id": 60, + "type": "Journal", + "doi": "10.1158/1078-0432.CCR-09-2828", + "nct": "", + "pmid": 20453058, + "url": "https://doi.org/10.1158/1078-0432.CCR-09-2828", + "citation": "O'Brien C, Wallin JJ, Sampath D, et al. Predictive biomarkers of sensitivity to the phosphatidylinositol 3' kinase inhibitor GDC-0941 in breast cancer preclinical models. Clin Cancer Res. 2010;16(14):3670-83." + } + ], + "variants": [ + { + "id": 141, + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "chromosome": "X", + "end_position": "39921444", + "exon": "10", + "feature_type": "somatic_variant", + "gene": "BCOR", + "protein_change": "p.N1425S", + "reference_allele": "T", + "rsid": null, + "start_position": "39921444", + "variant_annotation": "Missense", + "feature": "BCOR p.N1425S (Missense)" + }, + { + "id": 532, + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "chromosome": "2", + "end_position": "198267491", + "exon": "14", + "feature_type": "somatic_variant", + "gene": "SF3B1", + "protein_change": "p.E622D", + "reference_allele": "C", + "rsid": "rs763149798", + "start_position": "198267491", + "variant_annotation": "Missense", + "feature": "SF3B1 p.E622D (Missense)" + } + ], + "genes": [ + "BCOR", + "SF3B1" + ] +} diff --git a/tests/data/transformers/moa_harvester.json b/tests/data/transformers/therapeutic/moa_harvester.json similarity index 67% rename from tests/data/transformers/moa_harvester.json rename to tests/data/transformers/therapeutic/moa_harvester.json index 562e3c9b..d75e0cb5 100644 --- a/tests/data/transformers/moa_harvester.json +++ b/tests/data/transformers/therapeutic/moa_harvester.json @@ -3,22 +3,27 @@ { "id": 66, "context": "", + "deprecated": false, "description": "T315I mutant ABL1 in p210 BCR-ABL cells resulted in retained high levels of phosphotyrosine at increasing concentrations of inhibitor STI-571, whereas wildtype appropriately received inhibition.", "disease": { "name": "Chronic Myelogenous Leukemia", "oncotree_code": 
"CML", "oncotree_term": "Chronic Myelogenous Leukemia" }, - "therapy_name": "Imatinib", - "therapy_type": "Targeted therapy", - "clinical_significance": "resistance", + "therapy": { + "name": "Imatinib", + "type": "Targeted therapy", + "strategy": "BCR-ABL inhibition", + "resistance": 1, + "sensitivity": "" + }, "predictive_implication": "Preclinical", "favorable_prognosis": "", - "created_on": "12/07/23", + "created_on": "12/05/24", "last_updated": "2023-11-30", "submitted_by": "breardon@broadinstitute.org", "validated": true, - "source_ids": 45, + "source_id": 45, "variant": { "id": 66, "alternate_allele": "T", @@ -37,26 +42,31 @@ } }, { - "id": 155, + "id": 154, "context": "Metastatic, after prior therapy", + "deprecated": false, "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "disease": { "name": "Colorectal Adenocarcinoma", "oncotree_code": "COADREAD", "oncotree_term": "Colorectal Adenocarcinoma" }, - "therapy_name": "Cetuximab + Encorafenib", - "therapy_type": "Targeted therapy", - "clinical_significance": "sensitivity", + "therapy": { + "name": "Cetuximab + Encorafenib", + "type": "Targeted therapy", + "strategy": "EGFR inhibition + B-RAF inhibition", + "resistance": "", + "sensitivity": 1 + }, "predictive_implication": "FDA-Approved", "favorable_prognosis": "", - "created_on": "12/07/23", + "created_on": "12/05/24", "last_updated": "2020-10-15", "submitted_by": "breardon@broadinstitute.org", "validated": true, - "source_ids": 63, + "source_id": 64, "variant": { - "id": 145, + "id": 144, "alternate_allele": "T", "cdna_change": "c.1799T>A", "chromosome": "7", @@ -83,15 +93,15 @@ "url": "https://doi.org/10.1126/science.1062538", "citation": "Gorre, Mercedes E., et al. 
Clinical resistance to STI-571 cancer therapy caused by BCR-ABL gene mutation or amplification. Science 293.5531 (2001): 876-880." }, - { - "id": 63, - "type": "FDA", - "doi": "", - "nct": "", - "pmid": "", - "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", - "citation": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020." - } + { + "id": 64, + "type": "FDA", + "doi": "", + "nct": "", + "pmid": "", + "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", + "citation": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020." + } ], "variants": [ { @@ -111,21 +121,21 @@ "feature": "ABL1 p.T315I (Missense)" }, { - "id": 145, - "alternate_allele": "T", - "cdna_change": "c.1799T>A", - "chromosome": "7", - "end_position": "140453136", - "exon": "15", - "feature_type": "somatic_variant", - "gene": "BRAF", - "protein_change": "p.V600E", - "reference_allele": "A", - "rsid": "rs113488022", - "start_position": "140453136", - "variant_annotation": "Missense", - "feature": "BRAF p.V600E (Missense)" - } + "id": 144, + "alternate_allele": "T", + "cdna_change": "c.1799T>A", + "chromosome": "7", + "end_position": "140453136", + "exon": "15", + "feature_type": "somatic_variant", + "gene": "BRAF", + "protein_change": "p.V600E", + "reference_allele": "A", + "rsid": "rs113488022", + "start_position": "140453136", + "variant_annotation": "Missense", + "feature": "BRAF p.V600E (Missense)" + } ], "genes": [ "ABL1", diff --git a/tests/unit/harvesters/moa/test_moa_assertions.py b/tests/unit/harvesters/moa/test_moa_assertions.py index 300f70f7..6581c15f 100644 --- 
a/tests/unit/harvesters/moa/test_moa_assertions.py +++ b/tests/unit/harvesters/moa/test_moa_assertions.py @@ -11,27 +11,32 @@ @pytest.fixture(scope="module") -def assertion165(): +def assertion164(): """Create a fixture for assertion #165.""" return { - "id": 165, + "id": 164, "context": "Resistance to BRAFi monotherapy", "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", + "deprecated": False, "disease": { "name": "Melanoma", "oncotree_code": "MEL", "oncotree_term": "Melanoma", }, - "therapy_name": "Dabrafenib + Bevacizumab", - "therapy_type": "Targeted therapy", - "clinical_significance": "sensitivity", + "therapy": { + "name": "Dabrafenib + Bevacizumab", + "type": "Targeted therapy", + "strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", + "resistance": "", + "sensitivity": 1, + }, "predictive_implication": "Preclinical", "favorable_prognosis": "", - "created_on": "12/07/23", + "created_on": "12/05/24", "last_updated": "2019-06-13", "submitted_by": "breardon@broadinstitute.org", "validated": True, - "source_ids": 69, + "source_id": 70, "variant": { "id": 145, "alternate_allele": "T", @@ -53,7 +58,7 @@ def assertion165(): @patch.object(MoaHarvester, "_get_all_variants") @patch.object(MoaHarvester, "_get_all_assertions") -def test_assertion_170(test_get_all_assertions, test_get_all_variants, assertion165): +def test_assertion_164(test_get_all_assertions, test_get_all_variants, assertion164): """Test moa harvester works correctly for assertions.""" moa_harvester_test_dir = TEST_HARVESTERS_DIR / SourceName.MOA.value with (moa_harvester_test_dir / "assertions.json").open() as f: @@ -70,7 +75,7 @@ def 
test_assertion_170(test_get_all_assertions, test_get_all_variants, assertion actual = None for a in assertions: - if a["id"] == assertion165["id"]: + if a["id"] == assertion164["id"]: actual = a break - assert actual == assertion165 + assert actual == assertion164 diff --git a/tests/unit/harvesters/moa/test_moa_source.py b/tests/unit/harvesters/moa/test_moa_source.py index bc002a26..edabee40 100644 --- a/tests/unit/harvesters/moa/test_moa_source.py +++ b/tests/unit/harvesters/moa/test_moa_source.py @@ -19,10 +19,10 @@ def sources(): @pytest.fixture(scope="module") -def source68(): - """Create a fixture for source of evidence #68.""" +def source69(): + """Create a fixture for source ID 69.""" return { - "id": 68, + "id": 69, "type": "Journal", "doi": "10.1186/s40425-016-0148-7", "nct": "NCT01673854", @@ -33,7 +33,7 @@ def source68(): @patch.object(MoaHarvester, "_get_all_assertions") -def test_source68(test_get_all_assertions, source68): +def test_source69(test_get_all_assertions, source69): """Test moa harvester works correctly for evidence.""" with (TEST_HARVESTERS_DIR / SourceName.MOA.value / "assertions.json").open() as f: data = json.load(f) @@ -44,7 +44,7 @@ def test_source68(test_get_all_assertions, source68): actual = None for s in sources: - if s["id"] == source68["id"]: + if s["id"] == source69["id"]: actual = s break - assert actual == source68 + assert actual == source69 diff --git a/tests/unit/transformers/test_moa_transformer_prognostic.py b/tests/unit/transformers/test_moa_transformer_prognostic.py new file mode 100644 index 00000000..c14f7185 --- /dev/null +++ b/tests/unit/transformers/test_moa_transformer_prognostic.py @@ -0,0 +1,339 @@ +"""Test MOA Transformation to common data model""" + +import json + +import pytest +import pytest_asyncio +from tests.conftest import TEST_TRANSFORMERS_DIR + +from metakb.normalizers import VICC_NORMALIZER_DATA +from metakb.transformers.moa import MoaTransformer + +DATA_DIR = TEST_TRANSFORMERS_DIR / "prognostic" 
+FILENAME = "moa_cdm.json" + + +@pytest_asyncio.fixture(scope="module") +async def data(normalizers): + """Create a MOA Transformer test fixture.""" + harvester_path = DATA_DIR / "moa_harvester.json" + moa = MoaTransformer( + data_dir=DATA_DIR, + harvester_path=harvester_path, + normalizers=normalizers, + ) + harvested_data = moa.extract_harvested_data() + await moa.transform(harvested_data) + moa.create_json(cdm_filepath=DATA_DIR / FILENAME) + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) + + +@pytest.fixture(scope="module") +def moa_vid141(): + """Create a test fixture for MOA VID141.""" + return { + "id": "moa.variant:141", + "type": "CategoricalVariant", + "label": "BCOR p.N1425S (Missense)", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", + "digest": "pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", + "type": "Allele", + "location": { + "id": "ga4gh:SL.XiatLUYcK0JzC_CROMV55bbJ_weygAkP", + "digest": "XiatLUYcK0JzC_CROMV55bbJ_weygAkP", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.VHPiWlNXV-23rh_9w2KR2PLqPd7OSKMS", + }, + "start": 1458, + "end": 1459, + "sequence": "N", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "S"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.e84USp97bhTBu8IC3wsm7nF8_GXU7Yk2", + "type": "Allele", + "label": "X-39921444-T-C", + "digest": "e84USp97bhTBu8IC3wsm7nF8_GXU7Yk2", + "location": { + "id": "ga4gh:SL.6k6-KBncHr2M-nwSTTOLNYbUN5XsMmpB", + "type": "SequenceLocation", + "digest": "6k6-KBncHr2M-nwSTTOLNYbUN5XsMmpB", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": 40062190, + "end": 40062191, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + } + ], + "extensions": [ + { + "name": "MOA representative coordinate", + "value": { + 
"chromosome": "X", + "start_position": "39921444", + "end_position": "39921444", + "reference_allele": "T", + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "protein_change": "p.N1425S", + "exon": "10", + }, + } + ], + "mappings": [ + { + "coding": { + "system": "https://moalmanac.org/api/features/", + "code": "141", + }, + "relation": "exactMatch", + } + ], + } + + +@pytest.fixture(scope="module") +def moa_myelodysplasia(): + """Create test fixture for MOA disease Myelodysplasia""" + return { + "id": "moa.normalize.disease.ncit:C3247", + "type": "Disease", + "label": "Myelodysplasia", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3247", + "label": "Myelodysplastic Syndrome", + "mondo_id": "0018881", + }, + } + ], + "mappings": [ + { + "coding": { + "label": "Myelodysplasia", + "system": "https://oncotree.mskcc.org/", + "code": "MDS", + }, + "relation": "exactMatch", + } + ], + } + + +@pytest.fixture(scope="module") +def moa_bcor(): + """Create MOA gene BCOR test fixture""" + return { + "id": "moa.normalize.gene:BCOR", + "type": "Gene", + "label": "BCOR", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:20893", "label": "BCOR"}, + } + ], + } + + +@pytest.fixture(scope="module") +def moa_source60(): + """Create MOA source ID 60 test fixture""" + return { + "id": "moa.source:60", + "extensions": [{"name": "source_type", "value": "Journal"}], + "type": "Document", + "title": "O'Brien C, Wallin JJ, Sampath D, et al. Predictive biomarkers of sensitivity to the phosphatidylinositol 3' kinase inhibitor GDC-0941 in breast cancer preclinical models. Clin Cancer Res. 
2010;16(14):3670-83.", + "urls": ["https://doi.org/10.1158/1078-0432.CCR-09-2828"], + "doi": "10.1158/1078-0432.CCR-09-2828", + "pmid": 20453058, + } + + +@pytest.fixture(scope="module") +def moa_aid141_study_stmt( + moa_vid141, moa_myelodysplasia, moa_bcor, moa_source60, moa_method +): + """Create MOA AID 141 study statement test fixture.""" + return { + "id": "moa.assertion:141", + "type": "VariantPrognosticStudyStatement", + "description": "More frequent in Chronic Myelomonocytic Leukemia.", + "strength": { + "code": "e000007", + "label": "observational study evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "associatedWithWorseOutcomeFor", + "subjectVariant": moa_vid141, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": "somatic", + "geneContextQualifier": moa_bcor, + "specifiedBy": moa_method, + "reportedIn": [moa_source60], + } + + +@pytest.fixture(scope="module") +def moa_vid532(): + """Create a test fixture for MOA VID532.""" + return { + "id": "moa.variant:532", + "type": "CategoricalVariant", + "label": "SF3B1 p.E622D (Missense)", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", + "digest": "53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", + "type": "Allele", + "location": { + "id": "ga4gh:SL.PvDvUEPg69q4PYBxC8jM4cEzQCCkaxHM", + "digest": "PvDvUEPg69q4PYBxC8jM4cEzQCCkaxHM", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.ST8-pVpExi5fmcLBZ_vHcVmMtvgggIJm", + }, + "start": 621, + "end": 622, + "sequence": "E", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "D"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.Vj8RALpb4HP9RtsDNiaW_N3ODw3aSj5T", + "type": "Allele", + "label": "2-198267491-C-G", + "digest": "Vj8RALpb4HP9RtsDNiaW_N3ODw3aSj5T", + "location": { + "id": "ga4gh:SL.R8r0t9A51FTOJ7Mb8VasF8L6D5Sa_FFU", + "type": "SequenceLocation", + "digest": 
"R8r0t9A51FTOJ7Mb8VasF8L6D5Sa_FFU", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + }, + "start": 197402766, + "end": 197402767, + "sequence": "C", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, + } + ], + "extensions": [ + { + "name": "MOA representative coordinate", + "value": { + "chromosome": "2", + "start_position": "198267491", + "end_position": "198267491", + "reference_allele": "C", + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "protein_change": "p.E622D", + "exon": "14", + }, + } + ], + "mappings": [ + { + "coding": { + "system": "https://moalmanac.org/api/features/", + "code": "532", + }, + "relation": "exactMatch", + }, + { + "coding": { + "system": "https://www.ncbi.nlm.nih.gov/snp/", + "code": "rs763149798", + }, + "relation": "relatedMatch", + }, + ], + } + + +@pytest.fixture(scope="module") +def moa_sf3b1(): + """Create MOA gene SF3B1 test fixture""" + return { + "id": "moa.normalize.gene:SF3B1", + "type": "Gene", + "label": "SF3B1", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:10768", "label": "SF3B1"}, + } + ], + } + + +@pytest.fixture(scope="module") +def moa_source33(): + """Create MOA source ID 33 test fixture""" + return { + "id": "moa.source:33", + "extensions": [{"name": "source_type", "value": "Guideline"}], + "type": "Document", + "title": "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines\u00ae) for Myelodysplastic Syndromes V.2.2023. \u00a9 National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. 
To view the most recent and complete version of the guideline, go online to NCCN.org.", + "urls": ["https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf"], + } + + +@pytest.fixture(scope="module") +def moa_aid532_study_stmt( + moa_vid532, moa_myelodysplasia, moa_sf3b1, moa_source33, moa_method +): + """Create MOA AID 532 study statement test fixture.""" + return { + "id": "moa.assertion:532", + "type": "VariantPrognosticStudyStatement", + "description": "The National Comprehensive Cancer Network\u00ae (NCCN\u00ae) highlights SF3B1 E622, Y623, R625, N626, H662, T663, K666, K700E, I704, G740, G742, and D781 missense variants as being associated with a favorable prognosis in patients with myelodysplastic syndromes.", + "strength": { + "code": "e000003", + "label": "professional guideline evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "associatedWithBetterOutcomeFor", + "subjectVariant": moa_vid532, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": "somatic", + "geneContextQualifier": moa_sf3b1, + "specifiedBy": moa_method, + "reportedIn": [moa_source33], + } + + +@pytest.fixture(scope="module") +def statements(moa_aid141_study_stmt, moa_aid532_study_stmt): + """Create test fixture for MOA prognostic statements.""" + return [moa_aid141_study_stmt, moa_aid532_study_stmt] + + +def test_moa_cdm(data, statements, check_transformed_cdm): + """Test that moa transformation works correctly.""" + check_transformed_cdm(data, statements, DATA_DIR / FILENAME) diff --git a/tests/unit/transformers/test_moa_transformer.py b/tests/unit/transformers/test_moa_transformer_therapeutic.py similarity index 88% rename from tests/unit/transformers/test_moa_transformer.py rename to tests/unit/transformers/test_moa_transformer_therapeutic.py index f0c628da..d7997b98 100644 --- a/tests/unit/transformers/test_moa_transformer.py +++ b/tests/unit/transformers/test_moa_transformer_therapeutic.py @@ -9,33 +9,34 @@ from 
metakb.normalizers import VICC_NORMALIZER_DATA from metakb.transformers.moa import MoaTransformer +DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic" FILENAME = "moa_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): """Create a MOA Transformer test fixture.""" - harvester_path = TEST_TRANSFORMERS_DIR / "moa_harvester.json" + harvester_path = DATA_DIR / "moa_harvester.json" moa = MoaTransformer( - data_dir=TEST_TRANSFORMERS_DIR, + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers, ) harvested_data = moa.extract_harvested_data() await moa.transform(harvested_data) - moa.create_json(cdm_filepath=TEST_TRANSFORMERS_DIR / FILENAME) - with (TEST_TRANSFORMERS_DIR / FILENAME).open() as f: + moa.create_json(cdm_filepath=DATA_DIR / FILENAME) + with (DATA_DIR / FILENAME).open() as f: return json.load(f) @pytest.fixture(scope="module") -def moa_vid145(braf_v600e_genomic): - """Create a test fixture for MOA VID145.""" +def moa_vid144(braf_v600e_genomic): + """Create a test fixture for MOA VID144.""" genomic_rep = braf_v600e_genomic.copy() genomic_rep["label"] = "7-140453136-A-T" return { - "id": "moa.variant:145", + "id": "moa.variant:144", "type": "CategoricalVariant", "label": "BRAF p.V600E (Missense)", "constraints": [ @@ -81,7 +82,7 @@ def moa_vid145(braf_v600e_genomic): { "coding": { "system": "https://moalmanac.org/api/features/", - "code": "145", + "code": "144", }, "relation": "exactMatch", }, @@ -119,10 +120,10 @@ def moa_encorafenib(encorafenib_extensions): @pytest.fixture(scope="module") -def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): - """Create MOA AID 155 study statement test fixture. Uses CombinationTherapy.""" +def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method): + """Create MOA AID 154 study statement test fixture. 
Uses CombinationTherapy.""" return { - "id": "moa.assertion:155", + "id": "moa.assertion:154", "type": "VariantTherapeuticResponseStudyStatement", "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "strength": { @@ -131,7 +132,7 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsSensitivityTo", - "subjectVariant": moa_vid145, + "subjectVariant": moa_vid144, "objectTherapeutic": { "type": "CombinationTherapy", "id": "moa.ctid:ZGlEkRBR4st6Y_nijjuR1KUV7EFHIF_S", @@ -183,7 +184,7 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method "specifiedBy": moa_method, "reportedIn": [ { - "id": "moa.source:63", + "id": "moa.source:64", "extensions": [{"name": "source_type", "value": "FDA"}], "type": "Document", "title": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020.", @@ -196,11 +197,11 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method @pytest.fixture(scope="module") -def statements(moa_aid66_study_stmt, moa_aid155_study_stmt): +def statements(moa_aid66_study_stmt, moa_aid154_study_stmt): """Create test fixture for MOA therapeutic statements.""" - return [moa_aid66_study_stmt, moa_aid155_study_stmt] + return [moa_aid66_study_stmt, moa_aid154_study_stmt] def test_moa_cdm(data, statements, check_transformed_cdm): """Test that moa transformation works correctly.""" - check_transformed_cdm(data, statements, TEST_TRANSFORMERS_DIR / FILENAME) + check_transformed_cdm(data, statements, DATA_DIR / FILENAME)