diff --git a/src/metakb/harvesters/moa.py b/src/metakb/harvesters/moa.py index 11e2e60c..a8cc109a 100644 --- a/src/metakb/harvesters/moa.py +++ b/src/metakb/harvesters/moa.py @@ -151,24 +151,27 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict assertion_record = { "id": assertion["assertion_id"], "context": assertion["context"], + "deprecated": assertion["deprecated"], "description": assertion["description"], "disease": { "name": assertion["disease"], "oncotree_code": assertion["oncotree_code"], "oncotree_term": assertion["oncotree_term"], }, - "therapy_name": assertion["therapy_name"], - "therapy_type": assertion["therapy_type"], - "clinical_significance": self._get_therapy( - assertion["therapy_resistance"], assertion["therapy_sensitivity"] - ), + "therapy": { + "name": assertion["therapy_name"], + "type": assertion["therapy_type"], + "strategy": assertion["therapy_strategy"], + "resistance": assertion["therapy_resistance"], + "sensitivity": assertion["therapy_sensitivity"], + }, "predictive_implication": assertion["predictive_implication"], "favorable_prognosis": assertion["favorable_prognosis"], "created_on": assertion["created_on"], "last_updated": assertion["last_updated"], "submitted_by": assertion["submitted_by"], "validated": assertion["validated"], - "source_ids": assertion["sources"][0]["source_id"], + "source_id": assertion["sources"][0]["source_id"], } for v in variants_list: @@ -177,19 +180,6 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict return assertion_record - def _get_therapy(self, resistance: bool, sensitivity: bool) -> str | None: - """Get therapy response data. - - :param resistance: `True` if Therapy Resistance. `False` if not Therapy Resistance - :param sensitivity: `True` if Therapy Sensitivity. 
`False` if not Therapy Sensitivity - :return: whether the therapy response is resistance or sensitivity - """ - if resistance: - return "resistance" - if sensitivity: - return "sensitivity" - return None - def _get_feature(self, v: dict) -> dict: """Get feature name from the harvested variants diff --git a/src/metakb/query.py b/src/metakb/query.py index cce818cd..a676744d 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -15,6 +15,7 @@ ) from ga4gh.core.entity_models import Coding, Document, Extension, Method from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -69,6 +70,14 @@ class TherapeuticProcedureType(str, Enum): SUBSTITUTES = "TherapeuticSubstituteGroup" +# Statement types to corresponding class mapping +STMT_TYPE_TO_CLASS = { + "VariantDiagnosticStudyStatement": VariantDiagnosticStudyStatement, + "VariantPrognosticStudyStatement": VariantPrognosticStudyStatement, + "VariantTherapeuticResponseStudyStatement": VariantTherapeuticResponseStudyStatement, +} + + def _deserialize_field(node: dict, field_name: str) -> None | dict: """Deserialize JSON blob property. 
@@ -493,8 +502,8 @@ def _get_nested_stmts(self, statement_nodes: list[Node]) -> list[dict]: def _get_nested_stmt(self, stmt_node: Node) -> dict: """Get information related to a statement - Only VariantTherapeuticResponseStudyStatement and VariantPrognosticStudyStatement - are supported at the moment + Only VariantTherapeuticResponseStudyStatement, VariantPrognosticStudyStatement, + and VariantDiagnosticStudyStatement are supported at the moment :param stmt_node: Neo4j graph node for statement :return: Nested statement @@ -503,15 +512,14 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict: if study_stmt_type not in { "VariantTherapeuticResponseStudyStatement", "VariantPrognosticStudyStatement", + "VariantDiagnosticStudyStatement", }: return {} - if study_stmt_type == "VariantPrognosticStudyStatement": - study_stmt_cls = VariantPrognosticStudyStatement - condition_key = "objectCondition" - else: - study_stmt_cls = VariantTherapeuticResponseStudyStatement + if study_stmt_type == "VariantTherapeuticResponseStudyStatement": condition_key = "conditionQualifier" + else: + condition_key = "objectCondition" params = { condition_key: None, @@ -559,7 +567,7 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict: else: logger.warning("relation type not supported: %s", rel_type) - return study_stmt_cls(**params).model_dump() + return STMT_TYPE_TO_CLASS[study_stmt_type](**params).model_dump() @staticmethod def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension: @@ -917,10 +925,6 @@ async def batch_search_statements( statement_nodes = [r[0] for r in result] response.statement_ids = [n["id"] for n in statement_nodes] stmts = self._get_nested_stmts(statement_nodes) - response.statements = [ - VariantTherapeuticResponseStudyStatement(**s) - if s["type"] == "VariantTherapeuticResponseStudyStatement" - else VariantPrognosticStudyStatement(**s) - for s in stmts - ] + + response.statements = [STMT_TYPE_TO_CLASS[s["type"]](**s) for s in stmts] return response 
diff --git a/src/metakb/schemas/api.py b/src/metakb/schemas/api.py index 27a63b86..59251e94 100644 --- a/src/metakb/schemas/api.py +++ b/src/metakb/schemas/api.py @@ -3,6 +3,7 @@ from typing import Literal from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -48,7 +49,9 @@ class SearchStatementsService(BaseModel): warnings: list[StrictStr] = [] statement_ids: list[StrictStr] = [] statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] service_meta_: ServiceMeta @@ -73,6 +76,8 @@ class BatchSearchStatementsService(BaseModel): warnings: list[StrictStr] = [] statement_ids: list[StrictStr] = [] statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] service_meta_: ServiceMeta diff --git a/src/metakb/transformers/base.py b/src/metakb/transformers/base.py index eebc6c62..a8504fde 100644 --- a/src/metakb/transformers/base.py +++ b/src/metakb/transformers/base.py @@ -25,6 +25,7 @@ ) from ga4gh.core.entity_models import Coding, Document, Extension, Method from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -111,7 +112,9 @@ class TransformedData(BaseModel): """Define model for transformed data""" statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] categorical_variants: list[CategoricalVariant] = [] variations: list[Allele] = [] diff --git a/src/metakb/transformers/civic.py 
b/src/metakb/transformers/civic.py index a722c7c7..9fbb3c4b 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -22,8 +22,10 @@ ) from ga4gh.va_spec.profiles.var_study_stmt import ( AlleleOriginQualifier, + DiagnosticPredicate, PrognosticPredicate, TherapeuticResponsePredicate, + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -91,6 +93,8 @@ "RESISTANCE": TherapeuticResponsePredicate.RESISTANCE, "POOR_OUTCOME": PrognosticPredicate.WORSE_OUTCOME, "BETTER_OUTCOME": PrognosticPredicate.BETTER_OUTCOME, + "POSITIVE": DiagnosticPredicate.INCLUSIVE, + "NEGATIVE": DiagnosticPredicate.EXCLUSIVE, } @@ -121,6 +125,7 @@ class _CivicEvidenceType(str, Enum): PREDICTIVE = "PREDICTIVE" PROGNOSTIC = "PROGNOSTIC" + DIAGNOSTIC = "DIAGNOSTIC" class _VariationCache(BaseModel): @@ -359,8 +364,10 @@ def _add_variant_study_stmt( if evidence_type == _CivicEvidenceType.PREDICTIVE: params["objectTherapeutic"] = civic_therapeutic statement = VariantTherapeuticResponseStudyStatement(**params) - else: + elif evidence_type == _CivicEvidenceType.PROGNOSTIC: statement = VariantPrognosticStudyStatement(**params) + else: + statement = VariantDiagnosticStudyStatement(**params) self.processed_data.statements.append(statement) diff --git a/src/metakb/transformers/moa.py b/src/metakb/transformers/moa.py index ce918a83..35b9019f 100644 --- a/src/metakb/transformers/moa.py +++ b/src/metakb/transformers/moa.py @@ -8,9 +8,11 @@ from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint from ga4gh.core import sha512t24u from ga4gh.core.domain_models import ( + CombinationTherapy, Disease, Gene, TherapeuticAgent, + TherapeuticSubstituteGroup, ) from ga4gh.core.entity_models import ( Coding, @@ -21,7 +23,9 @@ ) from ga4gh.va_spec.profiles.var_study_stmt import ( AlleleOriginQualifier, + PrognosticPredicate, TherapeuticResponsePredicate, + VariantPrognosticStudyStatement, 
VariantTherapeuticResponseStudyStatement, ) from ga4gh.vrs.models import Variation @@ -85,133 +89,97 @@ async def transform(self, harvested_data: MoaHarvestedData) -> None: self._add_documents(harvested_data.sources) # Add variant therapeutic response study statement data. Will update `statements` - await self._add_variant_tr_study_stmts(harvested_data.assertions) + for assertion in harvested_data.assertions: + await self._add_variant_study_stmt(assertion) - async def _add_variant_tr_study_stmts(self, assertions: list[dict]) -> None: - """Create Variant Therapeutic Response Study Statements from MOA assertions. + async def _add_variant_study_stmt(self, assertion: dict) -> None: + """Create a Variant Study Statement from a MOA assertion. Will add associated values to ``processed_data`` instance variable (``therapeutic_procedures``, ``conditions``, and ``statements``). ``able_to_normalize`` and ``unable_to_normalize`` will also be mutated for associated therapeutic_procedures and conditions. - :param assertions: A list of MOA assertion records + :param assertion: MOA assertion record """ - for record in assertions: - assertion_id = f"moa.assertion:{record['id']}" - variant_id = record["variant"]["id"] + assertion_id = f"moa.assertion:{assertion['id']}" + variant_id = assertion["variant"]["id"] - # Check cache for variation record (which contains gene information) - variation_gene_map = self.able_to_normalize["variations"].get(variant_id) - if not variation_gene_map: - logger.debug( - "%s has no variation for variant_id %s", assertion_id, variant_id - ) - continue - - # Get predicate. 
We only support therapeutic resistance/sensitivity - if record["clinical_significance"] == "resistance": - predicate = TherapeuticResponsePredicate.RESISTANCE - elif record["clinical_significance"] == "sensitivity": - predicate = TherapeuticResponsePredicate.SENSITIVITY - else: - logger.debug( - "clinical_significance not supported: %s", - record["clinical_significance"], - ) - continue + # Check cache for variation record (which contains gene information) + variation_gene_map = self.able_to_normalize["variations"].get(variant_id) + if not variation_gene_map: + logger.debug( + "%s has no variation for variant_id %s", assertion_id, variant_id + ) + return + + # Get strength + predictive_implication = ( + assertion["predictive_implication"] + .strip() + .replace(" ", "_") + .replace("-", "_") + .upper() + ) + moa_evidence_level = MoaEvidenceLevel[predictive_implication] + strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level] - # Get strength - predictive_implication = ( - record["predictive_implication"] - .strip() - .replace(" ", "_") - .replace("-", "_") - .upper() + # Add disease + moa_disease = self._add_disease(assertion["disease"]) + if not moa_disease: + logger.debug( + "%s has no disease for disease %s", assertion_id, assertion["disease"] ) - moa_evidence_level = MoaEvidenceLevel[predictive_implication] - strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level] + return - # Add therapeutic agent. 
We only support one therapy, so we will skip others - therapy_name = record["therapy_name"] - if not therapy_name: - logger.debug("%s has no therapy_name", assertion_id) - continue + # Add document + document = self.able_to_normalize["documents"].get(assertion["source_id"]) - therapy_interaction_type = record["therapy_type"] - - if "+" in therapy_name: - # Indicates multiple therapies - if therapy_interaction_type.upper() in { - "COMBINATION THERAPY", - "IMMUNOTHERAPY", - "RADIATION THERAPY", - "TARGETED THERAPY", - }: - therapeutic_procedure_type = ( - TherapeuticProcedureType.COMBINATION_THERAPY - ) - else: - # skipping HORMONE and CHEMOTHERAPY for now - continue + feature_type = assertion["variant"]["feature_type"] + if feature_type == "somatic_variant": + allele_origin_qualifier = AlleleOriginQualifier.SOMATIC + elif feature_type == "germline_variant": + allele_origin_qualifier = AlleleOriginQualifier.GERMLINE + else: + allele_origin_qualifier = None + + params = { + "id": assertion_id, + "description": assertion["description"], + "strength": strength, + "subjectVariant": variation_gene_map["cv"], + "alleleOriginQualifier": allele_origin_qualifier, + "geneContextQualifier": variation_gene_map["moa_gene"], + "specifiedBy": self.processed_data.methods[0], + "reportedIn": [document], + } - therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")] - therapeutic_digest = self._get_digest_for_str_lists( - [f"moa.therapy:{tn}" for tn in therapies] - ) - therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}" - else: - therapeutic_procedure_id = f"moa.therapy:{therapy_name}" - therapies = [{"label": therapy_name}] - therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT - - moa_therapeutic = self._add_therapeutic_procedure( - therapeutic_procedure_id, - therapies, - therapeutic_procedure_type, - therapy_interaction_type, + if assertion["favorable_prognosis"] == "": + params["conditionQualifier"] = moa_disease + params["predicate"] = ( + 
TherapeuticResponsePredicate.RESISTANCE + if assertion["therapy"]["resistance"] + else TherapeuticResponsePredicate.SENSITIVITY ) + params["objectTherapeutic"] = self._get_therapeutic_procedure(assertion) - if not moa_therapeutic: + if not params["objectTherapeutic"]: logger.debug( - "%s has no therapeutic agent for therapy_name %s", + "%s has no therapeutic procedure for therapy_name %s", assertion_id, - therapy_name, - ) - continue - - # Add disease - moa_disease = self._add_disease(record["disease"]) - if not moa_disease: - logger.debug( - "%s has no disease for disease %s", assertion_id, record["disease"] + assertion["therapy"]["name"], ) - continue - - # Add document - document = self.able_to_normalize["documents"].get(record["source_ids"]) - - feature_type = record["variant"]["feature_type"] - if feature_type == "somatic_variant": - allele_origin_qualifier = AlleleOriginQualifier.SOMATIC - elif feature_type == "germline_variant": - allele_origin_qualifier = AlleleOriginQualifier.GERMLINE - else: - allele_origin_qualifier = None - - statement = VariantTherapeuticResponseStudyStatement( - id=assertion_id, - description=record["description"], - strength=strength, - predicate=predicate, - subjectVariant=variation_gene_map["cv"], - objectTherapeutic=moa_therapeutic, - conditionQualifier=moa_disease, - alleleOriginQualifier=allele_origin_qualifier, - geneContextQualifier=variation_gene_map["moa_gene"], - specifiedBy=self.processed_data.methods[0], - reportedIn=[document], + return + statement = VariantTherapeuticResponseStudyStatement(**params) + else: + params["objectCondition"] = moa_disease + params["predicate"] = ( + PrognosticPredicate.BETTER_OUTCOME + if assertion["favorable_prognosis"] + else PrognosticPredicate.WORSE_OUTCOME ) - self.processed_data.statements.append(statement) + statement = VariantPrognosticStudyStatement(**params) + + self.processed_data.statements.append(statement) async def _add_categorical_variants(self, variants: list[dict]) -> None: 
"""Create Categorical Variant objects for all MOA variant records. @@ -437,6 +405,54 @@ def _add_documents(self, sources: list) -> None: self.able_to_normalize["documents"][source_id] = document self.processed_data.documents.append(document) + def _get_therapeutic_procedure( + self, assertion: dict + ) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None: + """Get therapeutic procedure object + + :param assertion: MOA assertion record + :return: Therapeutic procedure object, if found and able to be normalized + """ + therapy = assertion["therapy"] + therapy_name = therapy["name"] + if not therapy_name: + logger.debug("%s has no therapy_name", assertion["id"]) + return None + + therapy_interaction_type = therapy["type"] + + if "+" in therapy_name: + # Indicates multiple therapies + if therapy_interaction_type.upper() in { + "COMBINATION THERAPY", + "IMMUNOTHERAPY", + "RADIATION THERAPY", + "TARGETED THERAPY", + }: + therapeutic_procedure_type = ( + TherapeuticProcedureType.COMBINATION_THERAPY + ) + else: + # skipping HORMONE and CHEMOTHERAPY for now + return None + + therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")] + therapeutic_digest = self._get_digest_for_str_lists( + [f"moa.therapy:{tn}" for tn in therapies] + ) + therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}" + else: + therapeutic_procedure_id = f"moa.therapy:{therapy_name}" + therapies = [{"label": therapy_name}] + therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT + + return self._add_therapeutic_procedure( + therapeutic_procedure_id, + therapies, + therapeutic_procedure_type, + therapy_interaction_type, + ) + def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, @@ -489,12 +505,17 @@ def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: def _add_disease(self, disease: dict) -> dict | None: """Create or get disease given MOA disease. 
+ First looks in cache for existing disease, if not found will attempt to - normalize. Will generate a digest from the original MOA disease object. This - will be used as the key in the caches. Will add the generated digest to - ``processed_data.conditions`` and ``able_to_normalize['conditions']`` if + normalize. Will generate a digest from the original MOA disease object oncotree + fields. This will be used as the key in the caches. Will add the generated digest + to ``processed_data.conditions`` and ``able_to_normalize['conditions']`` if disease-normalizer is able to normalize. Else will add the generated digest to - ``unable_to_normalize['conditions']`` + ``unable_to_normalize['conditions']``. + + Since there may be duplicate Oncotree code/terms with different names, the first + name will be used as the Disease label. Others will be added to the + alternativeLabels field. :param disease: MOA disease object :return: Disease object if disease-normalizer was able to normalize @@ -503,16 +524,26 @@ def _add_disease(self, disease: dict) -> dict | None: return None # Since MOA disease objects do not have an ID, we will create a digest from - # the original MOA disease object - disease_list = sorted([f"{k}:{v}" for k, v in disease.items() if v]) - blob = json.dumps(disease_list, separators=(",", ":"), sort_keys=True).encode( - "ascii" - ) + # the original MOA disease object. + # The `name` is as written in the source text. 
In an upcoming MOA release, these + # will have leading underscore to differentiate "raw" values + oncotree_code = disease["oncotree_code"] + oncotree_key = "oncotree_code" if oncotree_code else "oncotree_term" + oncotree_value = oncotree_code or disease[oncotree_key] + oncotree_kv = [f"{oncotree_key}:{oncotree_value}"] + blob = json.dumps(oncotree_kv, separators=(",", ":")).encode("ascii") disease_id = sha512t24u(blob) vrs_disease = self.able_to_normalize["conditions"].get(disease_id) if vrs_disease: + source_disease_name = disease["name"] + if source_disease_name != vrs_disease.label: + vrs_disease.alternativeLabels = vrs_disease.alternativeLabels or [] + + if source_disease_name not in vrs_disease.alternativeLabels: + vrs_disease.alternativeLabels.append(source_disease_name) return vrs_disease + vrs_disease = None if disease_id not in self.unable_to_normalize["conditions"]: vrs_disease = self._get_disease(disease) @@ -523,7 +554,7 @@ def _add_disease(self, disease: dict) -> dict | None: self.unable_to_normalize["conditions"].add(disease_id) return vrs_disease - def _get_disease(self, disease: dict) -> dict | None: + def _get_disease(self, disease: dict) -> Disease | None: """Get Disease object for a MOA disease :param disease: MOA disease record diff --git a/tests/conftest.py b/tests/conftest.py index c6f2cfa8..ec42e754 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1005,429 +1005,6 @@ def civic_aid6_document(): } -@pytest.fixture(scope="session") -def civic_eid2_statement(): - """Create a test fixture for CIViC EID2 statement.""" - return { - "id": "civic.eid:2", - "type": "Statement", - "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", - "direction": "supports", - "evidence_level": "civic.evidence_level:B", - "proposition": "proposition:KVuJMXiPm-oK4vvijE9Cakvucayay3jE", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:99", - "disease_descriptor": "civic.did:2", - 
"method": "method:1", - "supported_by": ["pmid:15146165"], - } - - -@pytest.fixture(scope="session") -def civic_eid2_proposition(): - """Create a test fixture for CIViC EID2 proposition.""" - return { - "id": "proposition:KVuJMXiPm-oK4vvijE9Cakvucayay3jE", - "type": "diagnostic_proposition", - "predicate": "is_diagnostic_exclusion_criterion_for", - "subject": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "object_qualifier": "ncit:C3868", - } - - -@pytest.fixture(scope="session") -def civic_vid99(): - """Create a test fixture for CIViC VID99.""" - return { - "id": "civic.vid:99", - "type": "VariationDescriptor", - "label": "D842V", - "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. 
These include; crenolanib, sirolimus, and midostaurin (PKC412).", - "variation_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "variation": { - "_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "location": { - "_id": "ga4gh:VSL.CvhzuX1-CV0in3YTnaq9xZGAPxmrkrFC", - "interval": { - "start": {"value": 841, "type": "Number"}, - "end": {"value": 842, "type": "Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.XpQn9sZLGv_GU3uiWO7YHq9-_alGjrVX", - "type": "SequenceLocation", - }, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:13543", "caid:CA123194", "dbsnp:121908585"], - "alternate_labels": ["ASP842VAL"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "4", - "start": 55152093, - "stop": 55152093, - "reference_bases": "A", - "variant_bases": "T", - "representative_transcript": "ENST00000257290.5", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - { - "name": "civic_actionability_score", - "value": "100.5", - }, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:1", - "label": "Imatinib Resistance", - "description": "While imatinib has shown to be incredibly successful in treating philadelphia chromosome positive CML, patients that have shown primary or secondary resistance to the drug have been observed to harbor T315I and E255K ABL kinase domain mutations. These mutations, among others, have been observed both in primary refractory disease and acquired resistance. In gastrointestinal stromal tumors (GIST), PDGFRA 842 mutations have also been shown to confer resistance to imatinib. 
", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_006206.4:c.2525A>T", - "type": "Expression", - }, - { - "syntax": "hgvs.p", - "value": "NP_006197.1:p.Asp842Val", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000257290.5:c.2525A>T", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000004.11:g.55152093A>T", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:38", - } - - -@pytest.fixture(scope="session") -def civic_did2(): - """Create a test fixture for CIViC DID2.""" - return { - "id": "civic.did:2", - "type": "DiseaseDescriptor", - "label": "Gastrointestinal Stromal Tumor", - "disease_id": "ncit:C3868", - "xrefs": ["DOID:9253"], - } - - -@pytest.fixture(scope="session") -def civic_gid38(): - """Create a test fixture for CIViC GID38.""" - return { - "id": "civic.gid:38", - "type": "GeneDescriptor", - "label": "PDGFRA", - "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. 
Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", - "gene_id": "hgnc:8803", - "alternate_labels": ["PDGFRA", "PDGFR2", "PDGFR-2", "CD140A"], - "xrefs": ["ncbigene:5156"], - } - - -@pytest.fixture(scope="session") -def civic_eid74_statement(): - """Create a test fixture for CIViC EID74 statement.""" - return { - "id": "civic.eid:74", - "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", - "direction": "supports", - "evidence_level": "civic.evidence_level:B", - "proposition": "proposition:Vyzbpg-s6mw27yJfYBFxGyQeuEJacP4l", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:113", - "disease_descriptor": "civic.did:15", - "method": "method:1", - "supported_by": ["pmid:18073307"], - "type": "Statement", - } - - -@pytest.fixture(scope="session") -def civic_eid74_proposition(): - """Create a test fixture for CIViC EID74 proposition.""" - return { - "id": "proposition:Vyzbpg-s6mw27yJfYBFxGyQeuEJacP4l", - "type": "diagnostic_proposition", - "predicate": "is_diagnostic_inclusion_criterion_for", - "subject": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "object_qualifier": "ncit:C3879", - } - - -@pytest.fixture(scope="session") -def civic_vid113(): - """Create a test fixture for CIViC VID113.""" - return { - "id": "civic.vid:113", - "type": "VariationDescriptor", - "label": "M918T", - "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. 
It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", - "variation_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "variation": { - "_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "location": { - "_id": "ga4gh:VSL.zkwClPQjjO0FqXWN46QRuiGgodhPjxqT", - "interval": { - "end": {"value": 918, "type": "Number"}, - "start": {"value": 917, "type": "Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.jMu9-ItXSycQsm4hyABeW_UfSNRXRVnl", - "type": "SequenceLocation", - }, - "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:13919", "caid:CA009082", "dbsnp:74799832"], - "alternate_labels": ["MET918THR"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "10", - "start": 43617416, - "stop": 43617416, - "reference_bases": "T", - "variant_bases": "C", - "representative_transcript": "ENST00000355710.3", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - {"name": "civic_actionability_score", "value": "86"}, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:6", - "label": "Motesanib Resistance", - "description": "RET activation is a common oncogenic marker of medullary thyroid carcinoma. Treatment of these patients with the targeted therapeutic motesanib has shown to be effective. However, the missense mutations C634W and M918T have shown to confer motesanib resistance in cell lines. 
", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_020975.4:c.2753T>C", - "type": "Expression", - }, - { - "syntax": "hgvs.p", - "value": "NP_065681.1:p.Met918Thr", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000355710.3:c.2753T>C", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000010.10:g.43617416T>C", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:42", - } - - -@pytest.fixture(scope="session") -def civic_did15(): - """Create test fixture for CIViC DID15.""" - return { - "id": "civic.did:15", - "type": "DiseaseDescriptor", - "label": "Thyroid Gland Medullary Carcinoma", - "disease_id": "ncit:C3879", - "xrefs": ["DOID:3973"], - } - - -@pytest.fixture(scope="session") -def civic_gid42(): - """Create test fixture for CIViC GID42.""" - return { - "id": "civic.gid:42", - "type": "GeneDescriptor", - "label": "RET", - "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistence. 
No RET-specific agents are currently clinically available but several promiscuous kinase inhibitors that target RET, among others, have been approved for MTC treatment.", - "gene_id": "hgnc:9967", - "alternate_labels": [ - "RET", - "RET-ELE1", - "PTC", - "MTC1", - "MEN2B", - "MEN2A", - "HSCR1", - "CDHR16", - "CDHF12", - ], - "xrefs": ["ncbigene:5979"], - } - - -@pytest.fixture(scope="session") -def civic_aid9_statement(): - """Create a test fixture for CIViC AID9 statement.""" - return { - "id": "civic.aid:9", - "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", - "direction": "supports", - "evidence_level": "amp_asco_cap_2017_level:2C", - "proposition": "proposition:Pjri4dU2VaEKcdKtVkoAUJ8bHFXnW2My", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:1686", - "disease_descriptor": "civic.did:2950", - "method": "method:2", - "supported_by": ["civic.eid:4846", "civic.eid:6955"], - "type": "Statement", - } - - -@pytest.fixture(scope="session") -def civic_aid9_proposition(): - """Create a test fixture for CIViC AID9 proposition.""" - return { - "id": "proposition:Pjri4dU2VaEKcdKtVkoAUJ8bHFXnW2My", - "predicate": "is_diagnostic_inclusion_criterion_for", - "subject": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "object_qualifier": "DOID:0080684", - "type": "diagnostic_proposition", - } - - -@pytest.fixture(scope="session") -def civic_vid1686(): - """Create a test fixture for CIViC VID1686.""" - return { - "id": "civic.vid:1686", - "type": "VariationDescriptor", - "label": "G328V", - "variation_id": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "variation": { - "_id": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "location": { - "_id": "ga4gh:VSL.w84KcAESJfbxvPCwCvYpQajlkdPrfS12", - "interval": { - "end": {"value": 328, "type": "Number"}, - "start": {"value": 327, "type": 
"Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.6CnHhDq_bDCsuIBf0AzxtKq_lXYM7f0m", - "type": "SequenceLocation", - }, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:376363", "caid:CA16602802", "dbsnp:387906589"], - "alternate_labels": ["GLY328VAL"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "2", - "start": 158622516, - "stop": 158622516, - "reference_bases": "C", - "variant_bases": "A", - "representative_transcript": "ENST00000434821.1", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - {"name": "civic_actionability_score", "value": "30"}, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:23", - "label": "ACVR1 kinase domain mutation", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - {"syntax": "hgvs.c", "value": "NM_001105.4:c.983G>T", "type": "Expression"}, - { - "syntax": "hgvs.p", - "value": "NP_001096.1:p.Gly328Val", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000002.11:g.158622516C>A", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000434821.1:c.983G>T", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:154", - } - - -@pytest.fixture(scope="session") -def civic_did2950(): - """Create a test fixture for CIViC DID2950.""" - return { - "id": "civic.did:2950", - "type": "DiseaseDescriptor", - "label": "Diffuse Midline Glioma, H3 K27M-mutant", - "disease_id": "DOID:0080684", - "xrefs": ["DOID:0080684"], - } - - -@pytest.fixture(scope="session") -def civic_gid154(): - """Create a test fixture for CIViC GID154.""" - return { - "id": "civic.gid:154", - "type": "GeneDescriptor", - "label": "ACVR1", - "gene_id": "hgnc:171", - "alternate_labels": [ - "ACVR1", - "TSRI", - "SKR1", - "FOP", - "ALK2", - "ACVRLK2", - "ACVR1A", - "ACTRI", - ], - "xrefs": ["ncbigene:90"], - 
} - - @pytest.fixture(scope="session") def civic_eid26_study_stmt( civic_mpid65, civic_gid29, civic_did3, civic_method, pmid_16384925 diff --git a/tests/data/harvesters/moa/assertions.json b/tests/data/harvesters/moa/assertions.json index 960aef85..69b7602b 100644 --- a/tests/data/harvesters/moa/assertions.json +++ b/tests/data/harvesters/moa/assertions.json @@ -1,9 +1,10 @@ [ { - "assertion_id": 165, - "context": "Resistance to BRAFi monotherapy", - "created_on": "12/07/23", - "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", + "assertion_id": 163, + "context": "", + "created_on": "12/05/24", + "deprecated": false, + "description": "The combination of ipilimumab and vemurafenib in a sequencing strategy showed limited efficacy in a phase II study.", "disease": "Melanoma", "favorable_prognosis": "", "features": [ @@ -24,38 +25,39 @@ "variant_annotation": "Missense" } ], - "feature_id": 165, + "feature_id": 163, "feature_type": "somatic_variant" } ], "last_updated": "2019-06-13", "oncotree_code": "MEL", "oncotree_term": "Melanoma", - "predictive_implication": "Preclinical", + "predictive_implication": "Clinical trial", "sources": [ { - "citation": "Caporali S, Alvino E, Lacal PM, et al. Targeting the PI3K/AKT/mTOR pathway overcomes the stimulating effect of dabrafenib on the invasive behavior of melanoma cells with acquired resistance to the BRAF inhibitor. Int J Oncol. 2016;49(3):1164-74.", - "doi": "10.3892/ijo.2016.3594", - "nct": "", - "pmid": 27572607, + "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. 
J Immunother Cancer. 2016;4:44.", + "doi": "10.1186/s40425-016-0148-7", + "nct": "NCT01673854", + "pmid": 27532019, "source_id": 69, "source_type": "Journal", - "url": "https://doi.org/10.3892/ijo.2016.3594" + "url": "https://doi.org/10.1186/s40425-016-0148-7" } ], "submitted_by": "breardon@broadinstitute.org", - "therapy_name": "Dabrafenib + Bevacizumab", + "therapy_name": "Ipilimumab + Vemurafenib", "therapy_resistance": "", "therapy_sensitivity": 1, - "therapy_strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", - "therapy_type": "Targeted therapy", + "therapy_strategy": "CTLA-4 inhibition + B-RAF inhibition", + "therapy_type": "Combination therapy", "validated": true }, { "assertion_id": 164, - "context": "", - "created_on": "12/07/23", - "description": "The combination of ipilimumab and vemurafenib in a sequencing strategy showed limited efficacy in a phase II study.", + "context": "Resistance to BRAFi monotherapy", + "created_on": "12/05/24", + "deprecated": false, + "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", "disease": "Melanoma", "favorable_prognosis": "", "features": [ @@ -83,24 +85,24 @@ "last_updated": "2019-06-13", "oncotree_code": "MEL", "oncotree_term": "Melanoma", - "predictive_implication": "Clinical trial", + "predictive_implication": "Preclinical", "sources": [ { - "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. J Immunother Cancer. 
2016;4:44.", - "doi": "10.1186/s40425-016-0148-7", - "nct": "NCT01673854", - "pmid": 27532019, - "source_id": 68, + "citation": "Caporali S, Alvino E, Lacal PM, et al. Targeting the PI3K/AKT/mTOR pathway overcomes the stimulating effect of dabrafenib on the invasive behavior of melanoma cells with acquired resistance to the BRAF inhibitor. Int J Oncol. 2016;49(3):1164-74.", + "doi": "10.3892/ijo.2016.3594", + "nct": "", + "pmid": 27572607, + "source_id": 70, "source_type": "Journal", - "url": "https://doi.org/10.1186/s40425-016-0148-7" + "url": "https://doi.org/10.3892/ijo.2016.3594" } ], "submitted_by": "breardon@broadinstitute.org", - "therapy_name": "Ipilimumab + Vemurafenib", + "therapy_name": "Dabrafenib + Bevacizumab", "therapy_resistance": "", "therapy_sensitivity": 1, - "therapy_strategy": "CTLA-4 inhibition + B-RAF inhibition", - "therapy_type": "Combination therapy", + "therapy_strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", + "therapy_type": "Targeted therapy", "validated": true } -] \ No newline at end of file +] diff --git a/tests/data/transformers/diagnostic/civic_harvester.json b/tests/data/transformers/diagnostic/civic_harvester.json index 46b8e9e8..20366011 100644 --- a/tests/data/transformers/diagnostic/civic_harvester.json +++ b/tests/data/transformers/diagnostic/civic_harvester.json @@ -23,8 +23,6 @@ "doid": "9253", "disease_url": "https://www.disease-ontology.org/?id=DOID:9253", "aliases": [ - "GANT", - "GIST", "Gastrointestinal Stromal Tumour", "Stromal Tumor Of Gastrointestinal Tract", "Stromal Tumour Of Gastrointestinal Tract" @@ -69,13 +67,14 @@ "therapies": [], "disease": { "id": 15, - "name": "Thyroid Gland Medullary Carcinoma", - "display_name": "Thyroid Gland Medullary Carcinoma", + "name": "Medullary Thyroid Carcinoma", + "display_name": "Medullary Thyroid Carcinoma", "doid": "3973", "disease_url": "https://www.disease-ontology.org/?id=DOID:3973", "aliases": [ "Medullary Carcinoma Of The Thyroid Gland", - "Medullary Thyroid 
Carcinoma", + "Parafollicular Cell Carcinoma", + "Thyroid Gland Medullary Carcinoma", "Ultimobranchial Thyroid Tumor", "Ultimobranchial Thyroid Tumour" ], @@ -102,46 +101,7 @@ "phenotypes": [] } ], - "assertions": [ - { - "type": "assertion", - "id": 9, - "variant_origin": "SOMATIC", - "therapy_interaction_type": null, - "summary": "Supports diagnosis of diffuse intrinsic pontine glioma.", - "status": "accepted", - "significance": "POSITIVE", - "nccn_guideline_version": "", - "nccn_guideline": null, - "name": "AID9", - "molecular_profile_id": 1594, - "fda_regulatory_approval": null, - "fda_companion_test": null, - "evidence_ids": [ - 4846, - 6955 - ], - "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", - "assertion_type": "DIAGNOSTIC", - "assertion_direction": "SUPPORTS", - "amp_level": "TIER_II_LEVEL_C", - "therapies": [], - "disease": { - "id": 2950, - "name": "Diffuse Midline Glioma, H3 K27M-mutant", - "display_name": "Diffuse Midline Glioma, H3 K27M-mutant", - "doid": "0080684", - "disease_url": "https://www.disease-ontology.org/?id=DOID:0080684", - "aliases": [ - "Diffuse Intrinsic Pontine Glioma" - ], - "type": "disease" - }, - "phenotypes": [], - "clingen_codes": [], - "acmg_codes": [] - } - ], + "assertions": [], "genes": [ { "type": "gene", @@ -149,6 +109,12 @@ "name": "PDGFRA", "entrez_id": 5156, "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. 
Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", + "aliases": [ + "CD140A", + "PDGFR-2", + "PDGFR2", + "PDGFRA" + ], "sources": [ { "id": 415, @@ -186,12 +152,6 @@ "clinical_trials": [], "type": "source" } - ], - "aliases": [ - "CD140A", - "PDGFR-2", - "PDGFR2", - "PDGFRA" ] }, { @@ -200,6 +160,17 @@ "name": "RET", "entrez_id": 5979, "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. 
Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + "aliases": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1" + ], "sources": [ { "id": 44, @@ -255,35 +226,6 @@ "clinical_trials": [], "type": "source" } - ], - "aliases": [ - "CDHF12", - "CDHR16", - "HSCR1", - "MEN2A", - "MEN2B", - "MTC1", - "PTC", - "RET", - "RET-ELE1" - ] - }, - { - "type": "gene", - "id": 154, - "name": "ACVR1", - "entrez_id": 90, - "description": "", - "sources": [], - "aliases": [ - "ACTRI", - "ACVR1", - "ACVR1A", - "ACVRLK2", - "ALK2", - "FOP", - "SKR1", - "TSRI" ] } ], @@ -297,22 +239,6 @@ "entrez_name": "PDGFRA", "entrez_id": 5156, "allele_registry_id": "CA123194", - "hgvs_expressions": [ - "NM_006206.4:c.2525A>T", - "NP_006197.1:p.Asp842Val", - "ENST00000257290.5:c.2525A>T", - "NC_000004.11:g.55152093A>T" - ], - "variant_types": [ - { - "id": 47, - "name": "Missense Variant", - "so_id": "SO:0001583", - "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", - "url": "http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", - "type": "variant_type" - } - ], "coordinates": { "ensembl_version": 75, "reference_build": "GRCh37", @@ -322,15 +248,27 @@ "chromosome": "4", "start": 55152093, "stop": 55152093, - "representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, "type": "coordinates" }, + "variant_types": [ + { + "id": 47, + "name": "Missense Variant", + "so_id": "SO:0001583", + "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", + "url": 
"http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", + "type": "variant_type" + } + ], "clinvar_entries": [ "13543" ], + "hgvs_expressions": [ + "NM_006206.4:c.2525A>T", + "NP_006197.1:p.Asp842Val", + "ENST00000257290.5:c.2525A>T", + "NC_000004.11:g.55152093A>T" + ], "variant_aliases": [ "ASP842VAL", "RS121908585" @@ -345,22 +283,6 @@ "entrez_name": "RET", "entrez_id": 5979, "allele_registry_id": "CA009082", - "hgvs_expressions": [ - "NM_020975.4:c.2753T>C", - "NP_065681.1:p.Met918Thr", - "ENST00000355710.3:c.2753T>C", - "NC_000010.10:g.43617416T>C" - ], - "variant_types": [ - { - "id": 47, - "name": "Missense Variant", - "so_id": "SO:0001583", - "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", - "url": "http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", - "type": "variant_type" - } - ], "coordinates": { "ensembl_version": 75, "reference_build": "GRCh37", @@ -370,35 +292,8 @@ "chromosome": "10", "start": 43617416, "stop": 43617416, - "representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, "type": "coordinates" }, - "clinvar_entries": [ - "13919" - ], - "variant_aliases": [ - "MET918THR", - "RS74799832" - ] - }, - { - "type": "variant", - "id": 1686, - "single_variant_molecular_profile_id": 1594, - "name": "G328V", - "gene_id": 154, - "entrez_name": "ACVR1", - "entrez_id": 90, - "allele_registry_id": "CA16602802", - "hgvs_expressions": [ - "NM_001105.4:c.983G>T", - "NP_001096.1:p.Gly328Val", - "NC_000002.11:g.158622516C>A", - "ENST00000434821.1:c.983G>T" - ], "variant_types": [ { "id": 47, @@ -409,28 +304,77 @@ "type": "variant_type" } ], - "coordinates": { - "ensembl_version": 75, - "reference_build": "GRCh37", - "reference_bases": "C", - "variant_bases": "A", - "representative_transcript": "ENST00000434821.1", - "chromosome": "2", - "start": 158622516, - "stop": 158622516, - 
"representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, - "type": "coordinates" - }, "clinvar_entries": [ - "376363" + "13919" + ], + "hgvs_expressions": [ + "NM_020975.4:c.2753T>C", + "NP_065681.1:p.Met918Thr", + "ENST00000355710.3:c.2753T>C", + "NC_000010.10:g.43617416T>C" ], "variant_aliases": [ - "GLY328VAL", - "RS387906589" + "MET918THR", + "RS74799832" ] } + ], + "molecular_profiles": [ + { + "type": "molecular_profile", + "id": 99, + "variant_ids": [ + 99 + ], + "name": "PDGFRA D842V", + "molecular_profile_score": 100.5, + "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", + "aliases": [ + "ASP842VAL", + "RS121908585" + ], + "parsed_name": [ + { + "type": "feature", + "id": 38, + "name": "PDGFRA" + }, + { + "type": "variant", + "id": 99, + "name": "D842V", + "deprecated": false + } + ], + "sources": [] + }, + { + "type": "molecular_profile", + "id": 113, + "variant_ids": [ + 113 + ], + "name": "RET M918T", + "molecular_profile_score": 86.0, + "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. 
Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", + "aliases": [ + "MET918THR", + "RS74799832" + ], + "parsed_name": [ + { + "type": "feature", + "id": 42, + "name": "RET" + }, + { + "type": "variant", + "id": 113, + "name": "M918T", + "deprecated": false + } + ], + "sources": [] + } ] } diff --git a/tests/data/transformers/prognostic/moa_harvester.json b/tests/data/transformers/prognostic/moa_harvester.json new file mode 100644 index 00000000..0c5eb5de --- /dev/null +++ b/tests/data/transformers/prognostic/moa_harvester.json @@ -0,0 +1,144 @@ +{ + "assertions": [ + { + "id": 141, + "context": "", + "deprecated": false, + "description": "More frequent in Chronic Myelomonocytic Leukemia.", + "disease": { + "name": "Myelodysplasia", + "oncotree_code": "MDS", + "oncotree_term": "Myelodysplasia" + }, + "therapy": { + "name": "", + "type": "", + "strategy": "", + "resistance": "", + "sensitivity": "" + }, + "predictive_implication": "Clinical evidence", + "favorable_prognosis": 0, + "created_on": "12/05/24", + "last_updated": "2019-06-13", + "submitted_by": "breardon@broadinstitute.org", + "validated": true, + "source_id": 60, + "variant": { + "id": 141, + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "chromosome": "X", + "end_position": "39921444", + "exon": "10", + "feature_type": "somatic_variant", + "gene": "BCOR", + "protein_change": "p.N1425S", + "reference_allele": "T", + "rsid": null, + "start_position": "39921444", + "variant_annotation": "Missense", + "feature": "BCOR p.N1425S (Missense)" + } + }, + { + "id": 532, + "context": "", + "deprecated": false, + "description": "The National Comprehensive Cancer Network\u00ae (NCCN\u00ae) highlights SF3B1 E622, Y623, R625, N626, H662, T663, K666, K700E, I704, G740, G742, and D781 missense variants as being associated with a favorable 
prognosis in patients with myelodysplastic syndromes.", + "disease": { + "name": "Myelodysplasia", + "oncotree_code": "MDS", + "oncotree_term": "Myelodysplasia" + }, + "therapy": { + "name": "", + "type": "", + "strategy": "", + "resistance": "", + "sensitivity": "" + }, + "predictive_implication": "Guideline", + "favorable_prognosis": 1, + "created_on": "12/05/24", + "last_updated": "2023-11-02", + "submitted_by": "breardon@broadinstitute.org", + "validated": true, + "source_id": 33, + "variant": { + "id": 532, + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "chromosome": "2", + "end_position": "198267491", + "exon": "14", + "feature_type": "somatic_variant", + "gene": "SF3B1", + "protein_change": "p.E622D", + "reference_allele": "C", + "rsid": "rs763149798", + "start_position": "198267491", + "variant_annotation": "Missense", + "feature": "SF3B1 p.E622D (Missense)" + } + } + ], + "sources": [ + { + "id": 33, + "type": "Guideline", + "doi": "", + "nct": "", + "pmid": "", + "url": "https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf", + "citation": "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines\u00ae) for Myelodysplastic Syndromes V.2.2023. \u00a9 National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." + }, + { + "id": 60, + "type": "Journal", + "doi": "10.1158/1078-0432.CCR-09-2828", + "nct": "", + "pmid": 20453058, + "url": "https://doi.org/10.1158/1078-0432.CCR-09-2828", + "citation": "O'Brien C, Wallin JJ, Sampath D, et al. Predictive biomarkers of sensitivity to the phosphatidylinositol 3' kinase inhibitor GDC-0941 in breast cancer preclinical models. Clin Cancer Res. 2010;16(14):3670-83." 
+ } + ], + "variants": [ + { + "id": 141, + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "chromosome": "X", + "end_position": "39921444", + "exon": "10", + "feature_type": "somatic_variant", + "gene": "BCOR", + "protein_change": "p.N1425S", + "reference_allele": "T", + "rsid": null, + "start_position": "39921444", + "variant_annotation": "Missense", + "feature": "BCOR p.N1425S (Missense)" + }, + { + "id": 532, + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "chromosome": "2", + "end_position": "198267491", + "exon": "14", + "feature_type": "somatic_variant", + "gene": "SF3B1", + "protein_change": "p.E622D", + "reference_allele": "C", + "rsid": "rs763149798", + "start_position": "198267491", + "variant_annotation": "Missense", + "feature": "SF3B1 p.E622D (Missense)" + } + ], + "genes": [ + "BCOR", + "SF3B1" + ] +} diff --git a/tests/data/transformers/moa_harvester.json b/tests/data/transformers/therapeutic/moa_harvester.json similarity index 67% rename from tests/data/transformers/moa_harvester.json rename to tests/data/transformers/therapeutic/moa_harvester.json index 562e3c9b..d75e0cb5 100644 --- a/tests/data/transformers/moa_harvester.json +++ b/tests/data/transformers/therapeutic/moa_harvester.json @@ -3,22 +3,27 @@ { "id": 66, "context": "", + "deprecated": false, "description": "T315I mutant ABL1 in p210 BCR-ABL cells resulted in retained high levels of phosphotyrosine at increasing concentrations of inhibitor STI-571, whereas wildtype appropriately received inhibition.", "disease": { "name": "Chronic Myelogenous Leukemia", "oncotree_code": "CML", "oncotree_term": "Chronic Myelogenous Leukemia" }, - "therapy_name": "Imatinib", - "therapy_type": "Targeted therapy", - "clinical_significance": "resistance", + "therapy": { + "name": "Imatinib", + "type": "Targeted therapy", + "strategy": "BCR-ABL inhibition", + "resistance": 1, + "sensitivity": "" + }, "predictive_implication": "Preclinical", "favorable_prognosis": "", - "created_on": 
"12/07/23", + "created_on": "12/05/24", "last_updated": "2023-11-30", "submitted_by": "breardon@broadinstitute.org", "validated": true, - "source_ids": 45, + "source_id": 45, "variant": { "id": 66, "alternate_allele": "T", @@ -37,26 +42,31 @@ } }, { - "id": 155, + "id": 154, "context": "Metastatic, after prior therapy", + "deprecated": false, "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "disease": { "name": "Colorectal Adenocarcinoma", "oncotree_code": "COADREAD", "oncotree_term": "Colorectal Adenocarcinoma" }, - "therapy_name": "Cetuximab + Encorafenib", - "therapy_type": "Targeted therapy", - "clinical_significance": "sensitivity", + "therapy": { + "name": "Cetuximab + Encorafenib", + "type": "Targeted therapy", + "strategy": "EGFR inhibition + B-RAF inhibition", + "resistance": "", + "sensitivity": 1 + }, "predictive_implication": "FDA-Approved", "favorable_prognosis": "", - "created_on": "12/07/23", + "created_on": "12/05/24", "last_updated": "2020-10-15", "submitted_by": "breardon@broadinstitute.org", "validated": true, - "source_ids": 63, + "source_id": 64, "variant": { - "id": 145, + "id": 144, "alternate_allele": "T", "cdna_change": "c.1799T>A", "chromosome": "7", @@ -83,15 +93,15 @@ "url": "https://doi.org/10.1126/science.1062538", "citation": "Gorre, Mercedes E., et al. Clinical resistance to STI-571 cancer therapy caused by BCR-ABL gene mutation or amplification. Science 293.5531 (2001): 876-880." }, - { - "id": 63, - "type": "FDA", - "doi": "", - "nct": "", - "pmid": "", - "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", - "citation": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. 
www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020." - } + { + "id": 64, + "type": "FDA", + "doi": "", + "nct": "", + "pmid": "", + "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", + "citation": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020." + } ], "variants": [ { @@ -111,21 +121,21 @@ "feature": "ABL1 p.T315I (Missense)" }, { - "id": 145, - "alternate_allele": "T", - "cdna_change": "c.1799T>A", - "chromosome": "7", - "end_position": "140453136", - "exon": "15", - "feature_type": "somatic_variant", - "gene": "BRAF", - "protein_change": "p.V600E", - "reference_allele": "A", - "rsid": "rs113488022", - "start_position": "140453136", - "variant_annotation": "Missense", - "feature": "BRAF p.V600E (Missense)" - } + "id": 144, + "alternate_allele": "T", + "cdna_change": "c.1799T>A", + "chromosome": "7", + "end_position": "140453136", + "exon": "15", + "feature_type": "somatic_variant", + "gene": "BRAF", + "protein_change": "p.V600E", + "reference_allele": "A", + "rsid": "rs113488022", + "start_position": "140453136", + "variant_annotation": "Missense", + "feature": "BRAF p.V600E (Missense)" + } ], "genes": [ "ABL1", diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index a3b5c87a..23346171 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -552,8 +552,9 @@ def test_statement_rules( expected_node_labels = [ {"Statement", "StudyStatement", "VariantTherapeuticResponseStudyStatement"}, {"Statement", "StudyStatement", "VariantPrognosticStudyStatement"}, + {"Statement", "StudyStatement", "VariantDiagnosticStudyStatement"}, ] - check_node_labels("Statement", expected_node_labels, 2) + check_node_labels("Statement", 
expected_node_labels, 3) cite_query = """ MATCH (s:Statement) diff --git a/tests/unit/harvesters/moa/test_moa_assertions.py b/tests/unit/harvesters/moa/test_moa_assertions.py index 300f70f7..6581c15f 100644 --- a/tests/unit/harvesters/moa/test_moa_assertions.py +++ b/tests/unit/harvesters/moa/test_moa_assertions.py @@ -11,27 +11,32 @@ @pytest.fixture(scope="module") -def assertion165(): +def assertion164(): """Create a fixture for assertion #165.""" return { - "id": 165, + "id": 164, "context": "Resistance to BRAFi monotherapy", "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", + "deprecated": False, "disease": { "name": "Melanoma", "oncotree_code": "MEL", "oncotree_term": "Melanoma", }, - "therapy_name": "Dabrafenib + Bevacizumab", - "therapy_type": "Targeted therapy", - "clinical_significance": "sensitivity", + "therapy": { + "name": "Dabrafenib + Bevacizumab", + "type": "Targeted therapy", + "strategy": "B-RAF inhibition + VEGF/VEGFR inhibition", + "resistance": "", + "sensitivity": 1, + }, "predictive_implication": "Preclinical", "favorable_prognosis": "", - "created_on": "12/07/23", + "created_on": "12/05/24", "last_updated": "2019-06-13", "submitted_by": "breardon@broadinstitute.org", "validated": True, - "source_ids": 69, + "source_id": 70, "variant": { "id": 145, "alternate_allele": "T", @@ -53,7 +58,7 @@ def assertion165(): @patch.object(MoaHarvester, "_get_all_variants") @patch.object(MoaHarvester, "_get_all_assertions") -def test_assertion_170(test_get_all_assertions, test_get_all_variants, assertion165): +def test_assertion_164(test_get_all_assertions, test_get_all_variants, assertion164): """Test moa harvester works correctly 
for assertions.""" moa_harvester_test_dir = TEST_HARVESTERS_DIR / SourceName.MOA.value with (moa_harvester_test_dir / "assertions.json").open() as f: @@ -70,7 +75,7 @@ def test_assertion_170(test_get_all_assertions, test_get_all_variants, assertion actual = None for a in assertions: - if a["id"] == assertion165["id"]: + if a["id"] == assertion164["id"]: actual = a break - assert actual == assertion165 + assert actual == assertion164 diff --git a/tests/unit/harvesters/moa/test_moa_source.py b/tests/unit/harvesters/moa/test_moa_source.py index bc002a26..edabee40 100644 --- a/tests/unit/harvesters/moa/test_moa_source.py +++ b/tests/unit/harvesters/moa/test_moa_source.py @@ -19,10 +19,10 @@ def sources(): @pytest.fixture(scope="module") -def source68(): - """Create a fixture for source of evidence #68.""" +def source69(): + """Create a fixture for source ID 69.""" return { - "id": 68, + "id": 69, "type": "Journal", "doi": "10.1186/s40425-016-0148-7", "nct": "NCT01673854", @@ -33,7 +33,7 @@ def source68(): @patch.object(MoaHarvester, "_get_all_assertions") -def test_source68(test_get_all_assertions, source68): +def test_source69(test_get_all_assertions, source69): """Test moa harvester works correctly for evidence.""" with (TEST_HARVESTERS_DIR / SourceName.MOA.value / "assertions.json").open() as f: data = json.load(f) @@ -44,7 +44,7 @@ def test_source68(test_get_all_assertions, source68): actual = None for s in sources: - if s["id"] == source68["id"]: + if s["id"] == source69["id"]: actual = s break - assert actual == source68 + assert actual == source69 diff --git a/tests/unit/transformers/test_civic_transformer_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py index 2ac0f82b..ac28ba23 100644 --- a/tests/unit/transformers/test_civic_transformer_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -1,4 +1,4 @@ -"""Test CIViC Transformation to common data model for prognostic.""" +"""Test CIViC Transformation to 
common data model for diagnostic.""" import json @@ -6,6 +6,7 @@ import pytest_asyncio from tests.conftest import TEST_TRANSFORMERS_DIR +from metakb.normalizers import VICC_NORMALIZER_DATA from metakb.transformers.civic import CivicTransformer DATA_DIR = TEST_TRANSFORMERS_DIR / "diagnostic" @@ -19,84 +20,483 @@ async def data(normalizers): c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) - await c.transform() - c.create_json(cdm_filepath=DATA_DIR / FILENAME) + harvested_data = c.extract_harvested_data() + await c.transform(harvested_data) + c.create_json(DATA_DIR / FILENAME) with (DATA_DIR / FILENAME).open() as f: return json.load(f) @pytest.fixture(scope="module") -def statements(civic_eid2_statement, civic_eid74_statement, civic_aid9_statement): - """Create test fixture for statements.""" - return [civic_eid2_statement, civic_eid74_statement, civic_aid9_statement] +def civic_mpid99(): + """Create a test fixture for CIViC MP 99.""" + return { + "id": "civic.mpid:99", + "type": "CategoricalVariant", + "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. 
These include; crenolanib, sirolimus, and midostaurin (PKC412).", + "label": "PDGFRA D842V", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9", + "type": "Allele", + "label": "D842V", + "digest": "Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9", + "expressions": [ + {"syntax": "hgvs.p", "value": "NP_006197.1:p.Asp842Val"} + ], + "location": { + "id": "ga4gh:SL.xuh2OFm73UN7_0uLySrRY2Xe3FW7KJ5h", + "type": "SequenceLocation", + "digest": "xuh2OFm73UN7_0uLySrRY2Xe3FW7KJ5h", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.XpQn9sZLGv_GU3uiWO7YHq9-_alGjrVX", + }, + "start": 841, + "end": 842, + "sequence": "D", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.TAskYi2zB3_dTtdyqyIxXKlYosf4cbJo", + "type": "Allele", + "label": "NM_006206.4:c.2525A>T", + "digest": "TAskYi2zB3_dTtdyqyIxXKlYosf4cbJo", + "expressions": [{"syntax": "hgvs.c", "value": "NM_006206.4:c.2525A>T"}], + "location": { + "id": "ga4gh:SL.8w-z6Kgyuzx1yA51AQPX7QKCbuZgUIa1", + "type": "SequenceLocation", + "digest": "8w-z6Kgyuzx1yA51AQPX7QKCbuZgUIa1", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.P_hYEl9XPZMg9zb-vhiwr4SNXtkCutiu", + }, + "start": 2659, + "end": 2660, + "sequence": "A", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + { + "id": "ga4gh:VA.B6-IjSb5S6K46BbJWPAvSD5vWz4tqW1j", + "type": "Allele", + "label": "NC_000004.11:g.55152093A>T", + "digest": "B6-IjSb5S6K46BbJWPAvSD5vWz4tqW1j", + "expressions": [ + {"syntax": "hgvs.g", "value": "NC_000004.11:g.55152093A>T"} + ], + "location": { + "id": "ga4gh:SL.aDuNtHik7usLDSaoVpVv883hG7u0uPGv", + "type": "SequenceLocation", + "digest": "aDuNtHik7usLDSaoVpVv883hG7u0uPGv", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc", + }, + "start": 54285925, + 
"end": 54285926, + "sequence": "A", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + ], + "alternativeLabels": ["ASP842VAL"], + "mappings": [ + { + "coding": { + "code": "CA123194", + "system": "https://reg.clinicalgenome.org/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "13543", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "rs121908585", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": {"code": "99", "system": "https://civicdb.org/variants/"}, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "4", + "start": 55152093, + "stop": 55152093, + "reference_bases": "A", + "variant_bases": "T", + "representative_transcript": "ENST00000257290.5", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 100.5, + }, + { + "name": "Variant types", + "value": [ + { + "code": "SO:0001583", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + "label": "missense_variant", + } + ], + }, + ], + } @pytest.fixture(scope="module") -def propositions( - civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition -): - """Create test fixture for proposition.""" - return [civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition] +def civic_gid38(): + """Create test fixture for CIViC GID38.""" + return { + "id": "civic.gid:38", + "type": "Gene", + "label": "PDGFRA", + "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. 
Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", + "mappings": [ + { + "coding": { + "code": "ncbigene:5156", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + } + ], + "alternativeLabels": ["CD140A", "PDGFR-2", "PDGFR2", "PDGFRA"], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:8803", "label": "PDGFRA"}, + } + ], + } @pytest.fixture(scope="module") -def variation_descriptors(civic_vid99, civic_vid113, civic_vid1686): - """Create test fixture for variants.""" - return [civic_vid99, civic_vid113, civic_vid1686] +def civic_did2(): + """Create test fixture for CIViC DID2.""" + return { + "id": "civic.did:2", + "type": "Disease", + "label": "Gastrointestinal Stromal Tumor", + "mappings": [ + { + "coding": { + "code": "DOID:9253", + "system": "https://www.disease-ontology.org/", + }, + "relation": "exactMatch", + } + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3868", + "label": "Gastrointestinal Stromal Tumor", + "mondo_id": "0011719", + }, + } + ], + } @pytest.fixture(scope="module") -def disease_descriptors(civic_did2, civic_did15, civic_did2950): - """Create test fixture for disease descriptors.""" - return [civic_did2, civic_did15, civic_did2950] +def civic_eid2_study_stmt(civic_method, civic_mpid99, civic_gid38, civic_did2): + """Create a test fixture for CIViC EID2 study statement.""" + return { + "id": "civic.eid:2", + "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", + "direction": "supports", + "strength": { + "code": "e000005", + "label": "clinical cohort evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + 
"predicate": "isDiagnosticExclusionCriterionFor", + "alleleOriginQualifier": "somatic", + "subjectVariant": civic_mpid99, + "geneContextQualifier": civic_gid38, + "objectCondition": civic_did2, + "specifiedBy": civic_method, + "reportedIn": [ + { + "id": "civic.source:52", + "label": "Lasota et al., 2004", + "title": "A great majority of GISTs with PDGFRA mutations represent gastric tumors of low or no malignant potential.", + "pmid": 15146165, + "type": "Document", + } + ], + "type": "VariantDiagnosticStudyStatement", + } @pytest.fixture(scope="module") -def gene_descriptors(civic_gid38, civic_gid42, civic_gid154): - """Create test fixture for gene descriptors.""" - return [civic_gid38, civic_gid42, civic_gid154] +def civic_mpid113(): + """Create a test fixture for CIViC MP 113.""" + return { + "id": "civic.mpid:113", + "type": "CategoricalVariant", + "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. 
It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", + "label": "RET M918T", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.hEybNB_CeKflfFhT5AKOU5i1lgZPP-aS", + "type": "Allele", + "label": "M918T", + "digest": "hEybNB_CeKflfFhT5AKOU5i1lgZPP-aS", + "expressions": [ + {"syntax": "hgvs.p", "value": "NP_065681.1:p.Met918Thr"} + ], + "location": { + "id": "ga4gh:SL.oIeqSfOEuqO7KNOPt8YUIa9vo1f6yMao", + "type": "SequenceLocation", + "digest": "oIeqSfOEuqO7KNOPt8YUIa9vo1f6yMao", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.jMu9-ItXSycQsm4hyABeW_UfSNRXRVnl", + }, + "start": 917, + "end": 918, + "sequence": "M", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.TZBjEPHhLRYxssQopcOQLWEBQrwzhH3T", + "type": "Allele", + "label": "NM_020975.4:c.2753T>C", + "digest": "TZBjEPHhLRYxssQopcOQLWEBQrwzhH3T", + "expressions": [{"syntax": "hgvs.c", "value": "NM_020975.4:c.2753T>C"}], + "location": { + "id": "ga4gh:SL.LD_QnJ8V1MR3stLat01acwyO4fWrUGco", + "type": "SequenceLocation", + "digest": "LD_QnJ8V1MR3stLat01acwyO4fWrUGco", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.jHlgYyFWJThVNL_o5UXEBwcQVNEPc62c", + }, + "start": 2942, + "end": 2943, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + }, + { + "id": "ga4gh:VA.ON-Q17mJBYx3unmQ8GiqllzEphxR-Fie", + "type": "Allele", + "label": "NC_000010.10:g.43617416T>C", + "digest": "ON-Q17mJBYx3unmQ8GiqllzEphxR-Fie", + "expressions": [ + {"syntax": "hgvs.g", "value": "NC_000010.10:g.43617416T>C"} + ], + "location": { + "id": "ga4gh:SL.wIzpygPWdaZBkoKcIg461KaERW7XfyZS", + "type": "SequenceLocation", + "digest": "wIzpygPWdaZBkoKcIg461KaERW7XfyZS", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", + 
}, + "start": 43121967, + "end": 43121968, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + }, + ], + "alternativeLabels": ["MET918THR"], + "mappings": [ + { + "coding": { + "code": "CA009082", + "system": "https://reg.clinicalgenome.org/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "13919", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "rs74799832", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": {"code": "113", "system": "https://civicdb.org/variants/"}, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "10", + "start": 43617416, + "stop": 43617416, + "reference_bases": "T", + "variant_bases": "C", + "representative_transcript": "ENST00000355710.3", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 86.0, + }, + { + "name": "Variant types", + "value": [ + { + "code": "SO:0001583", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + "label": "missense_variant", + } + ], + }, + ], + } @pytest.fixture(scope="module") -def documents(pmid_15146165, pmid_18073307): - """Create test fixture for documents.""" - return [pmid_15146165, pmid_18073307] - - -@pytest.mark.skip(reason="Will be resolved in issue-241") -def test_civic_cdm( - data, - statements, - propositions, - variation_descriptors, - gene_descriptors, - disease_descriptors, - civic_methods, - documents, - check_statement, - check_proposition, - check_variation_descriptor, - check_descriptor, - check_document, - check_method, - check_transformed_cdm, -): - """Test that civic transform works correctly.""" - check_transformed_cdm( - data, - statements, - propositions, - variation_descriptors, - 
gene_descriptors, - disease_descriptors, - None, - civic_methods, - documents, - check_statement, - check_proposition, - check_variation_descriptor, - check_descriptor, - check_document, - check_method, - DATA_DIR / FILENAME, - ) +def civic_gid42(): + """Create test fixture for CIViC GID42.""" + return { + "id": "civic.gid:42", + "type": "Gene", + "label": "RET", + "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. 
Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + "mappings": [ + { + "coding": { + "code": "ncbigene:5979", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + } + ], + "alternativeLabels": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1", + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:9967", "label": "RET"}, + } + ], + } + + +@pytest.fixture(scope="module") +def civic_did15(): + """Create test fixture for CIViC DID15.""" + return { + "id": "civic.did:15", + "type": "Disease", + "label": "Medullary Thyroid Carcinoma", + "mappings": [ + { + "coding": { + "code": "DOID:3973", + "system": "https://www.disease-ontology.org/", + }, + "relation": "exactMatch", + } + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3879", + "label": "Thyroid Gland Medullary Carcinoma", + "mondo_id": "0015277", + }, + } + ], + } + + +@pytest.fixture(scope="module") +def civic_eid74_study_stmt(civic_method, civic_mpid113, civic_gid42, civic_did15): + """Create a test fixture for CIViC EID74 study statement.""" + return { + "id": "civic.eid:74", + "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", + "direction": "supports", + "strength": { + "code": "e000005", + "label": "clinical cohort evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "isDiagnosticInclusionCriterionFor", + "alleleOriginQualifier": "somatic", + "subjectVariant": civic_mpid113, + "geneContextQualifier": civic_gid42, + "objectCondition": civic_did15, + "specifiedBy": civic_method, + "reportedIn": [ + { + 
"id": "civic.source:44", + "label": "Elisei et al., 2008", + "title": "Prognostic significance of somatic RET oncogene mutations in sporadic medullary thyroid cancer: a 10-year follow-up study.", + "pmid": 18073307, + "type": "Document", + } + ], + "type": "VariantDiagnosticStudyStatement", + } + + +@pytest.fixture(scope="module") +def statements(civic_eid2_study_stmt, civic_eid74_study_stmt): + """Create test fixture for CIViC Diagnostic statements.""" + return [civic_eid2_study_stmt, civic_eid74_study_stmt] + + +def test_civic_cdm(data, statements, check_transformed_cdm): + """Test that civic transformation works correctly.""" + check_transformed_cdm(data, statements, DATA_DIR / FILENAME) diff --git a/tests/unit/transformers/test_moa_transformer_prognostic.py b/tests/unit/transformers/test_moa_transformer_prognostic.py new file mode 100644 index 00000000..c14f7185 --- /dev/null +++ b/tests/unit/transformers/test_moa_transformer_prognostic.py @@ -0,0 +1,339 @@ +"""Test MOA Transformation to common data model""" + +import json + +import pytest +import pytest_asyncio +from tests.conftest import TEST_TRANSFORMERS_DIR + +from metakb.normalizers import VICC_NORMALIZER_DATA +from metakb.transformers.moa import MoaTransformer + +DATA_DIR = TEST_TRANSFORMERS_DIR / "prognostic" +FILENAME = "moa_cdm.json" + + +@pytest_asyncio.fixture(scope="module") +async def data(normalizers): + """Create a MOA Transformer test fixture.""" + harvester_path = DATA_DIR / "moa_harvester.json" + moa = MoaTransformer( + data_dir=DATA_DIR, + harvester_path=harvester_path, + normalizers=normalizers, + ) + harvested_data = moa.extract_harvested_data() + await moa.transform(harvested_data) + moa.create_json(cdm_filepath=DATA_DIR / FILENAME) + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) + + +@pytest.fixture(scope="module") +def moa_vid141(): + """Create a test fixture for MOA VID141.""" + return { + "id": "moa.variant:141", + "type": "CategoricalVariant", + "label": "BCOR p.N1425S 
(Missense)", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", + "digest": "pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", + "type": "Allele", + "location": { + "id": "ga4gh:SL.XiatLUYcK0JzC_CROMV55bbJ_weygAkP", + "digest": "XiatLUYcK0JzC_CROMV55bbJ_weygAkP", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.VHPiWlNXV-23rh_9w2KR2PLqPd7OSKMS", + }, + "start": 1458, + "end": 1459, + "sequence": "N", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "S"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.e84USp97bhTBu8IC3wsm7nF8_GXU7Yk2", + "type": "Allele", + "label": "X-39921444-T-C", + "digest": "e84USp97bhTBu8IC3wsm7nF8_GXU7Yk2", + "location": { + "id": "ga4gh:SL.6k6-KBncHr2M-nwSTTOLNYbUN5XsMmpB", + "type": "SequenceLocation", + "digest": "6k6-KBncHr2M-nwSTTOLNYbUN5XsMmpB", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": 40062190, + "end": 40062191, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + } + ], + "extensions": [ + { + "name": "MOA representative coordinate", + "value": { + "chromosome": "X", + "start_position": "39921444", + "end_position": "39921444", + "reference_allele": "T", + "alternate_allele": "C", + "cdna_change": "c.4376A>G", + "protein_change": "p.N1425S", + "exon": "10", + }, + } + ], + "mappings": [ + { + "coding": { + "system": "https://moalmanac.org/api/features/", + "code": "141", + }, + "relation": "exactMatch", + } + ], + } + + +@pytest.fixture(scope="module") +def moa_myelodysplasia(): + """Create test fixture for MOA disease Myelodysplasia""" + return { + "id": "moa.normalize.disease.ncit:C3247", + "type": "Disease", + "label": "Myelodysplasia", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3247", + "label": 
"Myelodysplastic Syndrome", + "mondo_id": "0018881", + }, + } + ], + "mappings": [ + { + "coding": { + "label": "Myelodysplasia", + "system": "https://oncotree.mskcc.org/", + "code": "MDS", + }, + "relation": "exactMatch", + } + ], + } + + +@pytest.fixture(scope="module") +def moa_bcor(): + """Create MOA gene BCOR test fixture""" + return { + "id": "moa.normalize.gene:BCOR", + "type": "Gene", + "label": "BCOR", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:20893", "label": "BCOR"}, + } + ], + } + + +@pytest.fixture(scope="module") +def moa_source60(): + """Create MOA source ID 60 test fixture""" + return { + "id": "moa.source:60", + "extensions": [{"name": "source_type", "value": "Journal"}], + "type": "Document", + "title": "O'Brien C, Wallin JJ, Sampath D, et al. Predictive biomarkers of sensitivity to the phosphatidylinositol 3' kinase inhibitor GDC-0941 in breast cancer preclinical models. Clin Cancer Res. 2010;16(14):3670-83.", + "urls": ["https://doi.org/10.1158/1078-0432.CCR-09-2828"], + "doi": "10.1158/1078-0432.CCR-09-2828", + "pmid": 20453058, + } + + +@pytest.fixture(scope="module") +def moa_aid141_study_stmt( + moa_vid141, moa_myelodysplasia, moa_bcor, moa_source60, moa_method +): + """Create MOA AID 141 study statement test fixture.""" + return { + "id": "moa.assertion:141", + "type": "VariantPrognosticStudyStatement", + "description": "More frequent in Chronic Myelomonocytic Leukemia.", + "strength": { + "code": "e000007", + "label": "observational study evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "associatedWithWorseOutcomeFor", + "subjectVariant": moa_vid141, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": "somatic", + "geneContextQualifier": moa_bcor, + "specifiedBy": moa_method, + "reportedIn": [moa_source60], + } + + +@pytest.fixture(scope="module") +def moa_vid532(): + """Create a test fixture for MOA VID532.""" + return { + "id": "moa.variant:532", + "type": 
"CategoricalVariant", + "label": "SF3B1 p.E622D (Missense)", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", + "digest": "53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", + "type": "Allele", + "location": { + "id": "ga4gh:SL.PvDvUEPg69q4PYBxC8jM4cEzQCCkaxHM", + "digest": "PvDvUEPg69q4PYBxC8jM4cEzQCCkaxHM", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.ST8-pVpExi5fmcLBZ_vHcVmMtvgggIJm", + }, + "start": 621, + "end": 622, + "sequence": "E", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "D"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.Vj8RALpb4HP9RtsDNiaW_N3ODw3aSj5T", + "type": "Allele", + "label": "2-198267491-C-G", + "digest": "Vj8RALpb4HP9RtsDNiaW_N3ODw3aSj5T", + "location": { + "id": "ga4gh:SL.R8r0t9A51FTOJ7Mb8VasF8L6D5Sa_FFU", + "type": "SequenceLocation", + "digest": "R8r0t9A51FTOJ7Mb8VasF8L6D5Sa_FFU", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + }, + "start": 197402766, + "end": 197402767, + "sequence": "C", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, + } + ], + "extensions": [ + { + "name": "MOA representative coordinate", + "value": { + "chromosome": "2", + "start_position": "198267491", + "end_position": "198267491", + "reference_allele": "C", + "alternate_allele": "G", + "cdna_change": "c.1866G>C", + "protein_change": "p.E622D", + "exon": "14", + }, + } + ], + "mappings": [ + { + "coding": { + "system": "https://moalmanac.org/api/features/", + "code": "532", + }, + "relation": "exactMatch", + }, + { + "coding": { + "system": "https://www.ncbi.nlm.nih.gov/snp/", + "code": "rs763149798", + }, + "relation": "relatedMatch", + }, + ], + } + + +@pytest.fixture(scope="module") +def moa_sf3b1(): + """Create MOA gene SF3B1 test fixture""" + return { + "id": "moa.normalize.gene:SF3B1", + "type": 
"Gene", + "label": "SF3B1", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:10768", "label": "SF3B1"}, + } + ], + } + + +@pytest.fixture(scope="module") +def moa_source33(): + """Create MOA source ID 33 test fixture""" + return { + "id": "moa.source:33", + "extensions": [{"name": "source_type", "value": "Guideline"}], + "type": "Document", + "title": "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines\u00ae) for Myelodysplastic Syndromes V.2.2023. \u00a9 National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org.", + "urls": ["https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf"], + } + + +@pytest.fixture(scope="module") +def moa_aid532_study_stmt( + moa_vid532, moa_myelodysplasia, moa_sf3b1, moa_source33, moa_method +): + """Create MOA AID 532 study statement test fixture.""" + return { + "id": "moa.assertion:532", + "type": "VariantPrognosticStudyStatement", + "description": "The National Comprehensive Cancer Network\u00ae (NCCN\u00ae) highlights SF3B1 E622, Y623, R625, N626, H662, T663, K666, K700E, I704, G740, G742, and D781 missense variants as being associated with a favorable prognosis in patients with myelodysplastic syndromes.", + "strength": { + "code": "e000003", + "label": "professional guideline evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "associatedWithBetterOutcomeFor", + "subjectVariant": moa_vid532, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": "somatic", + "geneContextQualifier": moa_sf3b1, + "specifiedBy": moa_method, + "reportedIn": [moa_source33], + } + + +@pytest.fixture(scope="module") +def statements(moa_aid141_study_stmt, moa_aid532_study_stmt): + """Create test fixture for MOA prognostic statements.""" + return [moa_aid141_study_stmt, moa_aid532_study_stmt] + + +def 
test_moa_cdm(data, statements, check_transformed_cdm): + """Test that moa transformation works correctly.""" + check_transformed_cdm(data, statements, DATA_DIR / FILENAME) diff --git a/tests/unit/transformers/test_moa_transformer.py b/tests/unit/transformers/test_moa_transformer_therapeutic.py similarity index 88% rename from tests/unit/transformers/test_moa_transformer.py rename to tests/unit/transformers/test_moa_transformer_therapeutic.py index f0c628da..d7997b98 100644 --- a/tests/unit/transformers/test_moa_transformer.py +++ b/tests/unit/transformers/test_moa_transformer_therapeutic.py @@ -9,33 +9,34 @@ from metakb.normalizers import VICC_NORMALIZER_DATA from metakb.transformers.moa import MoaTransformer +DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic" FILENAME = "moa_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): """Create a MOA Transformer test fixture.""" - harvester_path = TEST_TRANSFORMERS_DIR / "moa_harvester.json" + harvester_path = DATA_DIR / "moa_harvester.json" moa = MoaTransformer( - data_dir=TEST_TRANSFORMERS_DIR, + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers, ) harvested_data = moa.extract_harvested_data() await moa.transform(harvested_data) - moa.create_json(cdm_filepath=TEST_TRANSFORMERS_DIR / FILENAME) - with (TEST_TRANSFORMERS_DIR / FILENAME).open() as f: + moa.create_json(cdm_filepath=DATA_DIR / FILENAME) + with (DATA_DIR / FILENAME).open() as f: return json.load(f) @pytest.fixture(scope="module") -def moa_vid145(braf_v600e_genomic): - """Create a test fixture for MOA VID145.""" +def moa_vid144(braf_v600e_genomic): + """Create a test fixture for MOA VID144.""" genomic_rep = braf_v600e_genomic.copy() genomic_rep["label"] = "7-140453136-A-T" return { - "id": "moa.variant:145", + "id": "moa.variant:144", "type": "CategoricalVariant", "label": "BRAF p.V600E (Missense)", "constraints": [ @@ -81,7 +82,7 @@ def moa_vid145(braf_v600e_genomic): { "coding": { "system": 
"https://moalmanac.org/api/features/", - "code": "145", + "code": "144", }, "relation": "exactMatch", }, @@ -119,10 +120,10 @@ def moa_encorafenib(encorafenib_extensions): @pytest.fixture(scope="module") -def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): - """Create MOA AID 155 study statement test fixture. Uses CombinationTherapy.""" +def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method): + """Create MOA AID 154 study statement test fixture. Uses CombinationTherapy.""" return { - "id": "moa.assertion:155", + "id": "moa.assertion:154", "type": "VariantTherapeuticResponseStudyStatement", "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "strength": { @@ -131,7 +132,7 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsSensitivityTo", - "subjectVariant": moa_vid145, + "subjectVariant": moa_vid144, "objectTherapeutic": { "type": "CombinationTherapy", "id": "moa.ctid:ZGlEkRBR4st6Y_nijjuR1KUV7EFHIF_S", @@ -183,7 +184,7 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method "specifiedBy": moa_method, "reportedIn": [ { - "id": "moa.source:63", + "id": "moa.source:64", "extensions": [{"name": "source_type", "value": "FDA"}], "type": "Document", "title": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. 
Accessed October 15, 2020.", @@ -196,11 +197,11 @@ def moa_aid155_study_stmt(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method @pytest.fixture(scope="module") -def statements(moa_aid66_study_stmt, moa_aid155_study_stmt): +def statements(moa_aid66_study_stmt, moa_aid154_study_stmt): """Create test fixture for MOA therapeutic statements.""" - return [moa_aid66_study_stmt, moa_aid155_study_stmt] + return [moa_aid66_study_stmt, moa_aid154_study_stmt] def test_moa_cdm(data, statements, check_transformed_cdm): """Test that moa transformation works correctly.""" - check_transformed_cdm(data, statements, TEST_TRANSFORMERS_DIR / FILENAME) + check_transformed_cdm(data, statements, DATA_DIR / FILENAME)