diff --git a/src/metakb/query.py b/src/metakb/query.py index cce818cd..a676744d 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -15,6 +15,7 @@ ) from ga4gh.core.entity_models import Coding, Document, Extension, Method from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -69,6 +70,14 @@ class TherapeuticProcedureType(str, Enum): SUBSTITUTES = "TherapeuticSubstituteGroup" +# Statement types to corresponding class mapping +STMT_TYPE_TO_CLASS = { + "VariantDiagnosticStudyStatement": VariantDiagnosticStudyStatement, + "VariantPrognosticStudyStatement": VariantPrognosticStudyStatement, + "VariantTherapeuticResponseStudyStatement": VariantTherapeuticResponseStudyStatement, +} + + def _deserialize_field(node: dict, field_name: str) -> None | dict: """Deserialize JSON blob property. @@ -493,8 +502,8 @@ def _get_nested_stmts(self, statement_nodes: list[Node]) -> list[dict]: def _get_nested_stmt(self, stmt_node: Node) -> dict: """Get information related to a statement - Only VariantTherapeuticResponseStudyStatement and VariantPrognosticStudyStatement - are supported at the moment + Only VariantTherapeuticResponseStudyStatement, VariantPrognosticStudyStatement, + and VariantDiagnosticStudyStatement are supported at the moment :param stmt_node: Neo4j graph node for statement :return: Nested statement @@ -503,15 +512,14 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict: if study_stmt_type not in { "VariantTherapeuticResponseStudyStatement", "VariantPrognosticStudyStatement", + "VariantDiagnosticStudyStatement", }: return {} - if study_stmt_type == "VariantPrognosticStudyStatement": - study_stmt_cls = VariantPrognosticStudyStatement - condition_key = "objectCondition" - else: - study_stmt_cls = VariantTherapeuticResponseStudyStatement + if study_stmt_type == "VariantTherapeuticResponseStudyStatement": condition_key = "conditionQualifier" + else: + condition_key = "objectCondition" params = { condition_key: None, @@ -559,7 +567,7 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict: else: logger.warning("relation type not supported: %s", rel_type) - return study_stmt_cls(**params).model_dump() + return STMT_TYPE_TO_CLASS[study_stmt_type](**params).model_dump() @staticmethod def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension: @@ -917,10 +925,6 @@ async def batch_search_statements( statement_nodes = [r[0] for r in result] response.statement_ids = [n["id"] for n in statement_nodes] stmts = self._get_nested_stmts(statement_nodes) - response.statements = [ - VariantTherapeuticResponseStudyStatement(**s) - if s["type"] == "VariantTherapeuticResponseStudyStatement" - else VariantPrognosticStudyStatement(**s) - for s in stmts - ] + + response.statements = [STMT_TYPE_TO_CLASS[s["type"]](**s) for s in stmts] return response diff --git a/src/metakb/schemas/api.py b/src/metakb/schemas/api.py index 27a63b86..59251e94 100644 --- a/src/metakb/schemas/api.py +++ b/src/metakb/schemas/api.py @@ -3,6 +3,7 @@ from typing import Literal from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -48,7 +49,9 @@ class SearchStatementsService(BaseModel): warnings: list[StrictStr] = [] statement_ids: list[StrictStr] = [] statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] service_meta_: ServiceMeta @@ -73,6 +76,8 @@ class BatchSearchStatementsService(BaseModel): warnings: list[StrictStr] = [] statement_ids: list[StrictStr] = [] statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] service_meta_: ServiceMeta diff --git a/src/metakb/transformers/base.py b/src/metakb/transformers/base.py index eebc6c62..a8504fde 100644 --- a/src/metakb/transformers/base.py +++ b/src/metakb/transformers/base.py @@ -25,6 +25,7 @@ ) from ga4gh.core.entity_models import Coding, Document, Extension, Method from ga4gh.va_spec.profiles.var_study_stmt import ( + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -111,7 +112,9 @@ class TransformedData(BaseModel): """Define model for transformed data""" statements: list[ - VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement + VariantTherapeuticResponseStudyStatement + | VariantPrognosticStudyStatement + | VariantDiagnosticStudyStatement ] = [] categorical_variants: list[CategoricalVariant] = [] variations: list[Allele] = [] diff --git a/src/metakb/transformers/civic.py b/src/metakb/transformers/civic.py index a722c7c7..9fbb3c4b 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -22,8 +22,10 @@ ) from ga4gh.va_spec.profiles.var_study_stmt import ( AlleleOriginQualifier, + DiagnosticPredicate, PrognosticPredicate, TherapeuticResponsePredicate, + VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) @@ -91,6 +93,8 @@ "RESISTANCE": TherapeuticResponsePredicate.RESISTANCE, "POOR_OUTCOME": PrognosticPredicate.WORSE_OUTCOME, "BETTER_OUTCOME": PrognosticPredicate.BETTER_OUTCOME, + "POSITIVE": DiagnosticPredicate.INCLUSIVE, + "NEGATIVE": DiagnosticPredicate.EXCLUSIVE, } @@ -121,6 +125,7 @@ class _CivicEvidenceType(str, Enum): PREDICTIVE = "PREDICTIVE" PROGNOSTIC = "PROGNOSTIC" + DIAGNOSTIC = "DIAGNOSTIC" class _VariationCache(BaseModel): @@ -359,8 +364,10 @@ def _add_variant_study_stmt( if evidence_type == _CivicEvidenceType.PREDICTIVE: params["objectTherapeutic"] = civic_therapeutic statement = VariantTherapeuticResponseStudyStatement(**params) - else: + elif evidence_type == _CivicEvidenceType.PROGNOSTIC: statement = VariantPrognosticStudyStatement(**params) + else: + statement = VariantDiagnosticStudyStatement(**params) self.processed_data.statements.append(statement) diff --git a/tests/conftest.py b/tests/conftest.py index c6f2cfa8..ec42e754 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1005,429 +1005,6 @@ def civic_aid6_document(): } -@pytest.fixture(scope="session") -def civic_eid2_statement(): - """Create a test fixture for CIViC EID2 statement.""" - return { - "id": "civic.eid:2", - "type": "Statement", - "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", - "direction": "supports", - "evidence_level": "civic.evidence_level:B", - "proposition": "proposition:KVuJMXiPm-oK4vvijE9Cakvucayay3jE", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:99", - "disease_descriptor": "civic.did:2", - "method": "method:1", - "supported_by": ["pmid:15146165"], - } - - -@pytest.fixture(scope="session") -def civic_eid2_proposition(): - """Create a test fixture for CIViC EID2 proposition.""" - return { - "id": "proposition:KVuJMXiPm-oK4vvijE9Cakvucayay3jE", - "type": "diagnostic_proposition", - "predicate": "is_diagnostic_exclusion_criterion_for", - "subject": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "object_qualifier": "ncit:C3868", - } - - -@pytest.fixture(scope="session") -def civic_vid99(): - """Create a test fixture for CIViC VID99.""" - return { - "id": "civic.vid:99", - "type": "VariationDescriptor", - "label": "D842V", - "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", - "variation_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "variation": { - "_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "location": { - "_id": "ga4gh:VSL.CvhzuX1-CV0in3YTnaq9xZGAPxmrkrFC", - "interval": { - "start": {"value": 841, "type": "Number"}, - "end": {"value": 842, "type": "Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.XpQn9sZLGv_GU3uiWO7YHq9-_alGjrVX", - "type": "SequenceLocation", - }, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:13543", "caid:CA123194", "dbsnp:121908585"], - "alternate_labels": ["ASP842VAL"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "4", - "start": 55152093, - "stop": 55152093, - "reference_bases": "A", - "variant_bases": "T", - "representative_transcript": "ENST00000257290.5", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - { - "name": "civic_actionability_score", - "value": "100.5", - }, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:1", - "label": "Imatinib Resistance", - "description": "While imatinib has shown to be incredibly successful in treating philadelphia chromosome positive CML, patients that have shown primary or secondary resistance to the drug have been observed to harbor T315I and E255K ABL kinase domain mutations. These mutations, among others, have been observed both in primary refractory disease and acquired resistance. In gastrointestinal stromal tumors (GIST), PDGFRA 842 mutations have also been shown to confer resistance to imatinib. ", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_006206.4:c.2525A>T", - "type": "Expression", - }, - { - "syntax": "hgvs.p", - "value": "NP_006197.1:p.Asp842Val", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000257290.5:c.2525A>T", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000004.11:g.55152093A>T", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:38", - } - - -@pytest.fixture(scope="session") -def civic_did2(): - """Create a test fixture for CIViC DID2.""" - return { - "id": "civic.did:2", - "type": "DiseaseDescriptor", - "label": "Gastrointestinal Stromal Tumor", - "disease_id": "ncit:C3868", - "xrefs": ["DOID:9253"], - } - - -@pytest.fixture(scope="session") -def civic_gid38(): - """Create a test fixture for CIViC GID38.""" - return { - "id": "civic.gid:38", - "type": "GeneDescriptor", - "label": "PDGFRA", - "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", - "gene_id": "hgnc:8803", - "alternate_labels": ["PDGFRA", "PDGFR2", "PDGFR-2", "CD140A"], - "xrefs": ["ncbigene:5156"], - } - - -@pytest.fixture(scope="session") -def civic_eid74_statement(): - """Create a test fixture for CIViC EID74 statement.""" - return { - "id": "civic.eid:74", - "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", - "direction": "supports", - "evidence_level": "civic.evidence_level:B", - "proposition": "proposition:Vyzbpg-s6mw27yJfYBFxGyQeuEJacP4l", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:113", - "disease_descriptor": "civic.did:15", - "method": "method:1", - "supported_by": ["pmid:18073307"], - "type": "Statement", - } - - -@pytest.fixture(scope="session") -def civic_eid74_proposition(): - """Create a test fixture for CIViC EID74 proposition.""" - return { - "id": "proposition:Vyzbpg-s6mw27yJfYBFxGyQeuEJacP4l", - "type": "diagnostic_proposition", - "predicate": "is_diagnostic_inclusion_criterion_for", - "subject": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "object_qualifier": "ncit:C3879", - } - - -@pytest.fixture(scope="session") -def civic_vid113(): - """Create a test fixture for CIViC VID113.""" - return { - "id": "civic.vid:113", - "type": "VariationDescriptor", - "label": "M918T", - "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", - "variation_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "variation": { - "_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "location": { - "_id": "ga4gh:VSL.zkwClPQjjO0FqXWN46QRuiGgodhPjxqT", - "interval": { - "end": {"value": 918, "type": "Number"}, - "start": {"value": 917, "type": "Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.jMu9-ItXSycQsm4hyABeW_UfSNRXRVnl", - "type": "SequenceLocation", - }, - "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:13919", "caid:CA009082", "dbsnp:74799832"], - "alternate_labels": ["MET918THR"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "10", - "start": 43617416, - "stop": 43617416, - "reference_bases": "T", - "variant_bases": "C", - "representative_transcript": "ENST00000355710.3", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - {"name": "civic_actionability_score", "value": "86"}, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:6", - "label": "Motesanib Resistance", - "description": "RET activation is a common oncogenic marker of medullary thyroid carcinoma. Treatment of these patients with the targeted therapeutic motesanib has shown to be effective. However, the missense mutations C634W and M918T have shown to confer motesanib resistance in cell lines. ", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_020975.4:c.2753T>C", - "type": "Expression", - }, - { - "syntax": "hgvs.p", - "value": "NP_065681.1:p.Met918Thr", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000355710.3:c.2753T>C", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000010.10:g.43617416T>C", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:42", - } - - -@pytest.fixture(scope="session") -def civic_did15(): - """Create test fixture for CIViC DID15.""" - return { - "id": "civic.did:15", - "type": "DiseaseDescriptor", - "label": "Thyroid Gland Medullary Carcinoma", - "disease_id": "ncit:C3879", - "xrefs": ["DOID:3973"], - } - - -@pytest.fixture(scope="session") -def civic_gid42(): - """Create test fixture for CIViC GID42.""" - return { - "id": "civic.gid:42", - "type": "GeneDescriptor", - "label": "RET", - "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistence. No RET-specific agents are currently clinically available but several promiscuous kinase inhibitors that target RET, among others, have been approved for MTC treatment.", - "gene_id": "hgnc:9967", - "alternate_labels": [ - "RET", - "RET-ELE1", - "PTC", - "MTC1", - "MEN2B", - "MEN2A", - "HSCR1", - "CDHR16", - "CDHF12", - ], - "xrefs": ["ncbigene:5979"], - } - - -@pytest.fixture(scope="session") -def civic_aid9_statement(): - """Create a test fixture for CIViC AID9 statement.""" - return { - "id": "civic.aid:9", - "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", - "direction": "supports", - "evidence_level": "amp_asco_cap_2017_level:2C", - "proposition": "proposition:Pjri4dU2VaEKcdKtVkoAUJ8bHFXnW2My", - "variation_origin": "somatic", - "variation_descriptor": "civic.vid:1686", - "disease_descriptor": "civic.did:2950", - "method": "method:2", - "supported_by": ["civic.eid:4846", "civic.eid:6955"], - "type": "Statement", - } - - -@pytest.fixture(scope="session") -def civic_aid9_proposition(): - """Create a test fixture for CIViC AID9 proposition.""" - return { - "id": "proposition:Pjri4dU2VaEKcdKtVkoAUJ8bHFXnW2My", - "predicate": "is_diagnostic_inclusion_criterion_for", - "subject": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "object_qualifier": "DOID:0080684", - "type": "diagnostic_proposition", - } - - -@pytest.fixture(scope="session") -def civic_vid1686(): - """Create a test fixture for CIViC VID1686.""" - return { - "id": "civic.vid:1686", - "type": "VariationDescriptor", - "label": "G328V", - "variation_id": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "variation": { - "_id": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", - "location": { - "_id": "ga4gh:VSL.w84KcAESJfbxvPCwCvYpQajlkdPrfS12", - "interval": { - "end": {"value": 328, "type": "Number"}, - "start": {"value": 327, "type": "Number"}, - "type": "SequenceInterval", - }, - "sequence_id": "ga4gh:SQ.6CnHhDq_bDCsuIBf0AzxtKq_lXYM7f0m", - "type": "SequenceLocation", - }, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - }, - "xrefs": ["clinvar:376363", "caid:CA16602802", "dbsnp:387906589"], - "alternate_labels": ["GLY328VAL"], - "extensions": [ - { - "name": "civic_representative_coordinate", - "value": { - "chromosome": "2", - "start": 158622516, - "stop": 158622516, - "reference_bases": "C", - "variant_bases": "A", - "representative_transcript": "ENST00000434821.1", - "ensembl_version": 75, - "reference_build": "GRCh37", - }, - }, - {"name": "civic_actionability_score", "value": "30"}, - { - "name": "variant_group", - "value": [ - { - "id": "civic.variant_group:23", - "label": "ACVR1 kinase domain mutation", - "type": "variant_group", - } - ], - }, - ], - "structural_type": "SO:0001583", - "expressions": [ - {"syntax": "hgvs.c", "value": "NM_001105.4:c.983G>T", "type": "Expression"}, - { - "syntax": "hgvs.p", - "value": "NP_001096.1:p.Gly328Val", - "type": "Expression", - }, - { - "syntax": "hgvs.g", - "value": "NC_000002.11:g.158622516C>A", - "type": "Expression", - }, - { - "syntax": "hgvs.c", - "value": "ENST00000434821.1:c.983G>T", - "type": "Expression", - }, - ], - "gene_context": "civic.gid:154", - } - - -@pytest.fixture(scope="session") -def civic_did2950(): - """Create a test fixture for CIViC DID2950.""" - return { - "id": "civic.did:2950", - "type": "DiseaseDescriptor", - "label": "Diffuse Midline Glioma, H3 K27M-mutant", - "disease_id": "DOID:0080684", - "xrefs": ["DOID:0080684"], - } - - -@pytest.fixture(scope="session") -def civic_gid154(): - """Create a test fixture for CIViC GID154.""" - return { - "id": "civic.gid:154", - "type": "GeneDescriptor", - "label": "ACVR1", - "gene_id": "hgnc:171", - "alternate_labels": [ - "ACVR1", - "TSRI", - "SKR1", - "FOP", - "ALK2", - "ACVRLK2", - "ACVR1A", - "ACTRI", - ], - "xrefs": ["ncbigene:90"], - } - - @pytest.fixture(scope="session") def civic_eid26_study_stmt( civic_mpid65, civic_gid29, civic_did3, civic_method, pmid_16384925 diff --git a/tests/data/transformers/diagnostic/civic_harvester.json b/tests/data/transformers/diagnostic/civic_harvester.json index 46b8e9e8..20366011 100644 --- a/tests/data/transformers/diagnostic/civic_harvester.json +++ b/tests/data/transformers/diagnostic/civic_harvester.json @@ -23,8 +23,6 @@ "doid": "9253", "disease_url": "https://www.disease-ontology.org/?id=DOID:9253", "aliases": [ - "GANT", - "GIST", "Gastrointestinal Stromal Tumour", "Stromal Tumor Of Gastrointestinal Tract", "Stromal Tumour Of Gastrointestinal Tract" @@ -69,13 +67,14 @@ "therapies": [], "disease": { "id": 15, - "name": "Thyroid Gland Medullary Carcinoma", - "display_name": "Thyroid Gland Medullary Carcinoma", + "name": "Medullary Thyroid Carcinoma", + "display_name": "Medullary Thyroid Carcinoma", "doid": "3973", "disease_url": "https://www.disease-ontology.org/?id=DOID:3973", "aliases": [ "Medullary Carcinoma Of The Thyroid Gland", - "Medullary Thyroid Carcinoma", + "Parafollicular Cell Carcinoma", + "Thyroid Gland Medullary Carcinoma", "Ultimobranchial Thyroid Tumor", "Ultimobranchial Thyroid Tumour" ], @@ -102,46 +101,7 @@ "phenotypes": [] } ], - "assertions": [ - { - "type": "assertion", - "id": 9, - "variant_origin": "SOMATIC", - "therapy_interaction_type": null, - "summary": "Supports diagnosis of diffuse intrinsic pontine glioma.", - "status": "accepted", - "significance": "POSITIVE", - "nccn_guideline_version": "", - "nccn_guideline": null, - "name": "AID9", - "molecular_profile_id": 1594, - "fda_regulatory_approval": null, - "fda_companion_test": null, - "evidence_ids": [ - 4846, - 6955 - ], - "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", - "assertion_type": "DIAGNOSTIC", - "assertion_direction": "SUPPORTS", - "amp_level": "TIER_II_LEVEL_C", - "therapies": [], - "disease": { - "id": 2950, - "name": "Diffuse Midline Glioma, H3 K27M-mutant", - "display_name": "Diffuse Midline Glioma, H3 K27M-mutant", - "doid": "0080684", - "disease_url": "https://www.disease-ontology.org/?id=DOID:0080684", - "aliases": [ - "Diffuse Intrinsic Pontine Glioma" - ], - "type": "disease" - }, - "phenotypes": [], - "clingen_codes": [], - "acmg_codes": [] - } - ], + "assertions": [], "genes": [ { "type": "gene", @@ -149,6 +109,12 @@ "name": "PDGFRA", "entrez_id": 5156, "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", + "aliases": [ + "CD140A", + "PDGFR-2", + "PDGFR2", + "PDGFRA" + ], "sources": [ { "id": 415, @@ -186,12 +152,6 @@ "clinical_trials": [], "type": "source" } - ], - "aliases": [ - "CD140A", - "PDGFR-2", - "PDGFR2", - "PDGFRA" ] }, { @@ -200,6 +160,17 @@ "name": "RET", "entrez_id": 5979, "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + "aliases": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1" + ], "sources": [ { "id": 44, @@ -255,35 +226,6 @@ "clinical_trials": [], "type": "source" } - ], - "aliases": [ - "CDHF12", - "CDHR16", - "HSCR1", - "MEN2A", - "MEN2B", - "MTC1", - "PTC", - "RET", - "RET-ELE1" - ] - }, - { - "type": "gene", - "id": 154, - "name": "ACVR1", - "entrez_id": 90, - "description": "", - "sources": [], - "aliases": [ - "ACTRI", - "ACVR1", - "ACVR1A", - "ACVRLK2", - "ALK2", - "FOP", - "SKR1", - "TSRI" ] } ], @@ -297,22 +239,6 @@ "entrez_name": "PDGFRA", "entrez_id": 5156, "allele_registry_id": "CA123194", - "hgvs_expressions": [ - "NM_006206.4:c.2525A>T", - "NP_006197.1:p.Asp842Val", - "ENST00000257290.5:c.2525A>T", - "NC_000004.11:g.55152093A>T" - ], - "variant_types": [ - { - "id": 47, - "name": "Missense Variant", - "so_id": "SO:0001583", - "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", - "url": "http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", - "type": "variant_type" - } - ], "coordinates": { "ensembl_version": 75, "reference_build": "GRCh37", @@ -322,15 +248,27 @@ "chromosome": "4", "start": 55152093, "stop": 55152093, - "representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, "type": "coordinates" }, + "variant_types": [ + { + "id": 47, + "name": "Missense Variant", + "so_id": "SO:0001583", + "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", + "url": "http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", + "type": "variant_type" + } + ], "clinvar_entries": [ "13543" ], + "hgvs_expressions": [ + "NM_006206.4:c.2525A>T", + "NP_006197.1:p.Asp842Val", + "ENST00000257290.5:c.2525A>T", + "NC_000004.11:g.55152093A>T" + ], "variant_aliases": [ "ASP842VAL", "RS121908585" @@ -345,22 +283,6 @@ "entrez_name": "RET", "entrez_id": 5979, "allele_registry_id": "CA009082", - "hgvs_expressions": [ - "NM_020975.4:c.2753T>C", - "NP_065681.1:p.Met918Thr", - "ENST00000355710.3:c.2753T>C", - "NC_000010.10:g.43617416T>C" - ], - "variant_types": [ - { - "id": 47, - "name": "Missense Variant", - "so_id": "SO:0001583", - "description": "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.", - "url": "http://www.sequenceontology.org/browser/current_svn/term/SO:0001583", - "type": "variant_type" - } - ], "coordinates": { "ensembl_version": 75, "reference_build": "GRCh37", @@ -370,35 +292,8 @@ "chromosome": "10", "start": 43617416, "stop": 43617416, - "representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, "type": "coordinates" }, - "clinvar_entries": [ - "13919" - ], - "variant_aliases": [ - "MET918THR", - "RS74799832" - ] - }, - { - "type": "variant", - "id": 1686, - "single_variant_molecular_profile_id": 1594, - "name": "G328V", - "gene_id": 154, - "entrez_name": "ACVR1", - "entrez_id": 90, - "allele_registry_id": "CA16602802", - "hgvs_expressions": [ - "NM_001105.4:c.983G>T", - "NP_001096.1:p.Gly328Val", - "NC_000002.11:g.158622516C>A", - "ENST00000434821.1:c.983G>T" - ], "variant_types": [ { "id": 47, @@ -409,28 +304,77 @@ "type": "variant_type" } ], - "coordinates": { - "ensembl_version": 75, - "reference_build": "GRCh37", - "reference_bases": "C", - "variant_bases": "A", - "representative_transcript": "ENST00000434821.1", - "chromosome": "2", - "start": 158622516, - "stop": 158622516, - "representative_transcript2": null, - "chromosome2": null, - "start2": null, - "stop2": null, - "type": "coordinates" - }, "clinvar_entries": [ - "376363" + "13919" + ], + "hgvs_expressions": [ + "NM_020975.4:c.2753T>C", + "NP_065681.1:p.Met918Thr", + "ENST00000355710.3:c.2753T>C", + "NC_000010.10:g.43617416T>C" ], "variant_aliases": [ - "GLY328VAL", - "RS387906589" + "MET918THR", + "RS74799832" ] } + ], + "molecular_profiles": [ + { + "type": "molecular_profile", + "id": 99, + "variant_ids": [ + 99 + ], + "name": "PDGFRA D842V", + "molecular_profile_score": 100.5, + "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", + "aliases": [ + "ASP842VAL", + "RS121908585" + ], + "parsed_name": [ + { + "type": "feature", + "id": 38, + "name": "PDGFRA" + }, + { + "type": "variant", + "id": 99, + "name": "D842V", + "deprecated": false + } + ], + "sources": [] + }, + { + "type": "molecular_profile", + "id": 113, + "variant_ids": [ + 113 + ], + "name": "RET M918T", + "molecular_profile_score": 86.0, + "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", + "aliases": [ + "MET918THR", + "RS74799832" + ], + "parsed_name": [ + { + "type": "feature", + "id": 42, + "name": "RET" + }, + { + "type": "variant", + "id": 113, + "name": "M918T", + "deprecated": false + } + ], + "sources": [] + } ] } diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index a3b5c87a..23346171 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -552,8 +552,9 @@ def test_statement_rules( expected_node_labels = [ {"Statement", "StudyStatement", "VariantTherapeuticResponseStudyStatement"}, {"Statement", "StudyStatement", "VariantPrognosticStudyStatement"}, + {"Statement", "StudyStatement", "VariantDiagnosticStudyStatement"}, ] - check_node_labels("Statement", expected_node_labels, 2) + check_node_labels("Statement", expected_node_labels, 3) cite_query = """ MATCH (s:Statement) diff --git a/tests/unit/transformers/test_civic_transformer_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py index 2ac0f82b..ac28ba23 100644 --- a/tests/unit/transformers/test_civic_transformer_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -1,4 +1,4 @@ -"""Test CIViC Transformation to common data model for prognostic.""" +"""Test CIViC Transformation to common data model for diagnostic.""" import json @@ -6,6 +6,7 @@ import pytest_asyncio from tests.conftest import TEST_TRANSFORMERS_DIR +from metakb.normalizers import VICC_NORMALIZER_DATA from metakb.transformers.civic import CivicTransformer DATA_DIR = TEST_TRANSFORMERS_DIR / "diagnostic" @@ -19,84 +20,483 @@ async def data(normalizers): c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) - await c.transform() - c.create_json(cdm_filepath=DATA_DIR / FILENAME) + harvested_data = c.extract_harvested_data() + await c.transform(harvested_data) + c.create_json(DATA_DIR / FILENAME) with (DATA_DIR / FILENAME).open() as f: return json.load(f) @pytest.fixture(scope="module") -def statements(civic_eid2_statement, civic_eid74_statement, civic_aid9_statement): - """Create test fixture for statements.""" - return [civic_eid2_statement, civic_eid74_statement, civic_aid9_statement] +def civic_mpid99(): + """Create a test fixture for CIViC MP 99.""" + return { + "id": "civic.mpid:99", + "type": "CategoricalVariant", + "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", + "label": "PDGFRA D842V", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9", + "type": "Allele", + "label": "D842V", + "digest": "Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9", + "expressions": [ + {"syntax": "hgvs.p", "value": "NP_006197.1:p.Asp842Val"} + ], + "location": { + "id": "ga4gh:SL.xuh2OFm73UN7_0uLySrRY2Xe3FW7KJ5h", + "type": "SequenceLocation", + "digest": "xuh2OFm73UN7_0uLySrRY2Xe3FW7KJ5h", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.XpQn9sZLGv_GU3uiWO7YHq9-_alGjrVX", + }, + "start": 841, + "end": 842, + "sequence": "D", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.TAskYi2zB3_dTtdyqyIxXKlYosf4cbJo", + "type": "Allele", + "label": "NM_006206.4:c.2525A>T", + "digest": "TAskYi2zB3_dTtdyqyIxXKlYosf4cbJo", + "expressions": [{"syntax": "hgvs.c", "value": "NM_006206.4:c.2525A>T"}], + "location": { + "id": "ga4gh:SL.8w-z6Kgyuzx1yA51AQPX7QKCbuZgUIa1", + "type": "SequenceLocation", + "digest": "8w-z6Kgyuzx1yA51AQPX7QKCbuZgUIa1", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.P_hYEl9XPZMg9zb-vhiwr4SNXtkCutiu", + }, + "start": 2659, + "end": 2660, + "sequence": "A", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + { + "id": "ga4gh:VA.B6-IjSb5S6K46BbJWPAvSD5vWz4tqW1j", + "type": "Allele", + "label": "NC_000004.11:g.55152093A>T", + "digest": "B6-IjSb5S6K46BbJWPAvSD5vWz4tqW1j", + "expressions": [ + {"syntax": "hgvs.g", "value": "NC_000004.11:g.55152093A>T"} + ], + "location": { + "id": "ga4gh:SL.aDuNtHik7usLDSaoVpVv883hG7u0uPGv", + "type": "SequenceLocation", + "digest": "aDuNtHik7usLDSaoVpVv883hG7u0uPGv", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc", + }, + "start": 54285925, + "end": 54285926, + "sequence": "A", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + ], + "alternativeLabels": ["ASP842VAL"], + "mappings": [ + { + "coding": { + "code": "CA123194", + "system": "https://reg.clinicalgenome.org/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "13543", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "rs121908585", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": {"code": "99", "system": "https://civicdb.org/variants/"}, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "4", + "start": 55152093, + "stop": 55152093, + "reference_bases": "A", + "variant_bases": "T", + "representative_transcript": "ENST00000257290.5", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 100.5, + }, + { + "name": "Variant types", + "value": [ + { + "code": "SO:0001583", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + "label": "missense_variant", + } + ], + }, + ], + } @pytest.fixture(scope="module") -def propositions( - civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition -): - """Create test fixture for proposition.""" - return [civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition] +def civic_gid38(): + """Create test fixture for CIViC GID38.""" + return { + "id": "civic.gid:38", + "type": "Gene", + "label": "PDGFRA", + "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", + "mappings": [ + { + "coding": { + "code": "ncbigene:5156", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + } + ], + "alternativeLabels": ["CD140A", "PDGFR-2", "PDGFR2", "PDGFRA"], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:8803", "label": "PDGFRA"}, + } + ], + } @pytest.fixture(scope="module") -def variation_descriptors(civic_vid99, civic_vid113, civic_vid1686): - """Create test fixture for variants.""" - return [civic_vid99, civic_vid113, civic_vid1686] +def civic_did2(): + """Create test fixture for CIViC DID2.""" + return { + "id": "civic.did:2", + "type": "Disease", + "label": "Gastrointestinal Stromal Tumor", + "mappings": [ + { + "coding": { + "code": "DOID:9253", + "system": "https://www.disease-ontology.org/", + }, + "relation": "exactMatch", + } + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3868", + "label": "Gastrointestinal Stromal Tumor", + "mondo_id": "0011719", + }, + } + ], + } @pytest.fixture(scope="module") -def disease_descriptors(civic_did2, civic_did15, civic_did2950): - """Create test fixture for disease descriptors.""" - return [civic_did2, civic_did15, civic_did2950] +def civic_eid2_study_stmt(civic_method, civic_mpid99, civic_gid38, civic_did2): + """Create a test fixture for CIViC EID2 study statement.""" + return { + "id": "civic.eid:2", + "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", + "direction": "supports", + "strength": { + "code": "e000005", + "label": "clinical cohort evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "isDiagnosticExclusionCriterionFor", + "alleleOriginQualifier": "somatic", + "subjectVariant": civic_mpid99, + "geneContextQualifier": civic_gid38, + "objectCondition": civic_did2, + "specifiedBy": civic_method, + "reportedIn": [ + { + "id": "civic.source:52", + "label": "Lasota et al., 2004", + "title": "A great majority of GISTs with PDGFRA mutations represent gastric tumors of low or no malignant potential.", + "pmid": 15146165, + "type": "Document", + } + ], + "type": "VariantDiagnosticStudyStatement", + } @pytest.fixture(scope="module") -def gene_descriptors(civic_gid38, civic_gid42, civic_gid154): - """Create test fixture for gene descriptors.""" - return [civic_gid38, civic_gid42, civic_gid154] +def civic_mpid113(): + """Create a test fixture for CIViC MP 113.""" + return { + "id": "civic.mpid:113", + "type": "CategoricalVariant", + "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", + "label": "RET M918T", + "constraints": [ + { + "definingContext": { + "id": "ga4gh:VA.hEybNB_CeKflfFhT5AKOU5i1lgZPP-aS", + "type": "Allele", + "label": "M918T", + "digest": "hEybNB_CeKflfFhT5AKOU5i1lgZPP-aS", + "expressions": [ + {"syntax": "hgvs.p", "value": "NP_065681.1:p.Met918Thr"} + ], + "location": { + "id": "ga4gh:SL.oIeqSfOEuqO7KNOPt8YUIa9vo1f6yMao", + "type": "SequenceLocation", + "digest": "oIeqSfOEuqO7KNOPt8YUIa9vo1f6yMao", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.jMu9-ItXSycQsm4hyABeW_UfSNRXRVnl", + }, + "start": 917, + "end": 918, + "sequence": "M", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + "type": "DefiningContextConstraint", + } + ], + "members": [ + { + "id": "ga4gh:VA.TZBjEPHhLRYxssQopcOQLWEBQrwzhH3T", + "type": "Allele", + "label": "NM_020975.4:c.2753T>C", + "digest": "TZBjEPHhLRYxssQopcOQLWEBQrwzhH3T", + "expressions": [{"syntax": "hgvs.c", "value": "NM_020975.4:c.2753T>C"}], + "location": { + "id": "ga4gh:SL.LD_QnJ8V1MR3stLat01acwyO4fWrUGco", + "type": "SequenceLocation", + "digest": "LD_QnJ8V1MR3stLat01acwyO4fWrUGco", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.jHlgYyFWJThVNL_o5UXEBwcQVNEPc62c", + }, + "start": 2942, + "end": 2943, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + }, + { + "id": "ga4gh:VA.ON-Q17mJBYx3unmQ8GiqllzEphxR-Fie", + "type": "Allele", + "label": "NC_000010.10:g.43617416T>C", + "digest": "ON-Q17mJBYx3unmQ8GiqllzEphxR-Fie", + "expressions": [ + {"syntax": "hgvs.g", "value": "NC_000010.10:g.43617416T>C"} + ], + "location": { + "id": "ga4gh:SL.wIzpygPWdaZBkoKcIg461KaERW7XfyZS", + "type": "SequenceLocation", + "digest": "wIzpygPWdaZBkoKcIg461KaERW7XfyZS", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", + }, + "start": 43121967, + "end": 43121968, + "sequence": "T", + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + }, + ], + "alternativeLabels": ["MET918THR"], + "mappings": [ + { + "coding": { + "code": "CA009082", + "system": "https://reg.clinicalgenome.org/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "13919", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "rs74799832", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": {"code": "113", "system": "https://civicdb.org/variants/"}, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "10", + "start": 43617416, + "stop": 43617416, + "reference_bases": "T", + "variant_bases": "C", + "representative_transcript": "ENST00000355710.3", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 86.0, + }, + { + "name": "Variant types", + "value": [ + { + "code": "SO:0001583", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + "label": "missense_variant", + } + ], + }, + ], + } @pytest.fixture(scope="module") -def documents(pmid_15146165, pmid_18073307): - """Create test fixture for documents.""" - return [pmid_15146165, pmid_18073307] - - -@pytest.mark.skip(reason="Will be resolved in issue-241") -def test_civic_cdm( - data, - statements, - propositions, - variation_descriptors, - gene_descriptors, - disease_descriptors, - civic_methods, - documents, - check_statement, - check_proposition, - check_variation_descriptor, - check_descriptor, - check_document, - check_method, - check_transformed_cdm, -): - """Test that civic transform works correctly.""" - check_transformed_cdm( - data, - statements, - propositions, - variation_descriptors, - gene_descriptors, - disease_descriptors, - None, - civic_methods, - documents, - check_statement, - check_proposition, - check_variation_descriptor, - check_descriptor, - check_document, - check_method, - DATA_DIR / FILENAME, - ) +def civic_gid42(): + """Create test fixture for CIViC GID42.""" + return { + "id": "civic.gid:42", + "type": "Gene", + "label": "RET", + "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + "mappings": [ + { + "coding": { + "code": "ncbigene:5979", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + } + ], + "alternativeLabels": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1", + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:9967", "label": "RET"}, + } + ], + } + + +@pytest.fixture(scope="module") +def civic_did15(): + """Create test fixture for CIViC DID15.""" + return { + "id": "civic.did:15", + "type": "Disease", + "label": "Medullary Thyroid Carcinoma", + "mappings": [ + { + "coding": { + "code": "DOID:3973", + "system": "https://www.disease-ontology.org/", + }, + "relation": "exactMatch", + } + ], + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C3879", + "label": "Thyroid Gland Medullary Carcinoma", + "mondo_id": "0015277", + }, + } + ], + } + + +@pytest.fixture(scope="module") +def civic_eid74_study_stmt(civic_method, civic_mpid113, civic_gid42, civic_did15): + """Create a test fixture for CIViC EID74 study statement.""" + return { + "id": "civic.eid:74", + "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", + "direction": "supports", + "strength": { + "code": "e000005", + "label": "clinical cohort evidence", + "system": "https://go.osu.edu/evidence-codes", + }, + "predicate": "isDiagnosticInclusionCriterionFor", + "alleleOriginQualifier": "somatic", + "subjectVariant": civic_mpid113, + "geneContextQualifier": civic_gid42, + "objectCondition": civic_did15, + "specifiedBy": civic_method, + "reportedIn": [ + { + "id": "civic.source:44", + "label": "Elisei et al., 2008", + "title": "Prognostic significance of somatic RET oncogene mutations in sporadic medullary thyroid cancer: a 10-year follow-up study.", + "pmid": 18073307, + "type": "Document", + } + ], + "type": "VariantDiagnosticStudyStatement", + } + + +@pytest.fixture(scope="module") +def statements(civic_eid2_study_stmt, civic_eid74_study_stmt): + """Create test fixture for CIViC Diagnostic statements.""" + return [civic_eid2_study_stmt, civic_eid74_study_stmt] + + +def test_civic_cdm(data, statements, check_transformed_cdm): + """Test that civic transformation works correctly.""" + check_transformed_cdm(data, statements, DATA_DIR / FILENAME)