From 3fddcf8a23975fdc4beb938dc0c68589e157854f Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 2 Apr 2024 11:22:06 -0400 Subject: [PATCH] feat: use MOA representative coordinate to create PSC members (#312) --- src/metakb/transform/moa.py | 46 ++++++++++++++- tests/conftest.py | 68 +++++++++++++++------- tests/unit/transform/test_moa_transform.py | 6 +- 3 files changed, 96 insertions(+), 24 deletions(-) diff --git a/src/metakb/transform/moa.py b/src/metakb/transform/moa.py index 22f30b8f..abb2c2f9 100644 --- a/src/metakb/transform/moa.py +++ b/src/metakb/transform/moa.py @@ -308,12 +308,13 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: "protein_change", "exon", ] + moa_rep_coord = {k: variant.get(k) for k in coordinates_keys} extensions = [ core_models.Extension( - name="MOA representative coordinate", - value={k: variant[k] for k in coordinates_keys}, + name="MOA representative coordinate", value=moa_rep_coord ) ] + members = await self._get_variation_members(moa_rep_coord) # Add mappings data mappings = [ @@ -343,6 +344,7 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: definingContext=moa_variation.root, mappings=mappings or None, extensions=extensions, + members=members, ).model_dump(exclude_none=True) self.able_to_normalize["variations"][variant_id] = { @@ -351,6 +353,46 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: } self.variations.append(psc) + async def _get_variation_members( + self, moa_rep_coord: Dict + ) -> Optional[List[models.Variation]]: + """Get members field for variation object. These are the related variant concepts. + For now, only looks at genomic representative coordinate. 
+ + :param moa_rep_coord: MOA Representative Coordinate + :return: List containing one VRS variation record for associated genomic + representation, if variation-normalizer was able to successfully normalize + """ + members = None + chromosome = moa_rep_coord.get("chromosome") + pos = moa_rep_coord.get("start_position") + ref = moa_rep_coord.get("reference_allele") + alt = moa_rep_coord.get("alternate_allele") + + if all((chromosome, pos is not None, ref and ref != "-", alt and alt != "-")): + gnomad_vcf = f"{chromosome}-{pos}-{ref}-{alt}" + + vrs_genomic_variation = await self.vicc_normalizers.normalize_variation( + [gnomad_vcf] + ) + + if vrs_genomic_variation: + genomic_params = vrs_genomic_variation.model_dump(exclude_none=True) + genomic_params["label"] = gnomad_vcf + members = [models.Variation(**genomic_params)] + else: + logger.debug( + "Variation Normalizer unable to normalize genomic representation: %s", + gnomad_vcf, + ) + else: + logger.debug( + "Not enough information provided to create genomic representation: %s", + moa_rep_coord, + ) + + return members + def _add_genes(self, genes: List[str]) -> None: """Create gene objects for all MOA gene records. 
Mutates instance variables `able_to_normalize['genes']` and `genes`, if diff --git a/tests/conftest.py b/tests/conftest.py index b8168022..ed1b4b52 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -335,34 +335,40 @@ def civic_vid12(): @pytest.fixture(scope="session") -def civic_mpid12(civic_vid12): +def braf_v600e_genomic(): + """Genomic representation for BRAF V600E""" + return { + "id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe", + "digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe", + "type": "Allele", + "location": { + "id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2", + "digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "start": 140753335, + "end": 140753336, + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + } + + +@pytest.fixture(scope="session") +def civic_mpid12(civic_vid12, braf_v600e_genomic): """Create test fixture for CIViC Molecular Profile ID 12""" + genomic_rep = braf_v600e_genomic.copy() + genomic_rep["label"] = "NC_000007.13:g.140453136A>T" + return { "id": "civic.mpid:12", "type": "ProteinSequenceConsequence", "description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. 
However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.", "label": "BRAF V600E", "definingContext": civic_vid12, - "members": [ - { - "id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe", - "label": "NC_000007.13:g.140453136A>T", - "digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe", - "type": "Allele", - "location": { - "id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2", - "digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2", - "type": "SequenceLocation", - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", - }, - "start": 140753335, - "end": 140753336, - }, - "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, - } - ], + "members": [genomic_rep], "aliases": ["VAL600GLU", "V640E", "VAL640GLU"], "mappings": [ { @@ -1683,6 +1689,26 @@ def moa_vid66(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "I"}, }, + "members": [ + { + "id": "ga4gh:VA.HUJOQCml0LngKmUf5IJIYQk9CfKmagbf", + "label": "9-133748283-C-T", + "digest": "HUJOQCml0LngKmUf5IJIYQk9CfKmagbf", + "type": "Allele", + "location": { + "id": "ga4gh:SL.vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P", + "digest": "vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI", + }, + "start": 133748282, + "end": 133748283, + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + } + ], "extensions": [ { "name": "MOA representative coordinate", diff --git a/tests/unit/transform/test_moa_transform.py b/tests/unit/transform/test_moa_transform.py index e4e9a9d5..b29f79b7 100644 --- a/tests/unit/transform/test_moa_transform.py +++ b/tests/unit/transform/test_moa_transform.py @@ -27,8 +27,11 @@ async def data(normalizers): @pytest.fixture(scope="module") -def moa_vid145(): +def moa_vid145(braf_v600e_genomic): """Create a test fixture for MOA VID145.""" + genomic_rep = 
braf_v600e_genomic.copy() + genomic_rep["label"] = "7-140453136-A-T" + return { "id": "moa.variant:145", "type": "ProteinSequenceConsequence", @@ -49,6 +52,7 @@ def moa_vid145(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "E"}, }, + "members": [genomic_rep], "extensions": [ { "name": "MOA representative coordinate",