Skip to content

Commit

Permalink
feat: use MOA representative coordinate to create PSC members (#312)
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Apr 2, 2024
1 parent f389c6e commit 3fddcf8
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 24 deletions.
46 changes: 44 additions & 2 deletions src/metakb/transform/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,13 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
"protein_change",
"exon",
]
moa_rep_coord = {k: variant.get(k) for k in coordinates_keys}
extensions = [
core_models.Extension(
name="MOA representative coordinate",
value={k: variant[k] for k in coordinates_keys},
name="MOA representative coordinate", value=moa_rep_coord
)
]
members = await self._get_variation_members(moa_rep_coord)

# Add mappings data
mappings = [
Expand Down Expand Up @@ -343,6 +344,7 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
definingContext=moa_variation.root,
mappings=mappings or None,
extensions=extensions,
members=members,
).model_dump(exclude_none=True)

self.able_to_normalize["variations"][variant_id] = {
Expand All @@ -351,6 +353,46 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
}
self.variations.append(psc)

async def _get_variation_members(
    self, moa_rep_coord: Dict
) -> Optional[List[models.Variation]]:
    """Get members field for variation object. This is the related variant concepts.
    For now, only looks at genomic representative coordinate.

    :param moa_rep_coord: MOA Representative Coordinate. The keys ``chromosome``,
        ``start_position``, ``reference_allele`` and ``alternate_allele`` are read;
        any of them may be missing or ``None``.
    :return: List containing one VRS variation record for associated genomic
        representation, if variation-normalizer was able to successfully normalize.
        ``None`` if the coordinate is incomplete or could not be normalized.
    """
    chromosome = moa_rep_coord.get("chromosome")
    pos = moa_rep_coord.get("start_position")
    ref = moa_rep_coord.get("reference_allele")
    alt = moa_rep_coord.get("alternate_allele")

    # `pos` is compared against None/"" rather than tested for truthiness so a
    # numeric 0 is not mistaken for a missing value, while an empty string is
    # still rejected instead of yielding a malformed "chr--ref-alt" query.
    has_position = pos is not None and pos != ""
    # A "-" allele cannot be used in the gnomAD VCF style string below
    # (presumably MOA's placeholder for an absent allele -- TODO confirm).
    has_alleles = all(allele and allele != "-" for allele in (ref, alt))

    if not (chromosome and has_position and has_alleles):
        logger.debug(
            "Not enough information provided to create genomic representation: %s",
            moa_rep_coord,
        )
        return None

    # gnomAD VCF style query: chromosome-position-ref-alt
    gnomad_vcf = f"{chromosome}-{pos}-{ref}-{alt}"
    vrs_genomic_variation = await self.vicc_normalizers.normalize_variation(
        [gnomad_vcf]
    )

    if not vrs_genomic_variation:
        logger.debug(
            "Variation Normalizer unable to normalize genomic representation: %s",
            gnomad_vcf,
        )
        return None

    genomic_params = vrs_genomic_variation.model_dump(exclude_none=True)
    # Label the member with the query that produced it
    genomic_params["label"] = gnomad_vcf
    return [models.Variation(**genomic_params)]

def _add_genes(self, genes: List[str]) -> None:
"""Create gene objects for all MOA gene records.
Mutates instance variables `able_to_normalize['genes']` and `genes`, if
Expand Down
68 changes: 47 additions & 21 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,34 +335,40 @@ def civic_vid12():


@pytest.fixture(scope="session")
def civic_mpid12(civic_vid12):
def braf_v600e_genomic():
    """Build the expected Allele record for the genomic form of BRAF V600E.

    The record carries no ``label``; fixtures that depend on this one copy the
    dict and add their own source-specific label.
    """
    sequence_reference = {
        "type": "SequenceReference",
        "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
    }
    sequence_location = {
        "id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
        "digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
        "type": "SequenceLocation",
        "sequenceReference": sequence_reference,
        "start": 140753335,
        "end": 140753336,
    }
    alt_state = {"type": "LiteralSequenceExpression", "sequence": "T"}

    return {
        "id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe",
        "digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe",
        "type": "Allele",
        "location": sequence_location,
        "state": alt_state,
    }


@pytest.fixture(scope="session")
def civic_mpid12(civic_vid12, braf_v600e_genomic):
"""Create test fixture for CIViC Molecular Profile ID 12"""
genomic_rep = braf_v600e_genomic.copy()
genomic_rep["label"] = "NC_000007.13:g.140453136A>T"

return {
"id": "civic.mpid:12",
"type": "ProteinSequenceConsequence",
"description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.",
"label": "BRAF V600E",
"definingContext": civic_vid12,
"members": [
{
"id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe",
"label": "NC_000007.13:g.140453136A>T",
"digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe",
"type": "Allele",
"location": {
"id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
"digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
"type": "SequenceLocation",
"sequenceReference": {
"type": "SequenceReference",
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"start": 140753335,
"end": 140753336,
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
],
"members": [genomic_rep],
"aliases": ["VAL600GLU", "V640E", "VAL640GLU"],
"mappings": [
{
Expand Down Expand Up @@ -1683,6 +1689,26 @@ def moa_vid66():
},
"state": {"type": "LiteralSequenceExpression", "sequence": "I"},
},
"members": [
{
"id": "ga4gh:VA.HUJOQCml0LngKmUf5IJIYQk9CfKmagbf",
"label": "9-133748283-C-T",
"digest": "HUJOQCml0LngKmUf5IJIYQk9CfKmagbf",
"type": "Allele",
"location": {
"id": "ga4gh:SL.vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P",
"digest": "vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P",
"type": "SequenceLocation",
"sequenceReference": {
"type": "SequenceReference",
"refgetAccession": "SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI",
},
"start": 133748282,
"end": 133748283,
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
],
"extensions": [
{
"name": "MOA representative coordinate",
Expand Down
6 changes: 5 additions & 1 deletion tests/unit/transform/test_moa_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ async def data(normalizers):


@pytest.fixture(scope="module")
def moa_vid145():
def moa_vid145(braf_v600e_genomic):
"""Create a test fixture for MOA VID145."""
genomic_rep = braf_v600e_genomic.copy()
genomic_rep["label"] = "7-140453136-A-T"

return {
"id": "moa.variant:145",
"type": "ProteinSequenceConsequence",
Expand All @@ -49,6 +52,7 @@ def moa_vid145():
},
"state": {"type": "LiteralSequenceExpression", "sequence": "E"},
},
"members": [genomic_rep],
"extensions": [
{
"name": "MOA representative coordinate",
Expand Down

0 comments on commit 3fddcf8

Please sign in to comment.