Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use MOA representative coordinate to create PSC members #312

Merged
merged 8 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions src/metakb/transform/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,13 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
"protein_change",
"exon",
]
moa_rep_coord = {k: variant.get(k) for k in coordinates_keys}
extensions = [
core_models.Extension(
name="MOA representative coordinate",
value={k: variant[k] for k in coordinates_keys},
name="MOA representative coordinate", value=moa_rep_coord
)
]
members = await self._get_variation_members(moa_rep_coord)

# Add mappings data
mappings = [
Expand Down Expand Up @@ -343,6 +344,7 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
definingContext=moa_variation.root,
mappings=mappings or None,
extensions=extensions,
members=members,
).model_dump(exclude_none=True)

self.able_to_normalize["variations"][variant_id] = {
Expand All @@ -351,6 +353,46 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None:
}
self.variations.append(psc)

async def _get_variation_members(
    self, moa_rep_coord: Dict
) -> Optional[List[models.Variation]]:
    """Get members field for variation object. This is the related variant concepts.
    For now, only looks at genomic representative coordinate.

    :param moa_rep_coord: MOA Representative Coordinate. The keys read here are
        ``chromosome``, ``start_position``, ``reference_allele`` and
        ``alternate_allele``; missing keys are treated as absent values.
    :return: List containing one VRS variation record for associated genomic
        representation, if variation-normalizer was able to successfully
        normalize. ``None`` if the coordinate is incomplete or the normalizer
        returned nothing.
    """
    chromosome = moa_rep_coord.get("chromosome")
    pos = moa_rep_coord.get("start_position")
    ref = moa_rep_coord.get("reference_allele")
    alt = moa_rep_coord.get("alternate_allele")

    # An allele of "-" (or an empty/missing one) is treated as unusable. The
    # position is only compared against None, so a falsy value such as 0 is
    # still accepted.
    has_genomic_coord = all(
        (chromosome, pos is not None, ref and ref != "-", alt and alt != "-")
    )
    if not has_genomic_coord:
        logger.debug(
            "Not enough information provided to create genomic representation: %s",
            moa_rep_coord,
        )
        return None

    # Query string of the form <chromosome>-<position>-<ref>-<alt>; it is also
    # reused below as the label of the resulting member.
    gnomad_vcf = f"{chromosome}-{pos}-{ref}-{alt}"

    vrs_genomic_variation = await self.vicc_normalizers.normalize_variation(
        [gnomad_vcf]
    )
    if not vrs_genomic_variation:
        logger.debug(
            "Variation Normalizer unable to normalize genomic representation: %s",
            gnomad_vcf,
        )
        return None

    genomic_params = vrs_genomic_variation.model_dump(exclude_none=True)
    genomic_params["label"] = gnomad_vcf
    return [models.Variation(**genomic_params)]

def _add_genes(self, genes: List[str]) -> None:
"""Create gene objects for all MOA gene records.
Mutates instance variables `able_to_normalize['genes']` and `genes`, if
Expand Down
68 changes: 47 additions & 21 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,34 +335,40 @@ def civic_vid12():


@pytest.fixture(scope="session")
def civic_mpid12(civic_vid12):
def braf_v600e_genomic():
    """Genomic representation for BRAF V600E.

    Returns a plain dict describing a single ``Allele`` with its
    ``SequenceLocation`` and a literal sequence state. Callers that need to
    add keys (e.g. ``label``) should work on a copy.
    """
    sequence_reference = {
        "type": "SequenceReference",
        "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
    }
    location = {
        "id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
        "digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
        "type": "SequenceLocation",
        "sequenceReference": sequence_reference,
        "start": 140753335,
        "end": 140753336,
    }
    state = {"type": "LiteralSequenceExpression", "sequence": "T"}

    return {
        "id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe",
        "digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe",
        "type": "Allele",
        "location": location,
        "state": state,
    }


@pytest.fixture(scope="session")
def civic_mpid12(civic_vid12, braf_v600e_genomic):
"""Create test fixture for CIViC Molecular Profile ID 12"""
genomic_rep = braf_v600e_genomic.copy()
genomic_rep["label"] = "NC_000007.13:g.140453136A>T"

return {
"id": "civic.mpid:12",
"type": "ProteinSequenceConsequence",
"description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.",
"label": "BRAF V600E",
"definingContext": civic_vid12,
"members": [
{
"id": "ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe",
"label": "NC_000007.13:g.140453136A>T",
"digest": "Otc5ovrw906Ack087o1fhegB4jDRqCAe",
"type": "Allele",
"location": {
"id": "ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
"digest": "nhul5x5P_fKjGEpY9PEkMIekJfZaKom2",
"type": "SequenceLocation",
"sequenceReference": {
"type": "SequenceReference",
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"start": 140753335,
"end": 140753336,
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
],
"members": [genomic_rep],
"aliases": ["VAL600GLU", "V640E", "VAL640GLU"],
"mappings": [
{
Expand Down Expand Up @@ -1683,6 +1689,26 @@ def moa_vid66():
},
"state": {"type": "LiteralSequenceExpression", "sequence": "I"},
},
"members": [
{
"id": "ga4gh:VA.HUJOQCml0LngKmUf5IJIYQk9CfKmagbf",
"label": "9-133748283-C-T",
"digest": "HUJOQCml0LngKmUf5IJIYQk9CfKmagbf",
"type": "Allele",
"location": {
"id": "ga4gh:SL.vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P",
"digest": "vd9Kb9rCPWBEUZ_wbBxZyulgOAq-jk0P",
"type": "SequenceLocation",
"sequenceReference": {
"type": "SequenceReference",
"refgetAccession": "SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI",
},
"start": 133748282,
"end": 133748283,
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
],
"extensions": [
{
"name": "MOA representative coordinate",
Expand Down
6 changes: 5 additions & 1 deletion tests/unit/transform/test_moa_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ async def data(normalizers):


@pytest.fixture(scope="module")
def moa_vid145():
def moa_vid145(braf_v600e_genomic):
"""Create a test fixture for MOA VID145."""
genomic_rep = braf_v600e_genomic.copy()
genomic_rep["label"] = "7-140453136-A-T"

return {
"id": "moa.variant:145",
"type": "ProteinSequenceConsequence",
Expand All @@ -49,6 +52,7 @@ def moa_vid145():
},
"state": {"type": "LiteralSequenceExpression", "sequence": "E"},
},
"members": [genomic_rep],
"extensions": [
{
"name": "MOA representative coordinate",
Expand Down
Loading