Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add support for transforming civic diagnostic evidence items #414

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 9 additions & 19 deletions src/metakb/harvesters/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,24 +151,27 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict
assertion_record = {
"id": assertion["assertion_id"],
"context": assertion["context"],
"deprecated": assertion["deprecated"],
"description": assertion["description"],
"disease": {
"name": assertion["disease"],
"oncotree_code": assertion["oncotree_code"],
"oncotree_term": assertion["oncotree_term"],
},
"therapy_name": assertion["therapy_name"],
"therapy_type": assertion["therapy_type"],
"clinical_significance": self._get_therapy(
assertion["therapy_resistance"], assertion["therapy_sensitivity"]
),
"therapy": {
"name": assertion["therapy_name"],
"type": assertion["therapy_type"],
"strategy": assertion["therapy_strategy"],
"resistance": assertion["therapy_resistance"],
"sensitivity": assertion["therapy_sensitivity"],
},
"predictive_implication": assertion["predictive_implication"],
"favorable_prognosis": assertion["favorable_prognosis"],
"created_on": assertion["created_on"],
"last_updated": assertion["last_updated"],
"submitted_by": assertion["submitted_by"],
"validated": assertion["validated"],
"source_ids": assertion["sources"][0]["source_id"],
"source_id": assertion["sources"][0]["source_id"],
}

for v in variants_list:
Expand All @@ -177,19 +180,6 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict

return assertion_record

def _get_therapy(self, resistance: bool, sensitivity: bool) -> str | None:
"""Get therapy response data.

:param resistance: `True` if Therapy Resistance. `False` if not Therapy Resistance
:param sensitivity: `True` if Therapy Sensitivity. `False` if not Therapy Sensitivity
:return: whether the therapy response is resistance or sensitivity
"""
if resistance:
return "resistance"
if sensitivity:
return "sensitivity"
return None

def _get_feature(self, v: dict) -> dict:
"""Get feature name from the harvested variants

Expand Down
32 changes: 18 additions & 14 deletions src/metakb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from ga4gh.core.entity_models import Coding, Document, Extension, Method
from ga4gh.va_spec.profiles.var_study_stmt import (
VariantDiagnosticStudyStatement,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)
Expand Down Expand Up @@ -69,6 +70,14 @@ class TherapeuticProcedureType(str, Enum):
SUBSTITUTES = "TherapeuticSubstituteGroup"


# Statement types to corresponding class mapping
STMT_TYPE_TO_CLASS = {
"VariantDiagnosticStudyStatement": VariantDiagnosticStudyStatement,
"VariantPrognosticStudyStatement": VariantPrognosticStudyStatement,
"VariantTherapeuticResponseStudyStatement": VariantTherapeuticResponseStudyStatement,
}


def _deserialize_field(node: dict, field_name: str) -> None | dict:
"""Deserialize JSON blob property.

Expand Down Expand Up @@ -493,8 +502,8 @@ def _get_nested_stmts(self, statement_nodes: list[Node]) -> list[dict]:

def _get_nested_stmt(self, stmt_node: Node) -> dict:
"""Get information related to a statement
Only VariantTherapeuticResponseStudyStatement and VariantPrognosticStudyStatement
are supported at the moment
Only VariantTherapeuticResponseStudyStatement, VariantPrognosticStudyStatement,
and VariantDiagnosticStudyStatement are supported at the moment

:param stmt_node: Neo4j graph node for statement
:return: Nested statement
Expand All @@ -503,15 +512,14 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict:
if study_stmt_type not in {
"VariantTherapeuticResponseStudyStatement",
"VariantPrognosticStudyStatement",
"VariantDiagnosticStudyStatement",
}:
return {}

if study_stmt_type == "VariantPrognosticStudyStatement":
study_stmt_cls = VariantPrognosticStudyStatement
condition_key = "objectCondition"
else:
study_stmt_cls = VariantTherapeuticResponseStudyStatement
if study_stmt_type == "VariantTherapeuticResponseStudyStatement":
condition_key = "conditionQualifier"
else:
condition_key = "objectCondition"

params = {
condition_key: None,
Expand Down Expand Up @@ -559,7 +567,7 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict:
else:
logger.warning("relation type not supported: %s", rel_type)

return study_stmt_cls(**params).model_dump()
return STMT_TYPE_TO_CLASS[study_stmt_type](**params).model_dump()

@staticmethod
def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension:
Expand Down Expand Up @@ -917,10 +925,6 @@ async def batch_search_statements(
statement_nodes = [r[0] for r in result]
response.statement_ids = [n["id"] for n in statement_nodes]
stmts = self._get_nested_stmts(statement_nodes)
response.statements = [
VariantTherapeuticResponseStudyStatement(**s)
if s["type"] == "VariantTherapeuticResponseStudyStatement"
else VariantPrognosticStudyStatement(**s)
for s in stmts
]

response.statements = [STMT_TYPE_TO_CLASS[s["type"]](**s) for s in stmts]
return response
9 changes: 7 additions & 2 deletions src/metakb/schemas/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Literal

from ga4gh.va_spec.profiles.var_study_stmt import (
VariantDiagnosticStudyStatement,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)
Expand Down Expand Up @@ -48,7 +49,9 @@ class SearchStatementsService(BaseModel):
warnings: list[StrictStr] = []
statement_ids: list[StrictStr] = []
statements: list[
VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement
VariantTherapeuticResponseStudyStatement
| VariantPrognosticStudyStatement
| VariantDiagnosticStudyStatement
] = []
service_meta_: ServiceMeta

Expand All @@ -73,6 +76,8 @@ class BatchSearchStatementsService(BaseModel):
warnings: list[StrictStr] = []
statement_ids: list[StrictStr] = []
statements: list[
VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement
VariantTherapeuticResponseStudyStatement
| VariantPrognosticStudyStatement
| VariantDiagnosticStudyStatement
] = []
service_meta_: ServiceMeta
5 changes: 4 additions & 1 deletion src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from ga4gh.core.entity_models import Coding, Document, Extension, Method
from ga4gh.va_spec.profiles.var_study_stmt import (
VariantDiagnosticStudyStatement,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)
Expand Down Expand Up @@ -111,7 +112,9 @@ class TransformedData(BaseModel):
"""Define model for transformed data"""

statements: list[
VariantTherapeuticResponseStudyStatement | VariantPrognosticStudyStatement
VariantTherapeuticResponseStudyStatement
| VariantPrognosticStudyStatement
| VariantDiagnosticStudyStatement
] = []
categorical_variants: list[CategoricalVariant] = []
variations: list[Allele] = []
Expand Down
9 changes: 8 additions & 1 deletion src/metakb/transformers/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
)
from ga4gh.va_spec.profiles.var_study_stmt import (
AlleleOriginQualifier,
DiagnosticPredicate,
PrognosticPredicate,
TherapeuticResponsePredicate,
VariantDiagnosticStudyStatement,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)
Expand Down Expand Up @@ -91,6 +93,8 @@
"RESISTANCE": TherapeuticResponsePredicate.RESISTANCE,
"POOR_OUTCOME": PrognosticPredicate.WORSE_OUTCOME,
"BETTER_OUTCOME": PrognosticPredicate.BETTER_OUTCOME,
"POSITIVE": DiagnosticPredicate.INCLUSIVE,
"NEGATIVE": DiagnosticPredicate.EXCLUSIVE,
}


Expand Down Expand Up @@ -121,6 +125,7 @@ class _CivicEvidenceType(str, Enum):

PREDICTIVE = "PREDICTIVE"
PROGNOSTIC = "PROGNOSTIC"
DIAGNOSTIC = "DIAGNOSTIC"


class _VariationCache(BaseModel):
Expand Down Expand Up @@ -359,8 +364,10 @@ def _add_variant_study_stmt(
if evidence_type == _CivicEvidenceType.PREDICTIVE:
params["objectTherapeutic"] = civic_therapeutic
statement = VariantTherapeuticResponseStudyStatement(**params)
else:
elif evidence_type == _CivicEvidenceType.PROGNOSTIC:
statement = VariantPrognosticStudyStatement(**params)
else:
statement = VariantDiagnosticStudyStatement(**params)

self.processed_data.statements.append(statement)

Expand Down
Loading
Loading