From b477340ee9e49dcf273746eea86608f05e712edd Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 21 Jan 2025 12:07:06 -0500 Subject: [PATCH 1/4] wip --- src/metakb/transformers/base.py | 90 +++++++++++-------- src/metakb/transformers/civic.py | 1 - tests/conftest.py | 41 +++++++-- .../test_civic_transformer_diagnostic.py | 14 ++- .../test_moa_transformer_prognostic.py | 13 ++- .../test_moa_transformer_therapeutic.py | 6 +- 6 files changed, 113 insertions(+), 52 deletions(-) diff --git a/src/metakb/transformers/base.py b/src/metakb/transformers/base.py index 30fc510c..166a0115 100644 --- a/src/metakb/transformers/base.py +++ b/src/metakb/transformers/base.py @@ -76,22 +76,22 @@ class MethodId(str, Enum): class CivicEvidenceLevel(str, Enum): """Define constraints for CIViC evidence levels""" - A = "civic.evidence_level:A" - B = "civic.evidence_level:B" - C = "civic.evidence_level:C" - D = "civic.evidence_level:D" - E = "civic.evidence_level:E" + A = "A" + B = "B" + C = "C" + D = "D" + E = "E" class MoaEvidenceLevel(str, Enum): """Define constraints MOAlmanac evidence levels""" - FDA_APPROVED = "moa.evidence_level:fda_approved" - GUIDELINE = "moa.evidence_level:guideline" - CLINICAL_TRIAL = "moa.evidence_level:clinical_trial" - CLINICAL_EVIDENCE = "moa.evidence_level:clinical_evidence" - PRECLINICAL = "moa.evidence_level:preclinical_evidence" - INFERENTIAL = "moa.evidence_level:inferential_evidence" + FDA_APPROVED = "FDA-Approved" + GUIDELINE = "Guideline" + CLINICAL_TRIAL = "Clinical trial" + CLINICAL_EVIDENCE = "Clinical evidence" + PRECLINICAL = "Preclinical evidence" + INFERENTIAL = "Inferential evidence" class TherapyType(str, Enum): @@ -323,40 +323,58 @@ def _evidence_level_to_vicc_concept_mapping( to corresponding vicc concept vocab (value) represented as MappableConcept object """ + + def _get_concept_mapping(exact_mapping: str) -> str: + """Get system for an exact mapping + + :param exact_mapping: Exact mapping code + :raises NotImplementedError: If SourceName not supported yet + :return: System label + """ + if isinstance(exact_mapping, EcoLevel): + id_ = exact_mapping.value.lower() + system = "https://www.evidenceontology.org/term/" + elif isinstance(exact_mapping, CivicEvidenceLevel): + system = ( + "https://civic.readthedocs.io/en/latest/model/evidence/level.html" + ) + id_ = f"civic.evidence_level:{exact_mapping.value}" + elif isinstance(exact_mapping, MoaEvidenceLevel): + system = "https://moalmanac.org/about" + id_ = f"moa.assertion_level:{'_'.join(exact_mapping.value.lower().replace('-', '_').split())}" + else: + raise NotImplementedError + + return ConceptMapping( + coding=Coding(id=id_, system=system, code=exact_mapping), + relation=Relation.EXACT_MATCH, + ) + mappings = {} for item in self._vicc_concept_vocabs: primary_code = item.id.split(":")[-1] - concept_mappings = [ - ConceptMapping( - coding=Coding( - system="https://go.osu.edu/evidence-codes", - code=code(primary_code), - ), - relation=Relation.EXACT_MATCH, - ) - ] - for exact_mapping in item.exact_mappings: - system_prefix = exact_mapping.split(":")[0].split(".")[0] - - try: - system = SourceName(system_prefix).as_print_case() - except ValueError: - system = system_prefix - - concept_mappings.append( + concept_mappings = [ ConceptMapping( - coding=Coding(system=system, code=code(exact_mapping)), + coding=Coding( + id=item.id, + system="https://go.osu.edu/evidence-codes", + code=code(primary_code), + ), relation=Relation.EXACT_MATCH, ) + ] + + concept_mappings.extend( + _get_concept_mapping(exact_mapping_) + for exact_mapping_ in item.exact_mappings ) - mappings[exact_mapping] = MappableConcept( - conceptType="Evidence Strength", - label=item.term, - primaryCode=primary_code, - mappings=concept_mappings, - ) + mappings[exact_mapping] = MappableConcept( + label=item.term, + primaryCode=primary_code, + mappings=concept_mappings, + ) return mappings diff --git a/src/metakb/transformers/civic.py b/src/metakb/transformers/civic.py index c23093b2..926e05ef 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -12,7 +12,6 @@ Extension, MappableConcept, Relation, - code, ) from ga4gh.va_spec.aac_2017.models import ( VariantDiagnosticProposition, diff --git a/tests/conftest.py b/tests/conftest.py index b545d90c..2d6deca9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -362,19 +362,23 @@ def civic_eid2997_study_stmt( "description": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test", "direction": "supports", "strength": { - "conceptType": "Evidence Strength", "primaryCode": "e000001", "label": "authoritative evidence", "mappings": [ { "coding": { + "id": "vicc:e000001", "system": "https://go.osu.edu/evidence-codes", "code": "e000001", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:A"}, + "coding": { + "id": "civic.evidence_level:A", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "A", + }, "relation": "exactMatch", }, ], @@ -984,13 +988,18 @@ def civic_eid816_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000005", "system": "https://go.osu.edu/evidence-codes", "code": "e000005", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:B"}, + "coding": { + "id": "civic.evidence_level:B", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "B", + }, "relation": "exactMatch", }, ], @@ -1038,13 +1047,18 @@ def civic_eid9851_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000001", "system": "https://go.osu.edu/evidence-codes", "code": "e000001", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:A"}, + "coding": { + "id": "civic.evidence_level:A", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "A", + }, "relation": "exactMatch", }, ], @@ -1157,13 +1171,18 @@ def civic_eid26_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000005", "system": "https://go.osu.edu/evidence-codes", "code": "e000005", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:B"}, + "coding": { + "id": "civic.evidence_level:B", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "B", + }, "relation": "exactMatch", }, ], @@ -1481,19 +1500,25 @@ def moa_aid66_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000009", "system": "https://go.osu.edu/evidence-codes", "code": "e000009", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:D"}, + "coding": { + "id": "civic.evidence_level:D", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "D", + }, "relation": "exactMatch", }, { "coding": { - "system": "MOA", - "code": "moa.evidence_level:preclinical_evidence", + "id": "moa.assertion_level:preclinical_evidence", + "system": "https://moalmanac.org/about", + "code": "Preclinical evidence", }, "relation": "exactMatch", }, diff --git a/tests/unit/transformers/test_civic_transformer_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py index b451e592..e07f295e 100644 --- a/tests/unit/transformers/test_civic_transformer_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -251,13 +251,18 @@ def civic_eid2_study_stmt(civic_method, civic_mpid99, civic_gid38, civic_did2): "mappings": [ { "coding": { + "id": "vicc:e000005", "system": "https://go.osu.edu/evidence-codes", "code": "e000005", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:B"}, + "coding": { + "id": "civic.evidence_level:B", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "B", + }, "relation": "exactMatch", }, ], @@ -518,13 +523,18 @@ def civic_eid74_study_stmt(civic_method, civic_mpid113, civic_gid42, civic_did15 "mappings": [ { "coding": { + "id": "vicc:e000005", "system": "https://go.osu.edu/evidence-codes", "code": "e000005", }, "relation": "exactMatch", }, { - "coding": {"system": "CIViC", "code": "civic.evidence_level:B"}, + "coding": { + "id": "civic.evidence_level:B", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "B", + }, "relation": "exactMatch", }, ], diff --git a/tests/unit/transformers/test_moa_transformer_prognostic.py b/tests/unit/transformers/test_moa_transformer_prognostic.py index 9ac9349e..cf4f149b 100644 --- a/tests/unit/transformers/test_moa_transformer_prognostic.py +++ b/tests/unit/transformers/test_moa_transformer_prognostic.py @@ -192,6 +192,7 @@ def moa_aid141_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000007", "system": "https://go.osu.edu/evidence-codes", "code": "e000007", }, @@ -199,8 +200,9 @@ def moa_aid141_study_stmt( }, { "coding": { - "system": "MOA", - "code": "moa.evidence_level:clinical_evidence", + "id": "moa.assertion_level:clinical_evidence", + "system": "https://moalmanac.org/about", + "code": "Clinical evidence", }, "relation": "exactMatch", }, @@ -357,13 +359,18 @@ def moa_aid532_study_stmt( "mappings": [ { "coding": { + "id": "vicc:e000003", "system": "https://go.osu.edu/evidence-codes", "code": "e000003", }, "relation": "exactMatch", }, { - "coding": {"system": "MOA", "code": "moa.evidence_level:guideline"}, + "coding": { + "id": "moa.assertion_level:guideline", + "system": "https://moalmanac.org/about", + "code": "Guideline", + }, "relation": "exactMatch", }, ], diff --git a/tests/unit/transformers/test_moa_transformer_therapeutic.py b/tests/unit/transformers/test_moa_transformer_therapeutic.py index 27651672..49b5c024 100644 --- a/tests/unit/transformers/test_moa_transformer_therapeutic.py +++ b/tests/unit/transformers/test_moa_transformer_therapeutic.py @@ -134,6 +134,7 @@ def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method "mappings": [ { "coding": { + "id": "vicc:e000002", "system": "https://go.osu.edu/evidence-codes", "code": "e000002", }, @@ -141,8 +142,9 @@ def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method }, { "coding": { - "system": "MOA", - "code": "moa.evidence_level:fda_approved", + "id": "moa.assertion_level:fda_approved", + "system": "https://moalmanac.org/about", + "code": "FDA-Approved", }, "relation": "exactMatch", }, From face8b84f51fa870b5a611cac85fc50c65fb530f Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Sun, 26 Jan 2025 19:29:13 -0500 Subject: [PATCH 2/4] update --- src/metakb/load_data.py | 8 +++++--- src/metakb/query.py | 2 ++ tests/unit/database/test_database.py | 7 ++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/metakb/load_data.py b/src/metakb/load_data.py index 4b092e8b..b8f254a4 100644 --- a/src/metakb/load_data.py +++ b/src/metakb/load_data.py @@ -479,9 +479,11 @@ def _add_statement_evidence(tx: ManagedTransaction, statement_in: dict) -> None: if strength: strength_key_fields = ("primaryCode", "label") - strength_keys = [_create_parameterized_query( - strength, strength_key_fields, entity_param_prefix="strength_" - )] + strength_keys = [ + _create_parameterized_query( + strength, strength_key_fields, entity_param_prefix="strength_" + ) + ] for k in strength_key_fields: v = strength.get(k) if v: diff --git a/src/metakb/query.py b/src/metakb/query.py index b28fc16c..81abf89c 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -561,6 +561,8 @@ def _get_nested_stmt( elif rel_type == "IS_REPORTED_IN": params["reportedIn"] = [self._get_document(node)] elif rel_type == "HAS_STRENGTH": + if "mappings" in node: + node["mappings"] = json.loads(node["mappings"]) params["strength"] = MappableConcept(**node) elif rel_type == "HAS_THERAPEUTIC": params["proposition"]["objectTherapeutic"] = self._get_therapy_or_group( diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index 296afb13..9c52194f 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -634,11 +634,7 @@ def test_statement_rules( check_node_props(statement, civic_aid6_ss_cp, expected_keys) -def test_strength_rules( - driver: Driver, - check_relation_count, - civic_eid2997_study_stmt -): +def test_strength_rules(driver: Driver, check_relation_count, civic_eid2997_study_stmt): """Verify property and relationship rules for Strength nodes.""" query = """ MATCH (s:Strength) @@ -686,6 +682,7 @@ def test_strength_rules( strength_node["mappings"] = json.loads(strength_node["mappings"]) assert strength_node == civic_eid2997_study_stmt["strength"] + def test_classification_rules( driver: Driver, check_unique_property, check_relation_count, civic_aid6_statement ): From 38d46e15dd6d98510d9670e9666136d37c9f25b6 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Sun, 26 Jan 2025 19:30:11 -0500 Subject: [PATCH 3/4] revert --- src/metakb/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/metakb/cli.py b/src/metakb/cli.py index 1766df7a..b832e679 100644 --- a/src/metakb/cli.py +++ b/src/metakb/cli.py @@ -546,8 +546,8 @@ async def update( ``False``. :param sources: source name(s) to update. If empty, update all sources. """ # noqa: D301 - #_harvest_sources(sources, refresh_source_caches) - #await _transform_sources(sources, None, normalizer_db_url) + _harvest_sources(sources, refresh_source_caches) + await _transform_sources(sources, None, normalizer_db_url) start = timer() _echo_info("Loading Neo4j database...") From 77abcab124a8807ececc35f259239010e29beb8b Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Sun, 26 Jan 2025 19:31:48 -0500 Subject: [PATCH 4/4] cleanup --- src/metakb/transformers/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/metakb/transformers/base.py b/src/metakb/transformers/base.py index 36b23d89..e5d9ff55 100644 --- a/src/metakb/transformers/base.py +++ b/src/metakb/transformers/base.py @@ -328,12 +328,12 @@ def _evidence_level_to_vicc_concept_mapping( object """ - def _get_concept_mapping(exact_mapping: str) -> str: + def _get_concept_mapping(exact_mapping: str) -> ConceptMapping: """Get system for an exact mapping :param exact_mapping: Exact mapping code :raises NotImplementedError: If SourceName not supported yet - :return: System label + :return: Concept mapping object """ if isinstance(exact_mapping, EcoLevel): id_ = exact_mapping.value.lower() @@ -342,10 +342,10 @@ def _get_concept_mapping(exact_mapping: str) -> str: system = ( "https://civic.readthedocs.io/en/latest/model/evidence/level.html" ) - id_ = f"civic.evidence_level:{exact_mapping.value}" + id_ = f"{SourceName.CIVIC.value}.evidence_level:{exact_mapping.value}" elif isinstance(exact_mapping, MoaEvidenceLevel): system = "https://moalmanac.org/about" - id_ = f"moa.assertion_level:{'_'.join(exact_mapping.value.lower().replace('-', '_').split())}" + id_ = f"{SourceName.MOA.value}.assertion_level:{'_'.join(exact_mapping.value.lower().replace('-', '_').split())}" else: raise NotImplementedError