diff --git a/Pipfile b/Pipfile index b77d5bdc..e04e008a 100644 --- a/Pipfile +++ b/Pipfile @@ -4,11 +4,11 @@ verify_ssl = true name = "pypi" [packages] -"ga4gh.vrs" = "==2.0.0a6" -gene-normalizer = {version = "~=0.3.0-dev1", extras = ["etl"]} -variation-normalizer = "~=0.8.2" -disease-normalizer = {version = "~=0.4.0.dev3", extras = ["etl"]} -thera-py = {version = "~=0.5.0.dev3", extras = ["etl"]} +"ga4gh.vrs" = "~=2.0.0a8" +gene-normalizer = {version = "~=0.4.0", extras = ["etl"]} +variation-normalizer = "~=0.9.1" +disease-normalizer = {version = "~=0.5.0", extras = ["etl"]} +thera-py = {version = "~=0.6.0", extras = ["etl"]} civicpy = "~=3.1" requests = "*" pydantic = "==2.*" diff --git a/pyproject.toml b/pyproject.toml index 77ddecc5..6fa1a76b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,11 +23,11 @@ requires-python = ">=3.10" description = "A search interface for cancer variant interpretations assembled by aggregating and harmonizing across multiple cancer variant interpretation knowledgebases." license = {file = "LICENSE"} dependencies = [ - "ga4gh.vrs==2.0.0a6", - "gene-normalizer[etl]~=0.3.0-dev1", - "variation-normalizer~=0.8.2", - "disease-normalizer[etl]~=0.4.0.dev3", - "thera-py[etl]~=0.5.0.dev3", + "ga4gh.vrs~=2.0.0a8", + "gene-normalizer[etl]~=0.4.0", + "variation-normalizer~=0.9.1", + "disease-normalizer[etl]~=0.5.0", + "thera-py[etl]~=0.6.0", "civicpy~=3.1", "requests", "pydantic==2.*", diff --git a/src/metakb/load_data.py b/src/metakb/load_data.py index 01535741..ba5615fb 100644 --- a/src/metakb/load_data.py +++ b/src/metakb/load_data.py @@ -104,7 +104,7 @@ def _add_gene_or_disease( obj_keys = [ _create_parameterized_query( - obj, ("id", "label", "description", "aliases", "type") + obj, ("id", "label", "description", "alternativeLabels", "type") ) ] @@ -173,7 +173,9 @@ def _add_therapeutic_agent(tx: ManagedTransaction, therapeutic_agent: dict) -> N :param therapeutic_agent: Therapeutic Agent CDM object """ ta = therapeutic_agent.copy() - nonnull_keys = [_create_parameterized_query(ta, ("id", "label", "aliases", "type"))] + nonnull_keys = [ + _create_parameterized_query(ta, ("id", "label", "alternativeLabels", "type")) + ] _add_mappings_and_exts_to_obj(ta, nonnull_keys) nonnull_keys = ", ".join(nonnull_keys) @@ -269,7 +271,7 @@ def _add_categorical_variation( mp_nonnull_keys = [ _create_parameterized_query( - cv, ("id", "label", "description", "aliases", "type") + cv, ("id", "label", "description", "alternativeLabels", "type") ) ] diff --git a/src/metakb/normalizers.py b/src/metakb/normalizers.py index 57671b7e..adb8c006 100644 --- a/src/metakb/normalizers.py +++ b/src/metakb/normalizers.py @@ -11,8 +11,8 @@ from disease.database.database import AWS_ENV_VAR_NAME as DISEASE_AWS_ENV_VAR_NAME from disease.query import QueryHandler as DiseaseQueryHandler from disease.schemas import NormalizationService as NormalizedDisease -from ga4gh.core._internal.models import Extension -from ga4gh.vrs._internal.models import ( +from ga4gh.core.entity_models import Extension +from ga4gh.vrs.models import ( Allele, CopyNumberChange, CopyNumberCount, diff --git a/src/metakb/query.py b/src/metakb/query.py index 6d1fe3b3..ffe9ef0b 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -5,15 +5,14 @@ from copy import copy from enum import Enum -from ga4gh.core._internal.models import ( - Coding, +from ga4gh.core.domain_models import ( Disease, - Extension, Gene, TherapeuticAgent, TherapeuticProcedure, ) -from ga4gh.vrs import models +from ga4gh.core.entity_models import Coding, Expression, Extension +from ga4gh.vrs.models import Variation from neo4j import Driver from neo4j.graph import Node from pydantic import ValidationError @@ -578,7 +577,7 @@ def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict] elif variation_k.startswith("expression_hgvs_"): syntax = variation_k.split("expression_")[-1].replace("_", ".") expressions.extend( - models.Expression(syntax=syntax, value=hgvs_expr) + Expression(syntax=syntax, value=hgvs_expr) for hgvs_expr in variation_v ) @@ -588,7 +587,7 @@ def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict] v_params["location"]["sequenceReference"] = json.loads( loc_params["sequence_reference"] ) - variations.append(models.Variation(**v_params).model_dump()) + variations.append(Variation(**v_params).model_dump()) return variations def _get_cat_var(self, node: dict) -> CategoricalVariation: diff --git a/src/metakb/schemas/annotation.py b/src/metakb/schemas/annotation.py index 982f153d..73b2764a 100644 --- a/src/metakb/schemas/annotation.py +++ b/src/metakb/schemas/annotation.py @@ -4,7 +4,7 @@ from enum import Enum from typing import Literal -from ga4gh.core import core_models +from ga4gh.core.entity_models import IRI, Coding, _DomainEntity, _Entity from pydantic import Field, StrictInt, StrictStr, constr, field_validator @@ -24,7 +24,7 @@ class Direction(str, Enum): NONE = "none" -class Document(core_models._MappableEntity): # noqa: SLF001 +class Document(_DomainEntity): """a representation of a physical or digital document""" type: Literal["Document"] = "Document" @@ -42,22 +42,22 @@ class Document(core_models._MappableEntity): # noqa: SLF001 ) -class Method(core_models._Entity): # noqa: SLF001 +class Method(_Entity): """A set of instructions that specify how to achieve some objective (e.g. experimental protocols, curation guidelines, rule sets, etc.) """ type: Literal["Method"] = Field("Method", description="MUST be 'Method'.") - isReportedIn: Document | core_models.IRI | None = Field( + isReportedIn: Document | IRI | None = Field( None, description="A document in which the information content is expressed." ) - subtype: core_models.Coding | None = Field( + subtype: Coding | None = Field( None, description="A more specific type of entity the method represents (e.g. Variant Interpretation Guideline, Experimental Protocol)", ) -class Agent(core_models._Entity): # noqa: SLF001 +class Agent(_Entity): """An autonomous actor (person, organization, or computational agent) that bears some form of responsibility for an activity taking place, for the existence of an entity, or for another agent's activity. @@ -68,7 +68,7 @@ class Agent(core_models._Entity): # noqa: SLF001 subtype: AgentSubtype | None = None -class Contribution(core_models._Entity): # noqa: SLF001 +class Contribution(_Entity): """The sum of all actions taken by a single agent in contributing to the creation, modification, assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, DataItem, Publication, etc.) @@ -77,7 +77,7 @@ class Contribution(core_models._Entity): # noqa: SLF001 type: Literal["Contribution"] = "Contribution" contributor: Agent | None = None date: StrictStr | None = None - activity: core_models.Coding | None = Field( + activity: Coding | None = Field( None, description="SHOULD describe a concept descending from the Contributor Role Ontology.", ) @@ -99,19 +99,19 @@ def date_format(cls, v: str | None) -> str | None: return v -class _InformationEntity(core_models._Entity): # noqa: SLF001 +class _InformationEntity(_Entity): """InformationEntities are abstract (non-physical) entities that are about something (i.e. they carry information about things in the real world). """ id: StrictStr type: StrictStr - specifiedBy: Method | core_models.IRI | None = Field( + specifiedBy: Method | IRI | None = Field( None, description="A `Method` that describes all or part of the process through which the information was generated.", ) contributions: list[Contribution] | None = None - isReportedIn: list[Document | core_models.IRI] | None = Field( + isReportedIn: list[Document | IRI] | None = Field( None, description="A document in which the information content is expressed." ) # recordMetadata (might be added in the future) @@ -123,12 +123,12 @@ class DataItem(_InformationEntity): """ type: Literal["DataItem"] = Field("DataItem", description="MUST be 'DataItem'.") - subtype: core_models.Coding | None = Field( + subtype: Coding | None = Field( None, description="A specific type of data the DataItem object represents (e.g. a specimen count, a patient weight, an allele frequency, a p-value, a confidence score)", ) value: StrictStr - unit: core_models.Coding | None = None + unit: Coding | None = None class _StatementBase(_InformationEntity): @@ -142,7 +142,7 @@ class _StatementBase(_InformationEntity): direction: Direction = Field( ..., description="direction of this Statement with respect to the predicate." ) - strength: core_models.Coding | core_models.IRI | None = Field( + strength: Coding | IRI | None = Field( None, description="The overall strength of support for the Statement based on all evidence assessed.", ) diff --git a/src/metakb/schemas/categorical_variation.py b/src/metakb/schemas/categorical_variation.py index b41c84e0..753d00d9 100644 --- a/src/metakb/schemas/categorical_variation.py +++ b/src/metakb/schemas/categorical_variation.py @@ -7,7 +7,7 @@ from enum import Enum from typing import Literal -from ga4gh.core import core_models +from ga4gh.core.entity_models import IRI, _DomainEntity from ga4gh.vrs import models from pydantic import Field, RootModel, StrictStr @@ -28,10 +28,10 @@ class LocationMatchCharacteristic(str, Enum): SUPERINTERVAL = "superinterval" -class _CategoricalVariationBase(core_models._DomainEntity): # noqa: SLF001 +class _CategoricalVariationBase(_DomainEntity): """Base class for Categorical Variation""" - members: list[models.Variation | core_models.IRI] | None = Field( + members: list[models.Variation | IRI] | None = Field( None, description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", ) @@ -51,7 +51,7 @@ class ProteinSequenceConsequence(_CategoricalVariationBase): "ProteinSequenceConsequence", description="MUST be 'ProteinSequenceConsequence'.", ) - definingContext: models.Allele | core_models.IRI = Field( + definingContext: models.Allele | IRI = Field( ..., description="The `VRS Allele `_ object that is congruent with (projects to the same codons) as alleles on other protein reference sequences.", ) @@ -69,7 +69,7 @@ class CanonicalAllele(_CategoricalVariationBase): type: Literal["CanonicalAllele"] = Field( "CanonicalAllele", description="MUST be 'CanonicalAllele'." ) - definingContext: models.Allele | core_models.IRI = Field( + definingContext: models.Allele | IRI = Field( ..., description="The `VRS Allele `_ object that is congruent with variants on alternate reference sequences.", ) diff --git a/src/metakb/schemas/variation_statement.py b/src/metakb/schemas/variation_statement.py index 0f86fee4..088b020e 100644 --- a/src/metakb/schemas/variation_statement.py +++ b/src/metakb/schemas/variation_statement.py @@ -3,7 +3,8 @@ from enum import Enum from typing import Literal -from ga4gh.core import core_models +from ga4gh.core.domain_models import Condition, Gene, TherapeuticProcedure +from ga4gh.core.entity_models import IRI, Coding from ga4gh.vrs import models from pydantic import BaseModel, Field @@ -69,7 +70,7 @@ class _VariantStatement(_StatementBase): """A `Statement` describing the impact of a variant.""" # extends subject - variant: models.Variation | CategoricalVariation | core_models.IRI = Field( + variant: models.Variation | CategoricalVariation | IRI = Field( ..., description="A variation object that is the subject of the Statement." ) @@ -77,7 +78,7 @@ class _VariantStatement(_StatementBase): class _VariantClassification(_VariantStatement): """A `VariantStatement` classifying the impact of a variant.""" - classification: core_models.Coding | core_models.IRI = Field( + classification: Coding | IRI = Field( ..., description="A methodological, summary classification about the impact of a variant.", ) @@ -94,7 +95,7 @@ class VariantPathogenicityQualifier(BaseModel): None, description="The pattern of inheritance expected for the pathogenic effect of this variant.", ) - geneContext: core_models.Gene | None = Field( + geneContext: Gene | None = Field( None, description="A gene context that qualifies the Statement." ) @@ -110,7 +111,7 @@ class VariantPathogenicity(_VariantClassification): # extends predicate predicate: Literal["isCausalFor"] | None = None # extends object - condition: core_models.Condition | core_models.IRI = Field( + condition: Condition | IRI = Field( ..., description="The `Condition` for which the variant impact is stated." ) # extends qualifiers @@ -123,7 +124,7 @@ class _VariantStudySummary(_VariantStatement): """ # extends isReportedIn - isReportedIn: list[Document | core_models.IRI] = Field( + isReportedIn: list[Document | IRI] = Field( ..., description="A document in which the information content is expressed.", min_length=1, @@ -141,7 +142,7 @@ class _VariantOncogenicityStudyQualifier(BaseModel): None, description="Whether the statement should be interpreted in the context of the variant being rare or common.", ) - geneContext: core_models.Gene | None = Field( + geneContext: Gene | None = Field( None, description="A gene context that qualifies the Statement." ) @@ -155,7 +156,7 @@ class VariantOncogenicityStudy(_VariantStudySummary): # extends predicate predicate: VariantOncogenicityStudyPredicate # extends object - tumorType: core_models.Condition | core_models.IRI = Field( + tumorType: Condition | IRI = Field( ..., description="The tumor type for which the variant impact is evaluated." ) # extends qualifiers @@ -174,11 +175,11 @@ class VariantTherapeuticResponseStudy(_VariantStudySummary): # extends predicate predicate: VariantTherapeuticResponseStudyPredicate # extends object - therapeutic: core_models.TherapeuticProcedure | core_models.IRI = Field( + therapeutic: TherapeuticProcedure | IRI = Field( ..., description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", ) - tumorType: core_models.Condition | core_models.IRI = Field( + tumorType: Condition | IRI = Field( ..., description="The tumor type context in which the variant impact is evaluated.", ) diff --git a/src/metakb/transform/base.py b/src/metakb/transform/base.py index 61a28f2c..84933cb3 100644 --- a/src/metakb/transform/base.py +++ b/src/metakb/transform/base.py @@ -15,16 +15,15 @@ NormalizationService as NormalizedDisease, ) from ga4gh.core import sha512t24u -from ga4gh.core._internal.models import ( - Coding, +from ga4gh.core.domain_models import ( CombinationTherapy, Disease, - Extension, Gene, TherapeuticAgent, TherapeuticSubstituteGroup, ) -from ga4gh.vrs._internal.models import Allele +from ga4gh.core.entity_models import Coding, Extension +from ga4gh.vrs.models import Allele from pydantic import BaseModel, StrictStr, ValidationError from therapy.schemas import NormalizationService as NormalizedTherapy diff --git a/src/metakb/transform/civic.py b/src/metakb/transform/civic.py index 21b2f682..40688da3 100644 --- a/src/metakb/transform/civic.py +++ b/src/metakb/transform/civic.py @@ -5,17 +5,14 @@ from enum import Enum from pathlib import Path -from ga4gh.core._internal.models import ( - Coding, +from ga4gh.core.domain_models import ( Disease, - Extension, Gene, - Mapping, - Relation, TherapeuticAgent, TherapeuticSubstituteGroup, ) -from ga4gh.vrs._internal.models import Expression, Syntax, Variation +from ga4gh.core.entity_models import Coding, ConceptMapping, Extension, Relation, Syntax +from ga4gh.vrs.models import Expression, Variation from pydantic import BaseModel, ValidationError from metakb import APP_ROOT @@ -88,7 +85,7 @@ class _VariationCache(BaseModel): vrs_variation: Variation civic_gene_id: str variant_types: list[Coding] | None = None - mappings: list[Mapping] | None = None + mappings: list[ConceptMapping] | None = None aliases: list[str] | None = None coordinates: dict | None members: list[Variation] | None = None @@ -439,7 +436,7 @@ def _add_protein_consequences( description=mp["description"], label=mp["name"], definingContext=civic_variation_data.vrs_variation.root, - aliases=list(set(aliases)) or None, + alternativeLabels=list(set(aliases)) or None, mappings=civic_variation_data.mappings, extensions=extensions or None, members=civic_variation_data.members, @@ -570,7 +567,7 @@ async def _add_variations(self, variants: list[dict]) -> None: # Get mappings mappings = [ - Mapping( + ConceptMapping( coding=Coding( code=str(variant["id"]), system="https://civicdb.org/variants/", @@ -581,7 +578,7 @@ async def _add_variations(self, variants: list[dict]) -> None: if variant["allele_registry_id"]: mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=variant["allele_registry_id"], system="https://reg.clinicalgenome.org/", @@ -591,7 +588,7 @@ async def _add_variations(self, variants: list[dict]) -> None: ) mappings.extend( - Mapping( + ConceptMapping( coding=Coding( code=ce, system="https://www.ncbi.nlm.nih.gov/clinvar/variation/", @@ -606,7 +603,7 @@ async def _add_variations(self, variants: list[dict]) -> None: if SNP_RE.match(a): a = a.lower() mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=a, system="https://www.ncbi.nlm.nih.gov/snp/", @@ -630,7 +627,7 @@ async def _add_variations(self, variants: list[dict]) -> None: civic_gene_id=f"civic.gid:{variant['gene_id']}", variant_types=variant_types_value or None, mappings=mappings or None, - aliases=aliases or None, + alternativeLabels=aliases or None, coordinates=coordinates or None, members=members, ) @@ -675,7 +672,7 @@ def _add_genes(self, genes: list[dict]) -> None: label=gene["name"], description=gene["description"] if gene["description"] else None, mappings=[ - Mapping( + ConceptMapping( coding=Coding( code=f"ncbigene:{gene['entrez_id']}", system="https://www.ncbi.nlm.nih.gov/gene/", @@ -683,7 +680,7 @@ def _add_genes(self, genes: list[dict]) -> None: relation=Relation.EXACT_MATCH, ) ], - aliases=gene["aliases"] if gene["aliases"] else None, + alternativeLabels=gene["aliases"] if gene["aliases"] else None, extensions=[ Extension(name="gene_normalizer_id", value=normalized_gene_id) ], @@ -739,7 +736,7 @@ def _get_disease(self, disease: dict) -> Disease | None: doid = f"DOID:{doid}" queries = [doid, display_name] mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=doid, system="https://www.disease-ontology.org/", @@ -835,7 +832,7 @@ def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: if ncit_id: queries = [f"ncit:{ncit_id}", label] mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=ncit_id, system="https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", @@ -876,7 +873,7 @@ def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: id=therapy_id, label=label, mappings=mappings if mappings else None, - aliases=therapy["aliases"] if therapy["aliases"] else None, + alternativeLabels=therapy["aliases"] if therapy["aliases"] else None, extensions=extensions, ) diff --git a/src/metakb/transform/moa.py b/src/metakb/transform/moa.py index fc153e8c..e6716a35 100644 --- a/src/metakb/transform/moa.py +++ b/src/metakb/transform/moa.py @@ -6,16 +6,13 @@ from urllib.parse import quote from ga4gh.core import sha512t24u -from ga4gh.core._internal.models import ( - Coding, +from ga4gh.core.domain_models import ( Disease, - Extension, Gene, - Mapping, - Relation, TherapeuticAgent, ) -from ga4gh.vrs import models +from ga4gh.core.entity_models import Coding, ConceptMapping, Extension, Relation +from ga4gh.vrs.models import Variation from metakb import APP_ROOT from metakb.harvesters.moa import MoaHarvestedData @@ -305,7 +302,7 @@ async def _add_protein_consequences(self, variants: list[dict]) -> None: params = vrs_variation.model_dump(exclude_none=True) moa_variant_id = f"moa.variant:{variant_id}" params["id"] = vrs_variation.id - moa_variation = models.Variation(**params) + moa_variation = Variation(**params) # Add MOA representative coordinate data to extensions coordinates_keys = [ @@ -326,7 +323,7 @@ async def _add_protein_consequences(self, variants: list[dict]) -> None: # Add mappings data mappings = [ - Mapping( + ConceptMapping( coding=Coding( code=str(variant_id), system="https://moalmanac.org/api/features/", @@ -337,7 +334,7 @@ async def _add_protein_consequences(self, variants: list[dict]) -> None: if variant["rsid"]: mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=variant["rsid"], system="https://www.ncbi.nlm.nih.gov/snp/", @@ -363,7 +360,7 @@ async def _add_protein_consequences(self, variants: list[dict]) -> None: async def _get_variation_members( self, moa_rep_coord: dict - ) -> list[models.Variation] | None: + ) -> list[Variation] | None: """Get members field for variation object. This is the related variant concepts. FOr now, only looks at genomic representative coordinate. @@ -387,7 +384,7 @@ async def _get_variation_members( if vrs_genomic_variation: genomic_params = vrs_genomic_variation.model_dump(exclude_none=True) genomic_params["label"] = gnomad_vcf - members = [models.Variation(**genomic_params)] + members = [Variation(**genomic_params)] else: logger.debug( "Variation Normalizer unable to normalize genomic representation: %s", @@ -436,7 +433,7 @@ def _add_documents(self, sources: list) -> None: if source["nct"]: mappings = [ - Mapping( + ConceptMapping( coding=Coding( code=source["nct"], system="https://clinicaltrials.gov/search?term=", @@ -558,7 +555,7 @@ def _get_disease(self, disease: dict) -> dict | None: ot_term = disease["oncotree_term"] if ot_code: mappings.append( - Mapping( + ConceptMapping( coding=Coding( code=ot_code, system="https://oncotree.mskcc.org/", diff --git a/tests/conftest.py b/tests/conftest.py index 977e869b..4494e463 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,12 +64,10 @@ def cetuximab_extensions(): """Create test fixture for cetuximab extensions""" return [ { - "type": "Extension", "name": "therapy_normalizer_data", "value": {"normalized_id": "rxcui:318341", "label": "cetuximab"}, }, { - "type": "Extension", "name": "regulatory_approval", "value": { "approval_rating": "ChEMBL", @@ -129,12 +127,10 @@ def encorafenib_extensions(): """Create test fixture for encorafenib extensions""" return [ { - "type": "Extension", "name": "therapy_normalizer_data", "value": {"normalized_id": "rxcui:2049106", "label": "encorafenib"}, }, { - "type": "Extension", "name": "regulatory_approval", "value": { "approval_rating": "ChEMBL", @@ -196,7 +192,7 @@ def civic_mpid33(civic_vid33): "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, } ], - "aliases": ["LEU858ARG"], + "alternativeLabels": ["LEU858ARG"], "mappings": [ { "coding": { @@ -252,12 +248,10 @@ def civic_mpid33(civic_vid33): "reference_build": "GRCh37", "type": "coordinates", }, - "type": "Extension", }, { "name": "CIViC Molecular Profile Score", "value": 379.0, - "type": "Extension", }, { "name": "Variant types", @@ -268,7 +262,6 @@ def civic_mpid33(civic_vid33): "label": "missense_variant", } ], - "type": "Extension", }, ], } @@ -339,10 +332,16 @@ def civic_gid5(): "relation": "exactMatch", } ], - "aliases": ["B-RAF1", "B-raf", "BRAF", "BRAF-1", "BRAF1", "NS7", "RAFB1"], - "extensions": [ - {"type": "Extension", "name": "gene_normalizer_id", "value": "hgnc:1097"} + "alternativeLabels": [ + "B-RAF1", + "B-raf", + "BRAF", + "BRAF-1", + "BRAF1", + "NS7", + "RAFB1", ], + "extensions": [{"name": "gene_normalizer_id", "value": "hgnc:1097"}], } @@ -415,7 +414,7 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "label": "BRAF V600E", "definingContext": civic_vid12, "members": [genomic_rep], - "aliases": ["VAL600GLU", "V640E", "VAL640GLU"], + "alternativeLabels": ["VAL600GLU", "V640E", "VAL640GLU"], "mappings": [ { "coding": { @@ -464,12 +463,10 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "reference_build": "GRCh37", "type": "coordinates", }, - "type": "Extension", }, { "name": "CIViC Molecular Profile Score", "value": 1378.5, - "type": "Extension", }, { "name": "Variant types", @@ -480,7 +477,6 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "label": "missense_variant", } ], - "type": "Extension", }, ], } @@ -537,7 +533,16 @@ def civic_gid19(): "relation": "exactMatch", } ], - "aliases": ["EGFR", "ERBB", "ERBB1", "ERRP", "HER1", "NISBD2", "PIG61", "mENA"], + "alternativeLabels": [ + "EGFR", + "ERBB", + "ERBB1", + "ERRP", + "HER1", + "NISBD2", + "PIG61", + "mENA", + ], } @@ -557,14 +562,13 @@ def civic_tid146(): "relation": "exactMatch", } ], - "aliases": [ + "alternativeLabels": [ "BIBW2992", "BIBW 2992", "(2e)-N-(4-(3-Chloro-4-Fluoroanilino)-7-(((3s)-Oxolan-3-yl)Oxy)Quinoxazolin-6-yl)-4-(Dimethylamino)But-2-Enamide", ], "extensions": [ { - "type": "Extension", "name": "regulatory_approval", "value": { "approval_rating": "FDA", @@ -589,7 +593,6 @@ def civic_tid146(): }, }, { - "type": "Extension", "name": "therapy_normalizer_data", "value": {"normalized_id": "rxcui:1430438", "label": "afatinib"}, }, @@ -615,7 +618,6 @@ def civic_did8(): ], "extensions": [ { - "type": "Extension", "name": "disease_normalizer_data", "value": { "normalized_id": "ncit:C2926", @@ -654,7 +656,7 @@ def civic_tid28(): "relation": "exactMatch", } ], - "aliases": [ + "alternativeLabels": [ "ABX-EGF", "ABX-EGF Monoclonal Antibody", "ABX-EGF, Clone E7.6.3", @@ -668,12 +670,10 @@ def civic_tid28(): ], "extensions": [ { - "type": "Extension", "name": "therapy_normalizer_data", "value": {"normalized_id": "rxcui:263034", "label": "panitumumab"}, }, { - "type": "Extension", "name": "regulatory_approval", "value": { "approval_rating": "ChEMBL", @@ -723,7 +723,7 @@ def civic_tid16(cetuximab_extensions): "relation": "exactMatch", } ], - "aliases": [ + "alternativeLabels": [ "Cetuximab Biosimilar CDP-1", "Cetuximab Biosimilar CMAB009", "Cetuximab Biosimilar KL 140", @@ -746,7 +746,6 @@ def civic_tsg(civic_tid16, civic_tid28): "substitutes": [civic_tid16, civic_tid28], "extensions": [ { - "type": "Extension", "name": "civic_therapy_interaction_type", "value": "SUBSTITUTES", } @@ -770,7 +769,7 @@ def civic_tid483(encorafenib_extensions): "relation": "exactMatch", } ], - "aliases": ["Braftovi", "LGX 818", "LGX-818", "LGX818"], + "alternativeLabels": ["Braftovi", "LGX 818", "LGX-818", "LGX818"], "extensions": encorafenib_extensions, } @@ -784,7 +783,6 @@ def civic_ct(civic_tid483, civic_tid16): "components": [civic_tid483, civic_tid16], "extensions": [ { - "type": "Extension", "name": "civic_therapy_interaction_type", "value": "COMBINATION", } @@ -810,7 +808,6 @@ def civic_did11(): ], "extensions": [ { - "type": "Extension", "name": "disease_normalizer_data", "value": { "normalized_id": "ncit:C4978", @@ -1017,12 +1014,10 @@ def civic_vid99(): "ensembl_version": 75, "reference_build": "GRCh37", }, - "type": "Extension", }, { "name": "civic_actionability_score", "value": "100.5", - "type": "Extension", }, { "name": "variant_group", @@ -1034,7 +1029,6 @@ def civic_vid99(): "type": "variant_group", } ], - "type": "Extension", }, ], "structural_type": "SO:0001583", @@ -1159,9 +1153,8 @@ def civic_vid113(): "ensembl_version": 75, "reference_build": "GRCh37", }, - "type": "Extension", }, - {"name": "civic_actionability_score", "value": "86", "type": "Extension"}, + {"name": "civic_actionability_score", "value": "86"}, { "name": "variant_group", "value": [ @@ -1172,7 +1165,6 @@ def civic_vid113(): "type": "variant_group", } ], - "type": "Extension", }, ], "structural_type": "SO:0001583", @@ -1306,9 +1298,8 @@ def civic_vid1686(): "ensembl_version": 75, "reference_build": "GRCh37", }, - "type": "Extension", }, - {"name": "civic_actionability_score", "value": "30", "type": "Extension"}, + {"name": "civic_actionability_score", "value": "30"}, { "name": "variant_group", "value": [ @@ -1318,7 +1309,6 @@ def civic_vid1686(): "type": "variant_group", } ], - "type": "Extension", }, ], "structural_type": "SO:0001583", @@ -1447,9 +1437,8 @@ def civic_vid65(): "ensembl_version": 75, "reference_build": "GRCh37", }, - "type": "Extension", }, - {"name": "civic_actionability_score", "value": "67", "type": "Extension"}, + {"name": "civic_actionability_score", "value": "67"}, { "name": "variant_group", "value": [ @@ -1459,7 +1448,6 @@ def civic_vid65(): "type": "variant_group", } ], - "type": "Extension", }, ], "structural_type": "SO:0001583", @@ -1583,9 +1571,8 @@ def civic_vid258(): "ensembl_version": 75, "reference_build": "GRCh37", }, - "type": "Extension", }, - {"name": "civic_actionability_score", "value": "55", "type": "Extension"}, + {"name": "civic_actionability_score", "value": "55"}, ], "structural_type": "SO:0001583", "expressions": [ @@ -1767,7 +1754,6 @@ def moa_vid66(): "protein_change": "p.T315I", "exon": "5", }, - "type": "Extension", } ], "mappings": [ @@ -1796,9 +1782,7 @@ def moa_abl1(): "id": "moa.normalize.gene:ABL1", "type": "Gene", "label": "ABL1", - "extensions": [ - {"type": "Extension", "name": "gene_normalizer_id", "value": "hgnc:76"} - ], + "extensions": [{"name": "gene_normalizer_id", "value": "hgnc:76"}], } @@ -1811,7 +1795,6 @@ def moa_imatinib(): "label": "Imatinib", "extensions": [ { - "type": "Extension", "name": "regulatory_approval", "value": { "approval_rating": "FDA", @@ -1902,7 +1885,6 @@ def moa_imatinib(): }, }, { - "type": "Extension", "name": "therapy_normalizer_data", "value": {"normalized_id": "rxcui:282388", "label": "imatinib"}, }, @@ -1919,7 +1901,6 @@ def moa_chronic_myelogenous_leukemia(): "label": "Chronic Myelogenous Leukemia", "extensions": [ { - "type": "Extension", "name": "disease_normalizer_data", "value": { "normalized_id": "ncit:C3174", @@ -2010,9 +1991,7 @@ def moa_source45(): """Create a test fixture for MOA source 44.""" return { "id": "moa.source:45", - "extensions": [ - {"type": "Extension", "name": "source_type", "value": "Journal"} - ], + "extensions": [{"name": "source_type", "value": "Journal"}], "type": "Document", "title": "Gorre, Mercedes E., et al. Clinical resistance to STI-571 cancer therapy caused by BCR-ABL gene mutation or amplification. Science 293.5531 (2001): 876-880.", "url": "https://doi.org/10.1126/science.1062538", @@ -2067,7 +2046,6 @@ def _dict_check(expected_d: dict, actual_d: dict, is_cdm: bool = False) -> None: new_extensions.append( { "name": f"{normalizer_data_type}_normalizer_id", - "type": "Extension", "value": ext["value"]["normalized_id"], } ) diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index a1ab8562..0db00128 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -223,7 +223,7 @@ def test_gene_rules( "description", "mappings", "type", - "aliases", + "alternativeLabels", } check_node_props(gene, civic_gid5, expected_keys, extension_names) @@ -329,7 +329,7 @@ def test_categorical_variation_rules( "id", "label", "description", - "aliases", + "alternativeLabels", "civic_molecular_profile_score", "civic_representative_coordinate", "mappings", @@ -339,7 +339,7 @@ def test_categorical_variation_rules( assert cv["type"] == civic_mpid12["type"] assert cv["label"] == civic_mpid12["label"] assert cv["description"] == civic_mpid12["description"] - assert set(cv["aliases"]) == set(civic_mpid12["aliases"]) + assert set(cv["alternativeLabels"]) == set(civic_mpid12["alternativeLabels"]) assert isinstance(cv["civic_molecular_profile_score"], float) crc = json.loads(cv["civic_representative_coordinate"]) assert set(crc.keys()) == { @@ -360,7 +360,7 @@ def test_categorical_variation_rules( variant_types = json.loads(cv["variant_types"]) for vt in variant_types: - assert set(vt.keys()) == {"label", "system", "version", "code"} + assert set(vt.keys()) == {"label", "system", "code"} def test_location_rules( @@ -453,7 +453,7 @@ def test_therapeutic_procedure_rules( expected_keys = { "id", "label", - "aliases", + "alternativeLabels", "therapy_normalizer_id", "regulatory_approval", "mappings", diff --git a/tests/unit/search/test_search_studies.py b/tests/unit/search/test_search_studies.py index 4751cbef..78160e42 100644 --- a/tests/unit/search/test_search_studies.py +++ b/tests/unit/search/test_search_studies.py @@ -1,7 +1,7 @@ """Test search study methods""" import pytest -from ga4gh.core._internal.models import Extension +from ga4gh.core.entity_models import Extension from metakb.query import QueryHandler diff --git a/tests/unit/transform/test_moa_transform.py b/tests/unit/transform/test_moa_transform.py index 69084059..e2c1b5ac 100644 --- a/tests/unit/transform/test_moa_transform.py +++ b/tests/unit/transform/test_moa_transform.py @@ -67,7 +67,6 @@ def moa_vid145(braf_v600e_genomic): "protein_change": "p.V600E", "exon": "15", }, - "type": "Extension", } ], "mappings": [ @@ -132,7 +131,6 @@ def moa_aid155_study(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): "components": [moa_cetuximab, moa_encorafenib], "extensions": [ { - "type": "Extension", "name": "moa_therapy_type", "value": "Targeted therapy", } @@ -144,7 +142,6 @@ def moa_aid155_study(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): "label": "Colorectal Adenocarcinoma", "extensions": [ { - "type": "Extension", "name": "disease_normalizer_data", "value": { "normalized_id": "ncit:C5105", @@ -172,7 +169,6 @@ def moa_aid155_study(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): "label": "BRAF", "extensions": [ { - "type": "Extension", "name": "gene_normalizer_id", "value": "hgnc:1097", } @@ -183,9 +179,7 @@ def moa_aid155_study(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): "isReportedIn": [ { "id": "moa.source:63", - "extensions": [ - {"type": "Extension", "name": "source_type", "value": "FDA"} - ], + "extensions": [{"name": "source_type", "value": "FDA"}], "type": "Document", "title": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020.", "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf",