Skip to content

Commit

Permalink
build!: update normalizer + ga4gh.vrs dependencies
Browse files Browse the repository at this point in the history
close #379
  • Loading branch information
korikuzma committed Jul 16, 2024
1 parent d5107ad commit 0eed7b3
Show file tree
Hide file tree
Showing 15 changed files with 118 additions and 151 deletions.
10 changes: 5 additions & 5 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ verify_ssl = true
name = "pypi"

[packages]
"ga4gh.vrs" = "==2.0.0a6"
gene-normalizer = {version = "~=0.3.0-dev1", extras = ["etl"]}
variation-normalizer = "~=0.8.2"
disease-normalizer = {version = "~=0.4.0.dev3", extras = ["etl"]}
thera-py = {version = "~=0.5.0.dev3", extras = ["etl"]}
"ga4gh.vrs" = "~=2.0.0a8"
gene-normalizer = {version = "~=0.4.0", extras = ["etl"]}
variation-normalizer = "~=0.9.1"
disease-normalizer = {version = "~=0.5.0", extras = ["etl"]}
thera-py = {version = "~=0.6.0", extras = ["etl"]}
civicpy = "~=3.1"
requests = "*"
pydantic = "==2.*"
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ requires-python = ">=3.10"
description = "A search interface for cancer variant interpretations assembled by aggregating and harmonizing across multiple cancer variant interpretation knowledgebases."
license = {file = "LICENSE"}
dependencies = [
"ga4gh.vrs==2.0.0a6",
"gene-normalizer[etl]~=0.3.0-dev1",
"variation-normalizer~=0.8.2",
"disease-normalizer[etl]~=0.4.0.dev3",
"thera-py[etl]~=0.5.0.dev3",
"ga4gh.vrs~=2.0.0a8",
"gene-normalizer[etl]~=0.4.0",
"variation-normalizer~=0.9.1",
"disease-normalizer[etl]~=0.5.0",
"thera-py[etl]~=0.6.0",
"civicpy~=3.1",
"requests",
"pydantic==2.*",
Expand Down
8 changes: 5 additions & 3 deletions src/metakb/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _add_gene_or_disease(

obj_keys = [
_create_parameterized_query(
obj, ("id", "label", "description", "aliases", "type")
obj, ("id", "label", "description", "alternativeLabels", "type")
)
]

Expand Down Expand Up @@ -173,7 +173,9 @@ def _add_therapeutic_agent(tx: ManagedTransaction, therapeutic_agent: dict) -> N
:param therapeutic_agent: Therapeutic Agent CDM object
"""
ta = therapeutic_agent.copy()
nonnull_keys = [_create_parameterized_query(ta, ("id", "label", "aliases", "type"))]
nonnull_keys = [
_create_parameterized_query(ta, ("id", "label", "alternativeLabels", "type"))
]

_add_mappings_and_exts_to_obj(ta, nonnull_keys)
nonnull_keys = ", ".join(nonnull_keys)
Expand Down Expand Up @@ -269,7 +271,7 @@ def _add_categorical_variation(

mp_nonnull_keys = [
_create_parameterized_query(
cv, ("id", "label", "description", "aliases", "type")
cv, ("id", "label", "description", "alternativeLabels", "type")
)
]

Expand Down
4 changes: 2 additions & 2 deletions src/metakb/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from disease.database.database import AWS_ENV_VAR_NAME as DISEASE_AWS_ENV_VAR_NAME
from disease.query import QueryHandler as DiseaseQueryHandler
from disease.schemas import NormalizationService as NormalizedDisease
from ga4gh.core._internal.models import Extension
from ga4gh.vrs._internal.models import (
from ga4gh.core.entity_models import Extension
from ga4gh.vrs.models import (
Allele,
CopyNumberChange,
CopyNumberCount,
Expand Down
11 changes: 5 additions & 6 deletions src/metakb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
from copy import copy
from enum import Enum

from ga4gh.core._internal.models import (
Coding,
from ga4gh.core.domain_models import (
Disease,
Extension,
Gene,
TherapeuticAgent,
TherapeuticProcedure,
)
from ga4gh.vrs import models
from ga4gh.core.entity_models import Coding, Expression, Extension
from ga4gh.vrs.models import Variation
from neo4j import Driver
from neo4j.graph import Node
from pydantic import ValidationError
Expand Down Expand Up @@ -578,7 +577,7 @@ def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict]
elif variation_k.startswith("expression_hgvs_"):
syntax = variation_k.split("expression_")[-1].replace("_", ".")
expressions.extend(
models.Expression(syntax=syntax, value=hgvs_expr)
Expression(syntax=syntax, value=hgvs_expr)
for hgvs_expr in variation_v
)

Expand All @@ -588,7 +587,7 @@ def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict]
v_params["location"]["sequenceReference"] = json.loads(
loc_params["sequence_reference"]
)
variations.append(models.Variation(**v_params).model_dump())
variations.append(Variation(**v_params).model_dump())
return variations

def _get_cat_var(self, node: dict) -> CategoricalVariation:
Expand Down
28 changes: 14 additions & 14 deletions src/metakb/schemas/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum
from typing import Literal

from ga4gh.core import core_models
from ga4gh.core.entity_models import IRI, Coding, _DomainEntity, _Entity
from pydantic import Field, StrictInt, StrictStr, constr, field_validator


Expand All @@ -24,7 +24,7 @@ class Direction(str, Enum):
NONE = "none"


class Document(core_models._MappableEntity): # noqa: SLF001
class Document(_DomainEntity):
"""a representation of a physical or digital document"""

type: Literal["Document"] = "Document"
Expand All @@ -42,22 +42,22 @@ class Document(core_models._MappableEntity): # noqa: SLF001
)


class Method(core_models._Entity): # noqa: SLF001
class Method(_Entity):
"""A set of instructions that specify how to achieve some objective (e.g.
experimental protocols, curation guidelines, rule sets, etc.)
"""

type: Literal["Method"] = Field("Method", description="MUST be 'Method'.")
isReportedIn: Document | core_models.IRI | None = Field(
isReportedIn: Document | IRI | None = Field(
None, description="A document in which the information content is expressed."
)
subtype: core_models.Coding | None = Field(
subtype: Coding | None = Field(
None,
description="A more specific type of entity the method represents (e.g. Variant Interpretation Guideline, Experimental Protocol)",
)


class Agent(core_models._Entity): # noqa: SLF001
class Agent(_Entity):
"""An autonomous actor (person, organization, or computational agent) that bears
some form of responsibility for an activity taking place, for the existence of an
entity, or for another agent's activity.
Expand All @@ -68,7 +68,7 @@ class Agent(core_models._Entity): # noqa: SLF001
subtype: AgentSubtype | None = None


class Contribution(core_models._Entity): # noqa: SLF001
class Contribution(_Entity):
"""The sum of all actions taken by a single agent in contributing to the creation,
modification, assessment, or deprecation of a particular entity (e.g. a Statement,
EvidenceLine, DataItem, Publication, etc.)
Expand All @@ -77,7 +77,7 @@ class Contribution(core_models._Entity): # noqa: SLF001
type: Literal["Contribution"] = "Contribution"
contributor: Agent | None = None
date: StrictStr | None = None
activity: core_models.Coding | None = Field(
activity: Coding | None = Field(
None,
description="SHOULD describe a concept descending from the Contributor Role Ontology.",
)
Expand All @@ -99,19 +99,19 @@ def date_format(cls, v: str | None) -> str | None:
return v


class _InformationEntity(core_models._Entity): # noqa: SLF001
class _InformationEntity(_Entity):
"""InformationEntities are abstract (non-physical) entities that are about something
(i.e. they carry information about things in the real world).
"""

id: StrictStr
type: StrictStr
specifiedBy: Method | core_models.IRI | None = Field(
specifiedBy: Method | IRI | None = Field(
None,
description="A `Method` that describes all or part of the process through which the information was generated.",
)
contributions: list[Contribution] | None = None
isReportedIn: list[Document | core_models.IRI] | None = Field(
isReportedIn: list[Document | IRI] | None = Field(
None, description="A document in which the information content is expressed."
)
# recordMetadata (might be added in the future)
Expand All @@ -123,12 +123,12 @@ class DataItem(_InformationEntity):
"""

type: Literal["DataItem"] = Field("DataItem", description="MUST be 'DataItem'.")
subtype: core_models.Coding | None = Field(
subtype: Coding | None = Field(
None,
description="A specific type of data the DataItem object represents (e.g. a specimen count, a patient weight, an allele frequency, a p-value, a confidence score)",
)
value: StrictStr
unit: core_models.Coding | None = None
unit: Coding | None = None


class _StatementBase(_InformationEntity):
Expand All @@ -142,7 +142,7 @@ class _StatementBase(_InformationEntity):
direction: Direction = Field(
..., description="direction of this Statement with respect to the predicate."
)
strength: core_models.Coding | core_models.IRI | None = Field(
strength: Coding | IRI | None = Field(
None,
description="The overall strength of support for the Statement based on all evidence assessed.",
)
Expand Down
10 changes: 5 additions & 5 deletions src/metakb/schemas/categorical_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from enum import Enum
from typing import Literal

from ga4gh.core import core_models
from ga4gh.core.entity_models import IRI, _DomainEntity
from ga4gh.vrs import models
from pydantic import Field, RootModel, StrictStr

Expand All @@ -28,10 +28,10 @@ class LocationMatchCharacteristic(str, Enum):
SUPERINTERVAL = "superinterval"


class _CategoricalVariationBase(core_models._DomainEntity): # noqa: SLF001
class _CategoricalVariationBase(_DomainEntity):
"""Base class for Categorical Variation"""

members: list[models.Variation | core_models.IRI] | None = Field(
members: list[models.Variation | IRI] | None = Field(
None,
description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.",
)
Expand All @@ -51,7 +51,7 @@ class ProteinSequenceConsequence(_CategoricalVariationBase):
"ProteinSequenceConsequence",
description="MUST be 'ProteinSequenceConsequence'.",
)
definingContext: models.Allele | core_models.IRI = Field(
definingContext: models.Allele | IRI = Field(
...,
description="The `VRS Allele <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele>`_ object that is congruent with (projects to the same codons) as alleles on other protein reference sequences.",
)
Expand All @@ -69,7 +69,7 @@ class CanonicalAllele(_CategoricalVariationBase):
type: Literal["CanonicalAllele"] = Field(
"CanonicalAllele", description="MUST be 'CanonicalAllele'."
)
definingContext: models.Allele | core_models.IRI = Field(
definingContext: models.Allele | IRI = Field(
...,
description="The `VRS Allele <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele>`_ object that is congruent with variants on alternate reference sequences.",
)
Expand Down
21 changes: 11 additions & 10 deletions src/metakb/schemas/variation_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from enum import Enum
from typing import Literal

from ga4gh.core import core_models
from ga4gh.core.domain_models import Condition, Gene, TherapeuticProcedure
from ga4gh.core.entity_models import IRI, Coding
from ga4gh.vrs import models
from pydantic import BaseModel, Field

Expand Down Expand Up @@ -69,15 +70,15 @@ class _VariantStatement(_StatementBase):
"""A `Statement` describing the impact of a variant."""

# extends subject
variant: models.Variation | CategoricalVariation | core_models.IRI = Field(
variant: models.Variation | CategoricalVariation | IRI = Field(
..., description="A variation object that is the subject of the Statement."
)


class _VariantClassification(_VariantStatement):
"""A `VariantStatement` classifying the impact of a variant."""

classification: core_models.Coding | core_models.IRI = Field(
classification: Coding | IRI = Field(
...,
description="A methodological, summary classification about the impact of a variant.",
)
Expand All @@ -94,7 +95,7 @@ class VariantPathogenicityQualifier(BaseModel):
None,
description="The pattern of inheritance expected for the pathogenic effect of this variant.",
)
geneContext: core_models.Gene | None = Field(
geneContext: Gene | None = Field(
None, description="A gene context that qualifies the Statement."
)

Expand All @@ -110,7 +111,7 @@ class VariantPathogenicity(_VariantClassification):
# extends predicate
predicate: Literal["isCausalFor"] | None = None
# extends object
condition: core_models.Condition | core_models.IRI = Field(
condition: Condition | IRI = Field(
..., description="The `Condition` for which the variant impact is stated."
)
# extends qualifiers
Expand All @@ -123,7 +124,7 @@ class _VariantStudySummary(_VariantStatement):
"""

# extends isReportedIn
isReportedIn: list[Document | core_models.IRI] = Field(
isReportedIn: list[Document | IRI] = Field(
...,
description="A document in which the information content is expressed.",
min_length=1,
Expand All @@ -141,7 +142,7 @@ class _VariantOncogenicityStudyQualifier(BaseModel):
None,
description="Whether the statement should be interpreted in the context of the variant being rare or common.",
)
geneContext: core_models.Gene | None = Field(
geneContext: Gene | None = Field(
None, description="A gene context that qualifies the Statement."
)

Expand All @@ -155,7 +156,7 @@ class VariantOncogenicityStudy(_VariantStudySummary):
# extends predicate
predicate: VariantOncogenicityStudyPredicate
# extends object
tumorType: core_models.Condition | core_models.IRI = Field(
tumorType: Condition | IRI = Field(
..., description="The tumor type for which the variant impact is evaluated."
)
# extends qualifiers
Expand All @@ -174,11 +175,11 @@ class VariantTherapeuticResponseStudy(_VariantStudySummary):
# extends predicate
predicate: VariantTherapeuticResponseStudyPredicate
# extends object
therapeutic: core_models.TherapeuticProcedure | core_models.IRI = Field(
therapeutic: TherapeuticProcedure | IRI = Field(
...,
description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.",
)
tumorType: core_models.Condition | core_models.IRI = Field(
tumorType: Condition | IRI = Field(
...,
description="The tumor type context in which the variant impact is evaluated.",
)
Expand Down
7 changes: 3 additions & 4 deletions src/metakb/transform/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@
NormalizationService as NormalizedDisease,
)
from ga4gh.core import sha512t24u
from ga4gh.core._internal.models import (
Coding,
from ga4gh.core.domain_models import (
CombinationTherapy,
Disease,
Extension,
Gene,
TherapeuticAgent,
TherapeuticSubstituteGroup,
)
from ga4gh.vrs._internal.models import Allele
from ga4gh.core.entity_models import Coding, Extension
from ga4gh.vrs.models import Allele
from pydantic import BaseModel, StrictStr, ValidationError
from therapy.schemas import NormalizationService as NormalizedTherapy

Expand Down
Loading

0 comments on commit 0eed7b3

Please sign in to comment.