From bb79636b40bc17fdea876620bc6996b106b55c8e Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 22 Jul 2024 10:11:47 -0400 Subject: [PATCH] build!: update ga4gh.vrs + gene/variation normalizers close #305 * Update domain entity and entity class names * Update tests to add `SequenceLocation.sequence` * Update FastAPI configs to include response model and return Pydantic models instead of dict --- Pipfile | 6 +++--- pyproject.toml | 6 +++--- src/metakb/load_data.py | 2 +- src/metakb/main.py | 12 ++++++++---- src/metakb/schemas/annotation.py | 12 ++++++------ src/metakb/schemas/categorical_variation.py | 4 ++-- tests/conftest.py | 4 ++++ tests/unit/database/test_database.py | 1 + 8 files changed, 28 insertions(+), 19 deletions(-) diff --git a/Pipfile b/Pipfile index e04e008a..2ecddb0d 100644 --- a/Pipfile +++ b/Pipfile @@ -4,9 +4,9 @@ verify_ssl = true name = "pypi" [packages] -"ga4gh.vrs" = "~=2.0.0a8" -gene-normalizer = {version = "~=0.4.0", extras = ["etl"]} -variation-normalizer = "~=0.9.1" +"ga4gh.vrs" = "~=2.0.0a10" +gene-normalizer = {version = "~=0.4.1", extras = ["etl"]} +variation-normalizer = "~=0.10.0" disease-normalizer = {version = "~=0.5.0", extras = ["etl"]} thera-py = {version = "~=0.6.0", extras = ["etl"]} civicpy = "~=3.1" diff --git a/pyproject.toml b/pyproject.toml index 6fa1a76b..99744c33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,9 @@ requires-python = ">=3.10" description = "A search interface for cancer variant interpretations assembled by aggregating and harmonizing across multiple cancer variant interpretation knowledgebases." license = {file = "LICENSE"} dependencies = [ - "ga4gh.vrs~=2.0.0a8", - "gene-normalizer[etl]~=0.4.0", - "variation-normalizer~=0.9.1", + "ga4gh.vrs~=2.0.0a10", + "gene-normalizer[etl]~=0.4.1", + "variation-normalizer~=0.10.0", "disease-normalizer[etl]~=0.5.0", "thera-py[etl]~=0.6.0", "civicpy~=3.1", diff --git a/src/metakb/load_data.py b/src/metakb/load_data.py index ba5615fb..0c6028a3 100644 --- a/src/metakb/load_data.py +++ b/src/metakb/load_data.py @@ -195,7 +195,7 @@ def _add_location(tx: ManagedTransaction, location_in: dict) -> None: loc = location_in.copy() loc_keys = [ f"loc.{key}=${key}" - for key in ("id", "digest", "start", "end", "type") + for key in ("id", "digest", "start", "end", "sequence", "type") if loc.get(key) is not None # start could be 0 ] loc["sequence_reference"] = json.dumps(loc["sequenceReference"]) diff --git a/src/metakb/main.py b/src/metakb/main.py index 964ae923..f515bcc5 100644 --- a/src/metakb/main.py +++ b/src/metakb/main.py @@ -84,6 +84,8 @@ def custom_openapi() -> dict: @app.get( "/api/v2/search/studies", summary=search_studies_summary, + response_model=SearchStudiesService, + response_model_exclude_none=True, description=search_studies_descr, ) async def get_studies( @@ -94,7 +96,7 @@ async def get_studies( study_id: Annotated[str | None, Query(description=s_description)] = None, start: Annotated[int, Query(description=start_description)] = 0, limit: Annotated[int | None, Query(description=limit_description)] = None, -) -> dict: +) -> SearchStudiesService: """Get nested studies from queried concepts that match all conditions provided. For example, if `variation` and `therapy` are provided, will return all studies that have both the provided `variation` and `therapy`. @@ -125,7 +127,7 @@ async def get_studies( service_meta_=ServiceMeta(), warnings=["`start` and `limit` params must both be nonnegative"], ) - return resp.model_dump(exclude_none=True) + return resp _batch_descr = { @@ -140,6 +142,8 @@ async def get_studies( @app.get( "/api/v2/batch_search/studies", summary=_batch_descr["summary"], + response_model=BatchSearchStudiesService, + response_model_exclude_none=True, description=_batch_descr["description"], ) async def batch_get_studies( @@ -149,7 +153,7 @@ async def batch_get_studies( ] = None, start: Annotated[int, Query(description=_batch_descr["arg_start"])] = 0, limit: Annotated[int | None, Query(description=_batch_descr["arg_limit"])] = None, -) -> dict: +) -> BatchSearchStudiesService: """Fetch all studies associated with `any` of the provided variations. :param variations: variations to match against @@ -166,4 +170,4 @@ async def batch_get_studies( warnings=["`start` and `limit` params must both be nonnegative"], ) - return response.model_dump(exclude_none=True) + return response diff --git a/src/metakb/schemas/annotation.py b/src/metakb/schemas/annotation.py index 73b2764a..1dae3d26 100644 --- a/src/metakb/schemas/annotation.py +++ b/src/metakb/schemas/annotation.py @@ -4,7 +4,7 @@ from enum import Enum from typing import Literal -from ga4gh.core.entity_models import IRI, Coding, _DomainEntity, _Entity +from ga4gh.core.entity_models import IRI, Coding, DomainEntity, Entity from pydantic import Field, StrictInt, StrictStr, constr, field_validator @@ -24,7 +24,7 @@ class Direction(str, Enum): NONE = "none" -class Document(_DomainEntity): +class Document(DomainEntity): """a representation of a physical or digital document""" type: Literal["Document"] = "Document" @@ -42,7 +42,7 @@ class Document(_DomainEntity): ) -class Method(_Entity): +class Method(Entity): """A set of instructions that specify how to achieve some objective (e.g. experimental protocols, curation guidelines, rule sets, etc.) """ @@ -57,7 +57,7 @@ class Method(_Entity): ) -class Agent(_Entity): +class Agent(Entity): """An autonomous actor (person, organization, or computational agent) that bears some form of responsibility for an activity taking place, for the existence of an entity, or for another agent's activity. @@ -68,7 +68,7 @@ class Agent(_Entity): subtype: AgentSubtype | None = None -class Contribution(_Entity): +class Contribution(Entity): """The sum of all actions taken by a single agent in contributing to the creation, modification, assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, DataItem, Publication, etc.) @@ -99,7 +99,7 @@ def date_format(cls, v: str | None) -> str | None: return v -class _InformationEntity(_Entity): +class _InformationEntity(Entity): """InformationEntities are abstract (non-physical) entities that are about something (i.e. they carry information about things in the real world). """ diff --git a/src/metakb/schemas/categorical_variation.py b/src/metakb/schemas/categorical_variation.py index 753d00d9..ecb0dbcc 100644 --- a/src/metakb/schemas/categorical_variation.py +++ b/src/metakb/schemas/categorical_variation.py @@ -7,7 +7,7 @@ from enum import Enum from typing import Literal -from ga4gh.core.entity_models import IRI, _DomainEntity +from ga4gh.core.entity_models import IRI, DomainEntity from ga4gh.vrs import models from pydantic import Field, RootModel, StrictStr @@ -28,7 +28,7 @@ class LocationMatchCharacteristic(str, Enum): SUPERINTERVAL = "superinterval" -class _CategoricalVariationBase(_DomainEntity): +class _CategoricalVariationBase(DomainEntity): """Base class for Categorical Variation""" members: list[models.Variation | IRI] | None = Field( diff --git a/tests/conftest.py b/tests/conftest.py index 4494e463..bccec9a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -188,6 +188,7 @@ def civic_mpid33(civic_vid33): }, "start": 55191821, "end": 55191822, + "sequence": "T", }, "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, } @@ -362,6 +363,7 @@ def civic_vid12(): }, "start": 599, "end": 600, + "sequence": "V", }, "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, "expressions": [ @@ -396,6 +398,7 @@ def braf_v600e_genomic(): }, "start": 140753335, "end": 140753336, + "sequence": "A", }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, } @@ -1737,6 +1740,7 @@ def moa_vid66(): }, "start": 133748282, "end": 133748283, + "sequence": "C", }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, } diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index 0db00128..dc1206c9 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -384,6 +384,7 @@ def test_location_rules( "sequence_reference", "start", "end", + "sequence", "type", } assert json.loads(loc["sequence_reference"]) == {