From 3c687d9366dfcc0187158182ff1cfe6e6e01a665 Mon Sep 17 00:00:00 2001 From: Katie Stahl Date: Mon, 29 Jul 2024 13:50:36 -0400 Subject: [PATCH] build!: updating to vrs 2.0 models --- README.md | 4 +- client/src/services/ResponseModels.ts | 141 +++++------------- requirements.txt | 11 +- server/pyproject.toml | 3 +- .../src/curfu/devtools/build_client_types.py | 2 +- server/src/curfu/gene_services.py | 3 +- server/src/curfu/routers/meta.py | 2 +- server/src/curfu/schemas.py | 7 +- 8 files changed, 50 insertions(+), 123 deletions(-) diff --git a/README.md b/README.md index a37d72f3..12b1d0ca 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ source venv/bin/activate python3 -m pip install -e ".[dev,tests]" # make sure to include the extra dependencies! ``` -Acquire two sets of static assets and place all of them within the `server/curation/data` directory: +Acquire two sets of static assets and place all of them within the `server/src/curfu/data` directory: 1. Gene autocomplete files, providing legal gene search terms to the client autocomplete component. One file each is used for entity types `aliases`, `assoc_with`, `xrefs`, `prev_symbols`, `labels`, and `symbols`. Each should be named according to the pattern `gene__.tsv`. These can be regenerated with the shell command `curfu_devtools genes`. @@ -39,7 +39,7 @@ Acquire two sets of static assets and place all of them within the `server/curat Your data/directory should look something like this: ``` -server/curfu/data +server/src/curfu/data ├── domain_lookup_2022-01-20.tsv ├── gene_aliases_suggest_20211025.tsv ├── gene_assoc_with_suggest_20211025.tsv diff --git a/client/src/services/ResponseModels.ts b/client/src/services/ResponseModels.ts index 2b822703..f51a15d1 100644 --- a/client/src/services/ResponseModels.ts +++ b/client/src/services/ResponseModels.ts @@ -94,27 +94,13 @@ export interface RegulatoryElement { type?: "RegulatoryElement"; regulatory_class: RegulatoryClass; feature_id?: string; - associated_gene?: GeneDescriptor; - feature_location?: LocationDescriptor; -} -/** - * This descriptor is intended to reference VRS Gene value objects. - */ -export interface GeneDescriptor { - id: CURIE; - type?: "GeneDescriptor"; - label?: string; - description?: string; - xrefs?: CURIE[]; - alternate_labels?: string[]; - extensions?: Extension[]; - gene_id?: CURIE; - gene?: Gene; + associated_gene?: Gene; + feature_location?: SequenceLocation; } /** * The Extension class provides VODs with a means to extend descriptions * with other attributes unique to a content provider. These extensions are - * not expected to be natively understood under VRSATILE, but may be used + * not expected to be natively understood under VRS, but may be used * for pre-negotiated exchange of message attributes when needed. */ export interface Extension { @@ -129,57 +115,33 @@ export interface Extension { */ export interface Gene { type?: "Gene"; - /** - * A CURIE reference to a Gene concept - */ - gene_id: CURIE; + id: string; + label: string; } /** - * This descriptor is intended to reference VRS Location value objects. + * A referenced Sequence */ -export interface LocationDescriptor { - id: CURIE; - type?: "LocationDescriptor"; - label?: string; - description?: string; - xrefs?: CURIE[]; - alternate_labels?: string[]; - extensions?: Extension[]; - location_id?: CURIE; - location?: SequenceLocation | ChromosomeLocation; +export interface SequenceReference { + // refseq id of the referenced sequence + id?: CURIE; + type?: "SequenceReference"; + // VRS computed identifier for the sequence accession + refgetAccession: string; } /** - * A Location defined by an interval on a referenced Sequence. + * A Location defined by the start/end coordinates on a referenced Sequence. */ export interface SequenceLocation { /** - * Variation Id. MUST be unique within document. + * A VRS Computed Identifier for the Sequence location. */ - _id?: CURIE; + id?: CURIE; type?: "SequenceLocation"; - /** - * A VRS Computed Identifier for the reference Sequence. - */ - sequence_id: CURIE; - /** - * Reference sequence region defined by a SequenceInterval. - */ - interval: SequenceInterval | SimpleInterval; -} -/** - * A SequenceInterval represents a span on a Sequence. Positions are always - * represented by contiguous spans using interbase coordinates or coordinate ranges. - */ -export interface SequenceInterval { - type?: "SequenceInterval"; - /** - * The start coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`. - */ - start: DefiniteRange | IndefiniteRange | Number; - /** - * The end coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`. - */ - end: DefiniteRange | IndefiniteRange | Number; + sequenceReference: SequenceReference; + // start coordinate of the sequence location + start: number; + // end coordinate of the sequence location + end: number; } /** * A bounded, inclusive range of numbers. @@ -222,22 +184,6 @@ export interface Number { */ value: number; } -/** - * DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always - * represented by contiguous spans using interbase coordinates. - * This class is deprecated. Use SequenceInterval instead. - */ -export interface SimpleInterval { - type?: "SimpleInterval"; - /** - * The start coordinate - */ - start: number; - /** - * The end coordinate - */ - end: number; -} /** * A Location on a chromosome defined by a species and chromosome name. */ @@ -286,16 +232,16 @@ export interface TranscriptSegmentElement { exon_start_offset?: number; exon_end?: number; exon_end_offset?: number; - gene_descriptor: GeneDescriptor; - element_genomic_start?: LocationDescriptor; - element_genomic_end?: LocationDescriptor; + gene: Gene; + element_genomic_start?: SequenceLocation; + element_genomic_end?: SequenceLocation; } /** * Define Gene Element class. */ export interface GeneElement { type?: "GeneElement"; - gene_descriptor: GeneDescriptor; + gene: Gene; } /** * Define Templated Sequence Element class. @@ -304,7 +250,7 @@ export interface GeneElement { */ export interface TemplatedSequenceElement { type?: "TemplatedSequenceElement"; - region: LocationDescriptor; + region: SequenceLocation; strand: Strand; } /** @@ -314,21 +260,6 @@ export interface LinkerElement { type?: "LinkerSequenceElement"; linker_sequence: SequenceDescriptor; } -/** - * This descriptor is intended to reference VRS Sequence value objects. - */ -export interface SequenceDescriptor { - id: CURIE; - type?: "SequenceDescriptor"; - label?: string; - description?: string; - xrefs?: CURIE[]; - alternate_labels?: string[]; - extensions?: Extension[]; - sequence_id?: CURIE; - sequence?: Sequence; - residue_type?: CURIE; -} /** * Define UnknownGene class. This is primarily intended to represent a * partner in the result of a fusion partner-agnostic assay, which identifies @@ -417,10 +348,10 @@ export interface MultiplePossibleGenesElement { export interface FunctionalDomain { type?: "FunctionalDomain"; status: DomainStatus; - associated_gene: GeneDescriptor; + associated_gene: Gene; _id?: CURIE; label?: string; - sequence_location?: LocationDescriptor; + sequence_location?: SequenceLocation; } /** * Assayed fusion with client-oriented structural element models. Used in @@ -446,8 +377,8 @@ export interface ClientRegulatoryElement { type?: "RegulatoryElement"; regulatory_class: RegulatoryClass; feature_id?: string; - associated_gene?: GeneDescriptor; - feature_location?: LocationDescriptor; + associated_gene?: Gene; + feature_location?: SequenceLocation; display_class: string; nomenclature: string; } @@ -463,9 +394,9 @@ export interface ClientTranscriptSegmentElement { exon_start_offset?: number; exon_end?: number; exon_end_offset?: number; - gene_descriptor: GeneDescriptor; - element_genomic_start?: LocationDescriptor; - element_genomic_end?: LocationDescriptor; + gene: Gene; + element_genomic_start?: SequenceLocation; + element_genomic_end?: SequenceLocation; input_type: "genomic_coords_gene" | "genomic_coords_tx" | "exon_coords_tx"; input_tx?: string; input_strand?: Strand; @@ -485,7 +416,7 @@ export interface ClientGeneElement { element_id: string; nomenclature: string; type?: "GeneElement"; - gene_descriptor: GeneDescriptor; + gene: Gene; } /** * Templated sequence element used client-side. @@ -494,7 +425,7 @@ export interface ClientTemplatedSequenceElement { element_id: string; nomenclature: string; type?: "TemplatedSequenceElement"; - region: LocationDescriptor; + region: SequenceLocation; strand: Strand; input_chromosome?: string; input_start?: string; @@ -548,10 +479,10 @@ export interface ClientMultiplePossibleGenesElement { export interface ClientFunctionalDomain { type?: "FunctionalDomain"; status: DomainStatus; - associated_gene: GeneDescriptor; + associated_gene: Gene; _id?: CURIE; label?: string; - sequence_location?: LocationDescriptor; + sequence_location?: SequenceLocation; domain_id: string; } /** diff --git a/requirements.txt b/requirements.txt index d19c649b..29aef2a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,17 +18,16 @@ charset-normalizer==3.2.0 click==8.1.6 coloredlogs==15.0.1 configparser==6.0.0 -cool-seq-tool==0.1.14.dev0 +cool-seq-tool==0.5.1 cssselect==1.2.0 Cython==3.0.0 decorator==5.1.1 executing==1.2.0 fake-useragent==1.1.3 fastapi==0.100.0 -fusor==0.0.30.dev1 -ga4gh.vrs==0.8.4 -ga4gh.vrsatile.pydantic==0.0.13 -gene-normalizer==0.1.39 +# TODO: add this back once new release is out fusor==0.0.30.dev1 +ga4gh.vrs==2.0.0a10 +gene-normalizer==0.4.0 h11==0.14.0 hgvs==1.5.4 humanfriendly==10.0 @@ -55,7 +54,7 @@ prompt-toolkit==3.0.39 psycopg2==2.9.6 ptyprocess==0.7.0 pure-eval==0.2.2 -pydantic==1.10.12 +pydantic==2.4.2 pyee==8.2.2 Pygments==2.15.1 pyliftover==0.4 diff --git a/server/pyproject.toml b/server/pyproject.toml index 5c3a601c..394d8956 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -25,7 +25,6 @@ dependencies = [ "fastapi >= 0.72.0", "aiofiles", "asyncpg", - "fusor ~= 0.0.30-dev1", "sqlparse >= 0.4.2", "urllib3 >= 1.26.5", "click", @@ -47,7 +46,7 @@ dev = [ "ruff", "black", "pre-commit>=3.7.1", - "gene-normalizer ~= 0.1.39", + "gene-normalizer ~= 0.4.0", "pydantic-to-typescript", ] diff --git a/server/src/curfu/devtools/build_client_types.py b/server/src/curfu/devtools/build_client_types.py index 04655e4a..f600c7df 100644 --- a/server/src/curfu/devtools/build_client_types.py +++ b/server/src/curfu/devtools/build_client_types.py @@ -7,7 +7,7 @@ def build_client_types() -> None: """Construct type definitions for front-end client.""" - client_dir = Path(__file__).resolve().parents[3] / "client" + client_dir = Path(__file__).resolve().parents[4] / "client" generate_typescript_defs( "curfu.schemas", str((client_dir / "src" / "services" / "ResponseModels.ts").absolute()), diff --git a/server/src/curfu/gene_services.py b/server/src/curfu/gene_services.py index 5147a5c9..5bebac50 100644 --- a/server/src/curfu/gene_services.py +++ b/server/src/curfu/gene_services.py @@ -3,7 +3,6 @@ import csv from pathlib import Path -from ga4gh.vrsatile.pydantic.vrsatile_models import CURIE from gene.query import QueryHandler from gene.schemas import MatchType @@ -50,7 +49,7 @@ def __init__(self, suggestions_file: Path | None = None) -> None: @staticmethod def get_normalized_gene( term: str, normalizer: QueryHandler - ) -> tuple[CURIE, str, str | CURIE | None]: + ) -> tuple[str, str, str | str | None]: """Get normalized ID given gene symbol/label/alias. :param str term: user-entered gene term :param QueryHandler normalizer: gene normalizer instance diff --git a/server/src/curfu/routers/meta.py b/server/src/curfu/routers/meta.py index 506cd103..a0a29073 100644 --- a/server/src/curfu/routers/meta.py +++ b/server/src/curfu/routers/meta.py @@ -1,6 +1,6 @@ """Provide service meta information""" -from cool_seq_tool.version import __version__ as cool_seq_tool_version +from cool_seq_tool import __version__ as cool_seq_tool_version from fastapi import APIRouter from fusor import __version__ as fusor_version diff --git a/server/src/curfu/schemas.py b/server/src/curfu/schemas.py index 73cf5996..eadd5a82 100644 --- a/server/src/curfu/schemas.py +++ b/server/src/curfu/schemas.py @@ -18,14 +18,13 @@ TranscriptSegmentElement, UnknownGeneElement, ) -from ga4gh.vrsatile.pydantic.vrsatile_models import CURIE from pydantic import BaseModel, Extra, Field, StrictInt, StrictStr, validator ResponseWarnings = list[StrictStr] | None ResponseDict = dict[ str, - str | int | CURIE | list[str] | list[tuple[str, str, str, str]] | FunctionalDomain, + str | int | str | list[str] | list[tuple[str, str, str, str]] | FunctionalDomain, ] Warnings = list[str] @@ -134,7 +133,7 @@ class NormalizeGeneResponse(Response): """Response model for gene normalization endpoint.""" term: StrictStr - concept_id: CURIE | None + concept_id: StrictStr | None symbol: StrictStr | None cased: StrictStr | None @@ -154,7 +153,7 @@ class SuggestGeneResponse(Response): class DomainParams(BaseModel): """Fields for individual domain suggestion entries""" - interpro_id: CURIE + interpro_id: StrictStr domain_name: StrictStr start: int end: int