Skip to content

Commit

Permalink
Merge pull request #20 from VariantEffect/error-reporting
Browse files Browse the repository at this point in the history
Error reporting
  • Loading branch information
bencap authored Sep 13, 2024
2 parents 8c734b5 + cc8f3ad commit 511dfa7
Show file tree
Hide file tree
Showing 19 changed files with 698 additions and 232 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ jobs:
test:
name: test py${{ matrix.python-version }}
runs-on: ubuntu-latest
env:
MAVEDB_BASE_URL: https://api.mavedb.org
strategy:
matrix:
python-version: ["3.11", "3.12"]
Expand Down
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ RUN pip install -e '.[dev,tests]'
RUN pip install -U polars-lts-cpu
# install gene normalizer with pg dependencies. TODO: can the pg dependencies be specified in pyproject.toml?
#RUN pip install 'gene-normalizer[pg]'

# not working, needs to happen after db volume is mounted
# ENV GENE_NORM_DB_URL=postgres://postgres:postgres@db:5432/gene_normalizer
# RUN echo "y" | gene_norm_update_remote

ENV PYTHONUNBUFFERED 1

ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/src"

# Tell Docker that we will listen on port 8000.
EXPOSE 8000

# At container startup, run the application using uvicorn.
CMD ["uvicorn", "api.server_main:app", "--host", "0.0.0.0", "--port", "8000"]
15 changes: 15 additions & 0 deletions docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ services:
volumes:
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

api:
build:
context: .
command: bash -c "uvicorn api.server_main:app --host 0.0.0.0 --port 8000 --reload"
depends_on:
- db
- seqrepo
env_file:
- settings/.env.dev
ports:
- "8004:8000"
volumes:
- .:/usr/src/app
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

volumes:
vrs-mapping-data-dev:
vrs-mapping-seqrepo-dev:
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ dependencies = [
"pydantic>=2",
"python-dotenv",
"setuptools>=68.0", # tmp -- ensure 3.12 compatibility
"mavehgvs==0.6.1"
"mavehgvs==0.6.1",
"fastapi",
"starlette",
"uvicorn"
]
dynamic = ["version"]

Expand Down
7 changes: 7 additions & 0 deletions settings/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ POSTGRES_DB=gene_normalizer

UTA_DB_URL=postgresql://anonymous:[email protected]:5432/uta/uta_20180821

####################################################################################################
# Environment variables for MaveDB connection
####################################################################################################

MAVEDB_BASE_URL=http://localhost:8000
MAVEDB_API_KEY=

####################################################################################################
# Environment variables for seqrepo
####################################################################################################
Expand Down
1 change: 1 addition & 0 deletions src/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide VRS mapping utilities API"""
1 change: 1 addition & 0 deletions src/api/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide routers for dcd mapping API"""
161 changes: 161 additions & 0 deletions src/api/routers/map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
""""Provide mapping router"""
from cool_seq_tool.schemas import AnnotationLayer
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from requests import HTTPError

from dcd_mapping.align import AlignmentError, BlatNotFoundError, align
from dcd_mapping.annotate import (
_get_computed_reference_sequence,
_get_mapped_reference_sequence,
_set_scoreset_layer,
annotate,
)
from dcd_mapping.lookup import DataLookupError
from dcd_mapping.mavedb_data import (
ScoresetNotSupportedError,
get_raw_scoreset_metadata,
get_scoreset_metadata,
get_scoreset_records,
)
from dcd_mapping.resource_utils import ResourceAcquisitionError
from dcd_mapping.schemas import ScoreAnnotation, ScoresetMapping, VrsVersion
from dcd_mapping.transcripts import TxSelectError, select_transcript
from dcd_mapping.vrs_map import VrsMapError, vrs_map

# Router collecting the mapping endpoints; every route registered on it is
# served under the /api/v1 prefix and grouped under the "mappings" tag.
router = APIRouter(
    prefix="/api/v1",
    tags=["mappings"],
    responses={404: {"description": "Not found"}},
)


def _mapping_error_response(metadata, error: Exception) -> JSONResponse:
    """Wrap a mapping failure in a JSON response.

    :param metadata: scoreset metadata to include in the response body
    :param error: exception whose message is reported as ``error_message``
    :return: response holding a ``ScoresetMapping`` dump with ``None`` fields omitted
    """
    # str() of some exceptions (e.g. KeyError) is wrapped in single quotes;
    # strip them so the client receives the bare message.
    mapping = ScoresetMapping(metadata=metadata, error_message=str(error).strip("'"))
    return JSONResponse(content=mapping.model_dump(exclude_none=True))


@router.post(path="/map/{urn}", status_code=200, response_model=ScoresetMapping)
async def map_scoreset(urn: str) -> ScoresetMapping:
    """Perform end-to-end mapping for a scoreset.

    The stages run in order: fetch metadata and records, align, select a
    transcript, map to VRS, annotate, then assemble the final mapping. A stage
    that fails because the scoreset itself cannot be mapped ends the request
    early with a ``ScoresetMapping`` whose ``error_message`` describes the
    problem. A stage that fails because a supporting resource is unavailable
    raises an HTTP 500 instead.

    :param urn: identifier for a scoreset.
    :return: the mapping result, or a mapping carrying only ``metadata`` and
        ``error_message`` when a stage could not process the scoreset
    :raise HTTPException: (status 500) if MaveDB or BLAT resources cannot be
        acquired, BLAT is not installed, or an HTTP or data lookup error occurs
        during transcript selection
    """
    # NOTE(review): the positional ``True`` passed to the pipeline helpers below
    # is presumably their ``silent`` flag -- confirm against their signatures.
    try:
        metadata = get_scoreset_metadata(urn)
        records = get_scoreset_records(urn, True)
    except ScoresetNotSupportedError as e:
        # No usable metadata exists here, so the mapping is returned as a model
        # (serialized through ``response_model``) rather than as a JSONResponse.
        return ScoresetMapping(
            metadata=None,
            error_message=str(e).strip("'"),
        )
    except ResourceAcquisitionError as e:
        msg = f"Unable to acquire resource from MaveDB: {e}"
        raise HTTPException(status_code=500, detail=msg) from e

    try:
        alignment_result = align(metadata, True)
    except BlatNotFoundError as e:
        msg = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
        raise HTTPException(status_code=500, detail=msg) from e
    except ResourceAcquisitionError as e:
        msg = f"BLAT resource could not be acquired: {e}"
        raise HTTPException(status_code=500, detail=msg) from e
    except AlignmentError as e:
        return _mapping_error_response(metadata, e)

    try:
        transcript = await select_transcript(metadata, records, alignment_result)
    except (TxSelectError, KeyError, ValueError) as e:
        return _mapping_error_response(metadata, e)
    except HTTPError as e:
        msg = f"HTTP error occurred during transcript selection: {e}"
        raise HTTPException(status_code=500, detail=msg) from e
    except DataLookupError as e:
        msg = f"Data lookup error occurred during transcript selection: {e}"
        raise HTTPException(status_code=500, detail=msg) from e

    try:
        vrs_results = vrs_map(metadata, alignment_result, records, transcript, True)
    except VrsMapError as e:
        return _mapping_error_response(metadata, e)
    if vrs_results is None:
        return ScoresetMapping(
            metadata=metadata,
            error_message="No variant mappings available for this score set",
        )

    try:
        vrs_results = annotate(vrs_results, transcript, metadata, VrsVersion.V_2)
    except Exception as e:
        # Deliberately broad: any annotation failure is reported to the client
        # in the response body instead of surfacing as an unhandled server error.
        return _mapping_error_response(metadata, e)
    if vrs_results is None:
        return ScoresetMapping(
            metadata=metadata,
            error_message="No annotated variant mappings available for this score set",
        )

    try:
        raw_metadata = get_raw_scoreset_metadata(urn)
        preferred_layers = {
            _set_scoreset_layer(urn, vrs_results),
        }

        # Start with every layer empty; only preferred layers get filled in, so
        # the other layers' fields stay None and are dropped from the response.
        reference_sequences = {
            layer: {
                "computed_reference_sequence": None,
                "mapped_reference_sequence": None,
            }
            for layer in AnnotationLayer
        }

        for layer in preferred_layers:
            reference_sequences[layer][
                "computed_reference_sequence"
            ] = _get_computed_reference_sequence(urn, layer, transcript)
            reference_sequences[layer][
                "mapped_reference_sequence"
            ] = _get_mapped_reference_sequence(layer, transcript, alignment_result)

        # Keep only mappings on a preferred layer; rebuilding each one as a
        # ScoreAnnotation drops the annotation layer from the mapping object.
        mapped_scores: list[ScoreAnnotation] = [
            ScoreAnnotation(**m.model_dump())
            for m in vrs_results
            if m.annotation_layer in preferred_layers
        ]
    except Exception as e:
        # Deliberately broad, as above: assembly failures become an error body.
        return _mapping_error_response(metadata, e)

    protein_sequences = reference_sequences[AnnotationLayer.PROTEIN]
    genomic_sequences = reference_sequences[AnnotationLayer.GENOMIC]
    return JSONResponse(
        content=ScoresetMapping(
            metadata=raw_metadata,
            computed_protein_reference_sequence=protein_sequences[
                "computed_reference_sequence"
            ],
            mapped_protein_reference_sequence=protein_sequences[
                "mapped_reference_sequence"
            ],
            computed_genomic_reference_sequence=genomic_sequences[
                "computed_reference_sequence"
            ],
            mapped_genomic_reference_sequence=genomic_sequences[
                "mapped_reference_sequence"
            ],
            mapped_scores=mapped_scores,
        ).model_dump(exclude_none=True)
    )
14 changes: 14 additions & 0 deletions src/api/server_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""FastAPI server file"""
import uvicorn
from fastapi import FastAPI

from api.routers import map

# Application instance; the container start command refers to it as
# ``api.server_main:app``.
app = FastAPI()

# Register the mapping routes. ``map`` here is the router module imported from
# ``api.routers`` at the top of this file, not the builtin function.
app.include_router(map.router)


# Start a server only when this file is executed directly; when uvicorn imports
# the module itself, it already provides the server.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",  # noqa: S104
        port=8000,
    )
2 changes: 2 additions & 0 deletions src/dcd_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from dotenv import load_dotenv

from .main import map_scoreset, map_scoreset_urn
from .version import dcd_mapping_version

__all__ = ["map_scoreset", "map_scoreset_urn"]
__version__ = dcd_mapping_version

load_dotenv()
13 changes: 5 additions & 8 deletions src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def _get_best_hit(output: QueryResult, urn: str, chromosome: str | None) -> Hit:
else:
if list(output):
hit_chrs = [h.id for h in output]
# TODO should this be an error rather than a warning? it seems like a problem if we can't find a hit on the expected chromosome
_logger.warning(
"Failed to match hit chromosomes during alignment. URN: %s, expected chromosome: %s, hit chromosomes: %s",
urn,
Expand All @@ -221,8 +222,8 @@ def _get_best_hit(output: QueryResult, urn: str, chromosome: str | None) -> Hit:
best_score_hit = hit

if best_score_hit is None:
_logger.error("Couldn't get hits from %s -- check BLAT output.", urn)
raise AlignmentError
msg = f"Couldn't get BLAT hits from {urn}"
raise AlignmentError(msg)

return best_score_hit

Expand All @@ -246,12 +247,8 @@ def _get_best_hsp(hit: Hit, urn: str, gene_location: GeneLocation | None) -> HSP
else:
best_hsp = max(hit, key=lambda hsp: hsp.score)
if best_hsp is None:
_logger.error(
"Unable to get best HSP from hit -- this should be impossible? urn: %s, hit: %s",
urn,
hit,
)
raise AlignmentError
msg = f"Unable to get best HSP from BLAT hit: {hit}"
raise AlignmentError(msg)
return best_hsp


Expand Down
Loading

0 comments on commit 511dfa7

Please sign in to comment.