Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error reporting #20

Merged
merged 17 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ jobs:
test:
name: test py${{ matrix.python-version }}
runs-on: ubuntu-latest
env:
MAVEDB_BASE_URL: https://api.mavedb.org
strategy:
matrix:
python-version: ["3.11", "3.12"]
Expand Down
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ RUN pip install -e '.[dev,tests]'
RUN pip install -U polars-lts-cpu
# install gene normalizer with pg dependencies. TODO: can the pg dependencies be specified in pyproject.toml?
#RUN pip install 'gene-normalizer[pg]'

# not working, needs to happen after db volume is mounted
# ENV GENE_NORM_DB_URL=postgres://postgres:postgres@db:5432/gene_normalizer
# RUN echo "y" | gene_norm_update_remote

ENV PYTHONUNBUFFERED 1

ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/src"

# Tell Docker that we will listen on port 8000.
EXPOSE 8000

# At container startup, run the application using uvicorn.
CMD ["uvicorn", "api.server_main:app", "--host", "0.0.0.0", "--port", "8000"]
15 changes: 15 additions & 0 deletions docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ services:
volumes:
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

api:
build:
context: .
command: bash -c "uvicorn api.server_main:app --host 0.0.0.0 --port 8000 --reload"
depends_on:
- db
- seqrepo
env_file:
- settings/.env.dev
ports:
- "8004:8000"
volumes:
- .:/usr/src/app
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

volumes:
vrs-mapping-data-dev:
vrs-mapping-seqrepo-dev:
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ dependencies = [
"pydantic>=2",
"python-dotenv",
"setuptools>=68.0", # tmp -- ensure 3.12 compatibility
"mavehgvs==0.6.1"
"mavehgvs==0.6.1",
"fastapi",
"starlette",
"uvicorn"
]
dynamic = ["version"]

Expand Down
7 changes: 7 additions & 0 deletions settings/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ POSTGRES_DB=gene_normalizer

UTA_DB_URL=postgresql://anonymous:[email protected]:5432/uta/uta_20180821

####################################################################################################
# Environment variables for MaveDB connection
####################################################################################################

MAVEDB_BASE_URL=http://localhost:8000
MAVEDB_API_KEY=

####################################################################################################
# Environment variables for seqrepo
####################################################################################################
Expand Down
1 change: 1 addition & 0 deletions src/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide VRS mapping utilities API"""
1 change: 1 addition & 0 deletions src/api/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide routers for dcd mapping API"""
161 changes: 161 additions & 0 deletions src/api/routers/map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""Provide mapping router"""
from cool_seq_tool.schemas import AnnotationLayer
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from requests import HTTPError

from dcd_mapping.align import AlignmentError, BlatNotFoundError, align
from dcd_mapping.annotate import (
_get_computed_reference_sequence,
_get_mapped_reference_sequence,
_set_scoreset_layer,
annotate,
)
from dcd_mapping.lookup import DataLookupError
from dcd_mapping.mavedb_data import (
ScoresetNotSupportedError,
get_raw_scoreset_metadata,
get_scoreset_metadata,
get_scoreset_records,
)
from dcd_mapping.resource_utils import ResourceAcquisitionError
from dcd_mapping.schemas import ScoreAnnotation, ScoresetMapping, VrsVersion
from dcd_mapping.transcripts import TxSelectError, select_transcript
from dcd_mapping.vrs_map import VrsMapError, vrs_map

router = APIRouter(
prefix="/api/v1", tags=["mappings"], responses={404: {"description": "Not found"}}
)


@router.post(path="/map/{urn}", status_code=200, response_model=ScoresetMapping)
async def map_scoreset(urn: str) -> ScoresetMapping:
    """Run the end-to-end mapping pipeline for one scoreset.

    The stages run in order: fetch metadata and records, align, select a
    transcript, map to VRS, annotate, and assemble the final mapping.

    ``ScoresetNotSupportedError``, ``AlignmentError``, ``TxSelectError``,
    ``KeyError``, ``ValueError``, ``VrsMapError`` and any exception raised
    while annotating or assembling the output are not propagated. They are
    reported through the ``error_message`` field of the returned mapping.

    :param urn: identifier for a scoreset.
    :return: the mapping for the scoreset, or a mapping that carries an
        ``error_message`` describing why mapping could not be completed.
    :raise HTTPException: with status 500 when a resource cannot be acquired,
        BLAT is missing, or an HTTP / data lookup error occurs during
        transcript selection.
    """
    # Stage 1: fetch scoreset metadata and records.
    try:
        metadata = get_scoreset_metadata(urn)
        # NOTE(review): the positional ``True`` is presumably a "silent" flag -- confirm.
        records = get_scoreset_records(urn, True)
    except ScoresetNotSupportedError as exc:
        return ScoresetMapping(
            metadata=None,
            error_message=str(exc).strip("'"),
        )
    except ResourceAcquisitionError as exc:
        detail = f"Unable to acquire resource from MaveDB: {exc}"
        raise HTTPException(status_code=500, detail=detail) from exc

    # Stage 2: alignment. Handler order is kept as-is so the more specific
    # errors are matched before the generic AlignmentError.
    try:
        alignment_result = align(metadata, True)
    except BlatNotFoundError as exc:
        detail = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
        raise HTTPException(status_code=500, detail=detail) from exc
    except ResourceAcquisitionError as exc:
        detail = f"BLAT resource could not be acquired: {exc}"
        raise HTTPException(status_code=500, detail=detail) from exc
    except AlignmentError as exc:
        failure = ScoresetMapping(
            metadata=metadata, error_message=str(exc).strip("'")
        )
        return JSONResponse(content=failure.model_dump(exclude_none=True))

    # Stage 3: transcript selection.
    try:
        transcript = await select_transcript(metadata, records, alignment_result)
    except (TxSelectError, KeyError, ValueError) as exc:
        failure = ScoresetMapping(
            metadata=metadata, error_message=str(exc).strip("'")
        )
        return JSONResponse(content=failure.model_dump(exclude_none=True))
    except HTTPError as exc:
        detail = f"HTTP error occurred during transcript selection: {exc}"
        raise HTTPException(status_code=500, detail=detail) from exc
    except DataLookupError as exc:
        detail = f"Data lookup error occurred during transcript selection: {exc}"
        raise HTTPException(status_code=500, detail=detail) from exc

    # Stage 4: VRS mapping.
    try:
        vrs_results = vrs_map(metadata, alignment_result, records, transcript, True)
    except VrsMapError as exc:
        failure = ScoresetMapping(
            metadata=metadata, error_message=str(exc).strip("'")
        )
        return JSONResponse(content=failure.model_dump(exclude_none=True))
    if vrs_results is None:
        return ScoresetMapping(
            metadata=metadata,
            error_message="No variant mappings available for this score set",
        )

    # Stage 5: annotation. Any failure here is reported in the response body.
    try:
        vrs_results = annotate(vrs_results, transcript, metadata, VrsVersion.V_2)
    except Exception as exc:
        failure = ScoresetMapping(
            metadata=metadata, error_message=str(exc).strip("'")
        )
        return JSONResponse(content=failure.model_dump(exclude_none=True))
    if vrs_results is None:
        return ScoresetMapping(
            metadata=metadata,
            error_message="No annotated variant mappings available for this score set",
        )

    # Stage 6: collect raw metadata, reference sequences and mapped scores.
    try:
        raw_metadata = get_raw_scoreset_metadata(urn)
        preferred_layers = {_set_scoreset_layer(urn, vrs_results)}

        # Every annotation layer starts with empty reference sequences; only
        # the preferred layer(s) are filled in below.
        reference_sequences = {}
        for layer in AnnotationLayer:
            reference_sequences[layer] = {
                "computed_reference_sequence": None,
                "mapped_reference_sequence": None,
            }

        for layer in preferred_layers:
            computed_sequence = _get_computed_reference_sequence(
                urn, layer, transcript
            )
            reference_sequences[layer]["computed_reference_sequence"] = (
                computed_sequence
            )
            mapped_sequence = _get_mapped_reference_sequence(
                layer, transcript, alignment_result
            )
            reference_sequences[layer]["mapped_reference_sequence"] = mapped_sequence

        # Re-validating through ScoreAnnotation drops the annotation layer
        # from each mapping object.
        mapped_scores: list[ScoreAnnotation] = [
            ScoreAnnotation(**mapping.model_dump())
            for mapping in vrs_results
            if mapping.annotation_layer in preferred_layers
        ]
    except Exception as exc:
        failure = ScoresetMapping(
            metadata=metadata, error_message=str(exc).strip("'")
        )
        return JSONResponse(content=failure.model_dump(exclude_none=True))

    protein_sequences = reference_sequences[AnnotationLayer.PROTEIN]
    genomic_sequences = reference_sequences[AnnotationLayer.GENOMIC]
    result = ScoresetMapping(
        metadata=raw_metadata,
        computed_protein_reference_sequence=protein_sequences[
            "computed_reference_sequence"
        ],
        mapped_protein_reference_sequence=protein_sequences[
            "mapped_reference_sequence"
        ],
        computed_genomic_reference_sequence=genomic_sequences[
            "computed_reference_sequence"
        ],
        mapped_genomic_reference_sequence=genomic_sequences[
            "mapped_reference_sequence"
        ],
        mapped_scores=mapped_scores,
    )
    return JSONResponse(content=result.model_dump(exclude_none=True))
14 changes: 14 additions & 0 deletions src/api/server_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""FastAPI server file"""
import uvicorn
from fastapi import FastAPI

from api.routers import map

# ASGI application object for this module.
app = FastAPI()

# Register the routes declared on the router in api.routers.map.
app.include_router(map.router)


# If the application is not already being run within a uvicorn server, start uvicorn here.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)  # noqa: S104
2 changes: 2 additions & 0 deletions src/dcd_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from dotenv import load_dotenv

from .main import map_scoreset, map_scoreset_urn
from .version import dcd_mapping_version

__all__ = ["map_scoreset", "map_scoreset_urn"]
__version__ = dcd_mapping_version
bencap marked this conversation as resolved.
Show resolved Hide resolved

load_dotenv()
13 changes: 5 additions & 8 deletions src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def _get_best_hit(output: QueryResult, urn: str, chromosome: str | None) -> Hit:
else:
if list(output):
hit_chrs = [h.id for h in output]
# TODO should this be an error rather than a warning? it seems like a problem if we can't find a hit on the expected chromosome
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My sense is that this is fine as a warning since the chromosome isn't explicitly provided by the user, and is instead inferred via score set metadata.

_logger.warning(
"Failed to match hit chromosomes during alignment. URN: %s, expected chromosome: %s, hit chromosomes: %s",
urn,
Expand All @@ -221,8 +222,8 @@ def _get_best_hit(output: QueryResult, urn: str, chromosome: str | None) -> Hit:
best_score_hit = hit

if best_score_hit is None:
_logger.error("Couldn't get hits from %s -- check BLAT output.", urn)
raise AlignmentError
msg = f"Couldn't get BLAT hits from {urn}"
raise AlignmentError(msg)

return best_score_hit

Expand All @@ -246,12 +247,8 @@ def _get_best_hsp(hit: Hit, urn: str, gene_location: GeneLocation | None) -> HSP
else:
best_hsp = max(hit, key=lambda hsp: hsp.score)
if best_hsp is None:
_logger.error(
"Unable to get best HSP from hit -- this should be impossible? urn: %s, hit: %s",
urn,
hit,
)
raise AlignmentError
msg = f"Unable to get best HSP from BLAT hit: {hit}"
raise AlignmentError(msg)
return best_hsp


Expand Down
Loading
Loading