Merge pull request #421 from stacklok/issue-201-v1
fix: wrap provider error codes into HTTP Exceptions
yrobla authored Dec 19, 2024
2 parents a8f8a78 + 3c35bb5 commit 6d619f2
Showing 6 changed files with 88 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/codegate/llm_utils/llmclient.py
@@ -149,4 +149,4 @@ async def _complete_litellm(
 
         except Exception as e:
             logger.error(f"LiteLLM completion failed {model} ({content}): {e}")
-            return {}
+            raise e
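
For context on what this one-line change does: _complete_litellm used to log the failure and return an empty dict, so callers could not tell a failed completion from an empty one; now the exception propagates, and the provider changes below turn it into an HTTP response. A minimal sketch of the difference in behavior (the failing call is a stand-in, not a real LiteLLM request):

import asyncio

async def complete_swallowing_errors() -> dict:
    try:
        raise RuntimeError("provider unreachable")  # stand-in for a LiteLLM failure
    except Exception as e:
        print(f"LiteLLM completion failed ({e})")
        return {}  # old behavior: the caller only sees an empty dict

async def complete_reraising_errors() -> dict:
    try:
        raise RuntimeError("provider unreachable")  # same stand-in failure
    except Exception as e:
        print(f"LiteLLM completion failed ({e})")
        raise e  # new behavior: the caller decides how to surface the error

print(asyncio.run(complete_swallowing_errors()))   # prints {}
# asyncio.run(complete_reraising_errors())         # would raise RuntimeError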
14 changes: 13 additions & 1 deletion src/codegate/providers/anthropic/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 from fastapi import Header, HTTPException, Request
@@ -53,5 +54,16 @@ async def create_message(
             data = json.loads(body)
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, x_api_key, is_fim_request)
+            try:
+                stream = await self.complete(data, x_api_key, is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    # log the exception
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in AnthropicProvider completion", error=str(e))
+                    raise HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
             return self._completion_handler.create_response(stream)
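
The pattern above, repeated with small variations in the providers below, is to let the upstream SDK error escape complete() and, when it carries a status_code attribute, re-raise it as a FastAPI HTTPException so the client receives that status instead of a generic 500. A self-contained sketch of the same idea; the route, the fake provider call, and its error class are illustrative stand-ins rather than codegate code:

import structlog
from fastapi import FastAPI, HTTPException

app = FastAPI()
logger = structlog.get_logger("codegate")

class UpstreamError(Exception):
    """Stand-in for an SDK exception that carries an HTTP status code."""
    def __init__(self, message: str, status_code: int):
        super().__init__(message)
        self.status_code = status_code

async def fake_complete(data: dict):
    # pretend the upstream backend rejected the request
    raise UpstreamError("invalid x-api-key", status_code=401)

@app.post("/messages")
async def create_message(data: dict):
    try:
        stream = await fake_complete(data)
    except Exception as e:
        if hasattr(e, "status_code"):
            logger.error("Error in provider completion", error=str(e))
            raise HTTPException(status_code=e.status_code, detail=str(e))
        raise  # anything without a status code keeps propagating
    return stream

A client that sends a bad key then gets a 401 whose detail carries the upstream message, rather than an unhandled exception surfacing as a 500.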
24 changes: 21 additions & 3 deletions src/codegate/providers/llamacpp/provider.py
@@ -1,7 +1,8 @@
 import json
+import structlog
 from typing import Optional
 
-from fastapi import Request
+from fastapi import Request, HTTPException
 
 from codegate.pipeline.base import SequentialPipelineProcessor
 from codegate.pipeline.output import OutputPipelineProcessor
@@ -10,7 +11,7 @@
 from codegate.providers.llamacpp.normalizer import LLamaCppInputNormalizer, LLamaCppOutputNormalizer
 
 
-class LlamaCppProvider(BaseProvider):
+class LlamaCppProvider(BaseProvider):
     def __init__(
         self,
         pipeline_processor: Optional[SequentialPipelineProcessor] = None,
@@ -46,7 +47,24 @@ async def create_completion(
         ):
             body = await request.body()
             data = json.loads(body)
+            logger = structlog.get_logger("codegate")
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, None, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, None, is_fim_request=is_fim_request)
+            except RuntimeError as e:
+                # propagate as error 500
+                logger.error("Error in LlamaCppProvider completion", error=str(e))
+                raise HTTPException(status_code=500, detail=str(e))
+            except ValueError as e:
+                # capture well known exceptions
+                logger.error("Error in LlamaCppProvider completion", error=str(e))
+                if str(e).startswith("Model path does not exist") or \
+                        str(e).startswith("No file found"):
+                    raise HTTPException(status_code=404, detail=str(e))
+                elif "exceed" in str(e):
+                    raise HTTPException(status_code=429, detail=str(e))
+                else:
+                    # just continue raising the exception
+                    raise e
             return self._completion_handler.create_response(stream)
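
llama.cpp errors carry no HTTP status code, so the mapping above goes by exception type and message text: RuntimeError becomes a 500, a ValueError about a missing model file becomes a 404, and a ValueError whose message contains "exceed" (context or token limits) becomes a 429. The same mapping pulled out as a standalone helper for clarity; the helper name is an illustration, not part of the commit:

from fastapi import HTTPException

def raise_as_http(e: Exception):
    """Re-raise a llama.cpp error as an HTTPException when it maps to a known status."""
    msg = str(e)
    if isinstance(e, RuntimeError):
        raise HTTPException(status_code=500, detail=msg) from e
    if isinstance(e, ValueError):
        if msg.startswith("Model path does not exist") or msg.startswith("No file found"):
            raise HTTPException(status_code=404, detail=msg) from e
        if "exceed" in msg:
            raise HTTPException(status_code=429, detail=msg) from e
    raise e  # anything else keeps propagating unchanged

# raise_as_http(ValueError("Model path does not exist: ./models/model.gguf"))
# -> HTTPException with status_code 404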
21 changes: 19 additions & 2 deletions src/codegate/providers/ollama/provider.py
@@ -1,7 +1,9 @@
 import json
 from typing import Optional
 
-from fastapi import Request
+from fastapi import Request, HTTPException
+import httpx
+import structlog
 
 from codegate.config import Config
 from codegate.pipeline.base import SequentialPipelineProcessor
@@ -58,5 +60,20 @@ async def create_completion(request: Request):
             data["base_url"] = self.base_url
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
+            except httpx.ConnectError as e:
+                logger = structlog.get_logger("codegate")
+                logger.error("Error in OllamaProvider completion", error=str(e))
+                raise HTTPException(status_code=503, detail="Ollama service is unavailable")
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    # log the exception
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in OllamaProvider completion", error=str(e))
+                    raise HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
             return self._completion_handler.create_response(stream)
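
Ollama is reached over HTTP, so the failure worth special-casing is the daemon not being reachable at all: httpx.ConnectError is translated into a 503 with a fixed message, while errors that carry a status code are wrapped as in the other providers. A sketch of just that branch; the base URL and endpoint are common Ollama defaults used for illustration, not values taken from this diff:

import httpx
from fastapi import HTTPException

async def forward_to_ollama(data: dict, base_url: str = "http://localhost:11434"):
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{base_url}/api/chat", json=data, timeout=30.0)
            return response.json()
    except httpx.ConnectError as e:
        # connection refused / host unreachable: the daemon is not up
        raise HTTPException(status_code=503, detail="Ollama service is unavailable") from e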
14 changes: 13 additions & 1 deletion src/codegate/providers/openai/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 from fastapi import Header, HTTPException, Request
@@ -54,5 +55,16 @@ async def create_completion(
             data = json.loads(body)
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in OpenAIProvider completion", error=str(e))
+
+                    raise HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
             return self._completion_handler.create_response(stream)
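
Seen from the client side, the net effect across all of these providers is that codegate now answers with the upstream status code and a JSON detail body instead of a bare 500. A quick illustration with FastAPI's TestClient against a stand-in route; the path and message are made up for the example:

from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient

app = FastAPI()

@app.post("/openai/chat/completions")
async def create_completion():
    # pretend the upstream OpenAI-compatible backend rejected the API key
    raise HTTPException(status_code=401, detail="Incorrect API key provided")

client = TestClient(app)
response = client.post("/openai/chat/completions")
print(response.status_code)  # 401
print(response.json())       # {'detail': 'Incorrect API key provided'}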
24 changes: 21 additions & 3 deletions src/codegate/providers/vllm/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 import httpx
@@ -52,7 +53,10 @@ async def get_models(authorization: str = Header(..., description="Bearer token"
 
             token = authorization.split(" ")[1]
             config = Config.get_config()
-            base_url = config.provider_urls.get("vllm")
+            if config:
+                base_url = config.provider_urls.get("vllm")
+            else:
+                base_url = ""
 
             async with httpx.AsyncClient() as client:
                 response = await client.get(
@@ -76,8 +80,22 @@ async def create_completion(
 
             # Add the vLLM base URL to the request
             config = Config.get_config()
-            data["base_url"] = config.provider_urls.get("vllm")
+            if config:
+                data["base_url"] = config.provider_urls.get("vllm")
+            else:
+                data["base_url"] = ""
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in VLLMProvider completion", error=str(e))
+
+                    raise HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
             return self._completion_handler.create_response(stream)
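
Besides the shared error wrapping, the vLLM provider now guards against Config.get_config() returning nothing; without the guard, config.provider_urls.get("vllm") would fail with AttributeError when no configuration has been loaded. A minimal sketch of the guard using a stand-in Config class (the real one lives in codegate.config, and the URL here is only a placeholder):

from typing import Optional

class Config:
    """Stand-in for codegate.config.Config, which may not be loaded yet."""
    _instance: Optional["Config"] = None
    provider_urls = {"vllm": "http://localhost:8000"}

    @classmethod
    def get_config(cls) -> Optional["Config"]:
        return cls._instance  # None until something has loaded the configuration

config = Config.get_config()
base_url = config.provider_urls.get("vllm") if config else ""
print(repr(base_url))  # '' here, because no config was loaded in this sketch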
