-
-
Notifications
You must be signed in to change notification settings - Fork 83
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add health check monitoring for EXL2 errors (#206)
* Add health check monitoring for EXL2 errors
* Health: Format and change status code. A status code of 503 makes more sense to use.
---------
- Loading branch information
1 parent
e0ffa90
commit 2cda890
Showing
4 changed files
with
73 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import asyncio | ||
from collections import deque | ||
from datetime import datetime, timezone | ||
from functools import partial | ||
from pydantic import BaseModel, Field | ||
from typing import Union | ||
|
||
|
||
class UnhealthyEvent(BaseModel):
    """Represents an error that makes the system unhealthy"""

    # The timestamp is produced per instance by the factory, so every event
    # records its own timezone-aware (UTC) creation time.
    time: datetime = Field(
        description="Time the error occurred in UTC time",
        default_factory=partial(datetime.now, tz=timezone.utc),
    )

    # Human-readable message; falls back to a generic text when not supplied.
    description: str = Field(
        default="Unknown error",
        description="The error message",
    )
|
||
|
||
class HealthManagerClass:
    """Holds the process-wide record of events that made the service unhealthy.

    Events are kept in a bounded deque and every access goes through an
    asyncio lock.
    """

    def __init__(self):
        self._lock = asyncio.Lock()
        # Bounded to the 100 most recent events so the history cannot grow
        # without limit (older entries are dropped automatically).
        self.issues: deque[UnhealthyEvent] = deque(maxlen=100)

    async def add_unhealthy_event(self, error: Union[str, Exception]):
        """Record a new unhealthy event.

        Args:
            error: Either a ready-made message, or an exception, which is
                rendered as ``"<ExceptionClass>: <message>"``.
        """
        async with self._lock:
            message = (
                f"{error.__class__.__name__}: {error!s}"
                if isinstance(error, Exception)
                else error
            )
            self.issues.append(UnhealthyEvent(description=message))

    async def is_service_healthy(self) -> tuple[bool, list[UnhealthyEvent]]:
        """Report the current health state.

        Returns:
            A pair ``(healthy, issues)`` where ``healthy`` is True only when
            no events are stored, and ``issues`` is a list copy of the
            stored events.
        """
        async with self._lock:
            recorded = list(self.issues)
            return not recorded, recorded
|
||
|
||
# Module-level shared instance of the global health state manager, created once at import time | ||
HealthManager = HealthManagerClass() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from typing import Literal | ||
from pydantic import BaseModel, Field | ||
|
||
from common.health import UnhealthyEvent | ||
|
||
|
||
class HealthCheckResponse(BaseModel):
    """System health status"""

    # Overall state; defaults to "healthy" when not set explicitly.
    status: Literal["healthy", "unhealthy"] = Field(
        default="healthy",
        description="System health status",
    )

    # Recorded unhealthy events; each instance gets its own fresh empty list.
    issues: list[UnhealthyEvent] = Field(
        description="List of issues",
        default_factory=list,
    )