From 0b25c208d675cca371931e119cc73f5b560fb6f4 Mon Sep 17 00:00:00 2001 From: kingbri Date: Tue, 5 Mar 2024 18:16:02 -0500 Subject: [PATCH] API: Fix error reporting Handle a disconnect on load error consistently. It should be safer to warn the user to run unload (or re-run load) if a model does not load correctly. Also don't log the traceback for request errors that don't have one. Signed-off-by: kingbri --- common/utils.py | 6 +++--- main.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/common/utils.py b/common/utils.py index b01be0f8..9f7b704a 100644 --- a/common/utils.py +++ b/common/utils.py @@ -27,7 +27,7 @@ class TabbyRequestError(BaseModel): error: TabbyRequestErrorMessage -def get_generator_error(message: str): +def get_generator_error(message: str, exc_info: bool = True): """Get a generator error.""" generator_error = handle_request_error(message) @@ -35,7 +35,7 @@ return get_sse_packet(generator_error.model_dump_json()) -def handle_request_error(message: str): +def handle_request_error(message: str, exc_info: bool = True): """Log a request error to the console.""" error_message = TabbyRequestErrorMessage( @@ -45,7 +45,7 @@ request_error = TabbyRequestError(error=error_message) # Log the error and provided message to the console - if error_message.trace: + if error_message.trace and exc_info: logger.error(error_message.trace) logger.error(f"Sent to request: {message}") diff --git a/main.py b/main.py index d8aa07db..38480a59 100644 --- a/main.py +++ b/main.py @@ -93,11 +93,14 @@ async def _check_model_container(): + """Checks if a model isn't loading or loaded.""" + if MODEL_CONTAINER is None or not ( MODEL_CONTAINER.model_is_loading or MODEL_CONTAINER.model_loaded ): error_message = handle_request_error( - "No models are currently loaded." 
+ "No models are currently loaded.", + exc_info=False, ).error.message raise HTTPException(400, error_message) @@ -221,6 +224,7 @@ async def generator(): # Unload the existing model if MODEL_CONTAINER and MODEL_CONTAINER.model: + logger.info("Unloading existing model.") await unload_model() MODEL_CONTAINER = ExllamaV2Container(model_path.resolve(), False, **load_data) @@ -231,7 +235,10 @@ async def generator(): try: for module, modules in load_status: if await request.is_disconnected(): - await unload_model() + logger.error( + "Model load cancelled by user. " + "Please make sure to run unload to free up resources." + ) break if module == 0: