API: Auto-unload on a load request

Automatically unload the existing model when calling /load. This has been requested many times and makes more sense in the long run.

Signed-off-by: kingbri <[email protected]>
theroyallab · Feb 22, 2024 · bee26a2 · bee26a2
1 parent 368eb2e
commit bee26a2
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 3 deletions.
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
@@ -464,6 +464,8 @@ def unload(self, loras_only: bool = False):
         gc.collect()
         torch.cuda.empty_cache()
 
+        logger.info("Model unloaded.")
+
     def encode_tokens(self, text: str, **kwargs):
         """Wrapper to encode tokens from a text string"""
 

diff --git a/main.py b/main.py
@@ -172,11 +172,19 @@ async def load_model(request: Request, data: ModelLoadRequest):
     """Loads a model into the model container."""
     global MODEL_CONTAINER
 
+    if not data.name:
+        raise HTTPException(400, "A model name was not provided.")
+
+    # Unload the existing model
     if MODEL_CONTAINER and MODEL_CONTAINER.model:
-        raise HTTPException(400, "A model is already loaded! Please unload it first.")
+        loaded_model_name = MODEL_CONTAINER.get_model_path().name
 
-    if not data.name:
-        raise HTTPException(400, "model_name not found.")
+        if loaded_model_name == data.name:
+            raise HTTPException(
+                400, f"Model \"{loaded_model_name}\"is already loaded! Aborting."
+            )
+        else:
+            MODEL_CONTAINER.unload()
 
     model_path = pathlib.Path(unwrap(get_model_config().get("model_dir"), "models"))
     model_path = model_path / data.name