diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index bf8b6787..16293927 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -464,6 +464,8 @@ def unload(self, loras_only: bool = False): gc.collect() torch.cuda.empty_cache() + logger.info("Model unloaded.") + def encode_tokens(self, text: str, **kwargs): """Wrapper to encode tokens from a text string""" diff --git a/main.py b/main.py index 85907ef8..4b4e512d 100644 --- a/main.py +++ b/main.py @@ -172,11 +172,19 @@ async def load_model(request: Request, data: ModelLoadRequest): """Loads a model into the model container.""" global MODEL_CONTAINER + if not data.name: + raise HTTPException(400, "A model name was not provided.") + + # Unload the existing model if MODEL_CONTAINER and MODEL_CONTAINER.model: - raise HTTPException(400, "A model is already loaded! Please unload it first.") + loaded_model_name = MODEL_CONTAINER.get_model_path().name - if not data.name: - raise HTTPException(400, "model_name not found.") + if loaded_model_name == data.name: + raise HTTPException( + 400, f"Model \"{loaded_model_name}\" is already loaded! Aborting." + ) + else: + MODEL_CONTAINER.unload() model_path = pathlib.Path(unwrap(get_model_config().get("model_dir"), "models")) model_path = model_path / data.name