Skip to content

Commit

Permalink
API: Auto-unload on a load request
Browse files (browse the repository at this point in the history)
Automatically unload the existing model when calling /load. This has been
requested many times and makes more sense in the long run.

Signed-off-by: kingbri <[email protected]>
  • Loading branch information
bdashore3 committed Feb 22, 2024
1 parent 368eb2e commit bee26a2
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
2 changes: 2 additions & 0 deletions backends/exllamav2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,8 @@ def unload(self, loras_only: bool = False):
gc.collect()
torch.cuda.empty_cache()

logger.info("Model unloaded.")

def encode_tokens(self, text: str, **kwargs):
"""Wrapper to encode tokens from a text string"""

Expand Down
14 changes: 11 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,19 @@ async def load_model(request: Request, data: ModelLoadRequest):
"""Loads a model into the model container."""
global MODEL_CONTAINER

if not data.name:
raise HTTPException(400, "A model name was not provided.")

# Unload the existing model
if MODEL_CONTAINER and MODEL_CONTAINER.model:
raise HTTPException(400, "A model is already loaded! Please unload it first.")
loaded_model_name = MODEL_CONTAINER.get_model_path().name

if not data.name:
raise HTTPException(400, "model_name not found.")
if loaded_model_name == data.name:
raise HTTPException(
400, f"Model \"{loaded_model_name}\"is already loaded! Aborting."
)
else:
MODEL_CONTAINER.unload()

model_path = pathlib.Path(unwrap(get_model_config().get("model_dir"), "models"))
model_path = model_path / data.name
Expand Down

0 comments on commit bee26a2

Please sign in to comment.