From c6b43b25167953f1bb220a2943f8f23d06777422 Mon Sep 17 00:00:00 2001
From: Haotian Zhang
Date: Wed, 14 Feb 2024 17:28:43 -0500
Subject: [PATCH] Add __del__ at Vllm for deleting model (#10733)

* Add __del__ at Vllm for deleting model

* cr
---
 .../llama_index/llms/vllm/base.py             | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
index 5fdd577c7dedb..93c795871b23f 100644
--- a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
@@ -212,6 +212,17 @@ def _model_kwargs(self) -> Dict[str, Any]:
         }
         return {**base_kwargs}
 
+    def __del__(self) -> None:
+        import torch
+        from vllm.model_executor.parallel_utils.parallel_state import (
+            destroy_model_parallel,
+        )
+
+        destroy_model_parallel()
+        del self._client
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+
     def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
         return {
             **self._model_kwargs,
@@ -262,7 +273,8 @@ async def achat(
     async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
     ) -> CompletionResponse:
-        raise (ValueError("Not Implemented"))
+        kwargs = kwargs if kwargs else {}
+        return self.complete(prompt, **kwargs)
 
     @llm_chat_callback()
     async def astream_chat(
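
Usage note (not part of the patch): a minimal sketch of what the new __del__ hook enables, assuming the llama-index-llms-vllm package is installed and a CUDA GPU is available. The model names are purely illustrative; Vllm, complete(), and CompletionResponse.text come from the integration shown in the diff above.

    import gc

    from llama_index.llms.vllm import Vllm

    # Load a first model; Vllm spins up a vLLM engine that holds GPU memory.
    llm = Vllm(model="facebook/opt-125m")  # model name is illustrative
    print(llm.complete("Hello").text)

    # Dropping the last reference triggers Vllm.__del__ (added in this patch),
    # which destroys the model-parallel state and deletes the underlying engine.
    del llm
    gc.collect()

    # A second model can now be loaded in the same process without the first
    # one still holding GPU memory.
    llm = Vllm(model="facebook/opt-350m")
    print(llm.complete("Hello again").text)

After this patch, acomplete() delegates to the synchronous complete() instead of raising, so the calls above could also be awaited, though inference still runs as blocking work.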