Skip to content

Commit

Permalink
hacky hacky hacky cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth committed Nov 26, 2024
1 parent b5bac31 commit c4fcfce
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
10 changes: 10 additions & 0 deletions vllm/distributed/parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,16 @@ def destroy_model_parallel():
_PP = None


# In V1, Calling _TP.destroy() results in 2 leaked shared memory objects. This
# is related to the torch.distributed.destroy_process_group calls. However, not
# cleaning up its mq_broadcaster results in 1 leaked shm object.
# TODO: Fix up this hack
def destroy_tp_mq_broadcaster():
    """Drop the TP group's message-queue broadcaster without destroying _TP.

    Workaround for V1 shutdown: fully destroying the TP group leaks shared
    memory via torch.distributed.destroy_process_group, so instead we only
    release the mq_broadcaster (whose shm object would otherwise leak).
    No-op when tensor parallelism was never initialized.
    """
    global _TP
    if not _TP:
        return
    _TP.mq_broadcaster = None


def destroy_distributed_environment():
global _WORLD
if _WORLD:
Expand Down
4 changes: 2 additions & 2 deletions vllm/v1/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def _get_executor_cls(cls, vllm_config: VllmConfig):
executor_class = MultiprocExecutor
else:
assert (distributed_executor_backend is None)
from vllm.v1.executor.monoproc_executor import MonoprocExecutor
executor_class = MonoprocExecutor
from vllm.v1.executor.uniproc_executor import UniprocExecutor
executor_class = UniprocExecutor

return executor_class

Expand Down
7 changes: 6 additions & 1 deletion vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

import vllm.envs as envs
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
from vllm.distributed import (ensure_model_parallel_initialized,
from vllm.distributed import (destroy_tp_mq_broadcaster,
ensure_model_parallel_initialized,
init_distributed_environment,
set_custom_all_reduce)
from vllm.distributed.device_communicators.shm_broadcast import (Handle,
Expand Down Expand Up @@ -390,6 +391,10 @@ def run_worker(*args, **kwargs):

worker.execute_model_busy_loop()

# Clean up once worker exits busy loop
worker = None
destroy_tp_mq_broadcaster()

except KeyboardInterrupt:
logger.debug("Worker interrupted.")

Expand Down

0 comments on commit c4fcfce

Please sign in to comment.