Skip to content

Commit

Permalink
hacky hacky hacky cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth committed Nov 26, 2024
1 parent b5bac31 commit c4fcfce
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
10 changes: 10 additions & 0 deletions vllm/distributed/parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,16 @@ def destroy_model_parallel():
_PP = None


# In V1, Calling _TP.destroy() results in 2 leaked shared memory objects. This
# is related to the torch.distributed.destroy_process_group calls. However, not
# cleaning up its mq_broadcaster results in 1 leaked shm object.
# TODO: Fix up this hack
def destroy_tp_mq_broadcaster():
    """Drop the TP group's message-queue broadcaster without destroying _TP.

    Workaround for V1 shutdown: fully destroying the TP group leaks shared
    memory via torch.distributed.destroy_process_group, so instead we only
    release the mq_broadcaster (whose shm object would otherwise leak).
    No-op when tensor parallelism was never initialized.
    """
    global _TP
    if not _TP:
        return
    _TP.mq_broadcaster = None


def destroy_distributed_environment():
global _WORLD
if _WORLD:
Expand Down
4 changes: 2 additions & 2 deletions vllm/v1/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def _get_executor_cls(cls, vllm_config: VllmConfig):
executor_class = MultiprocExecutor
else:
assert (distributed_executor_backend is None)
from vllm.v1.executor.monoproc_executor import MonoprocExecutor
executor_class = MonoprocExecutor
from vllm.v1.executor.uniproc_executor import UniprocExecutor
executor_class = UniprocExecutor

return executor_class

Expand Down
7 changes: 6 additions & 1 deletion vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

import vllm.envs as envs
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
from vllm.distributed import (ensure_model_parallel_initialized,
from vllm.distributed import (destroy_tp_mq_broadcaster,
ensure_model_parallel_initialized,
init_distributed_environment,
set_custom_all_reduce)
from vllm.distributed.device_communicators.shm_broadcast import (Handle,
Expand Down Expand Up @@ -390,6 +391,10 @@ def run_worker(*args, **kwargs):

worker.execute_model_busy_loop()

# Clean up once worker exits busy loop
worker = None
destroy_tp_mq_broadcaster()

except KeyboardInterrupt:
logger.debug("Worker interrupted.")

Expand Down

0 comments on commit c4fcfce

Please sign in to comment.