Skip to content

Commit

Permalink
fix graceful shutdown
Browse files Browse the repository at this point in the history
  • Loading branch information
kzawora-intel committed Dec 10, 2024
1 parent 1ca44ba commit aa46efe
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
3 changes: 3 additions & 0 deletions vllm/executor/multiproc_hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def _check_executor_parameters(self):
f"please ensure that world_size ({world_size}) "
f"is less than than max local hpu count ({hpu_device_count})")

def __del__(self):
    """Shut the executor down when this instance is finalized.

    Delegates to ``self.shutdown()``. That method is defined outside this
    view; presumably it stops the worker processes -- confirm against the
    parent executor class.
    """
    # Tie cleanup to finalization so an executor that is dropped without an
    # explicit shutdown() call still goes through the shutdown path.
    self.shutdown()


class MultiprocessingHPUExecutorAsync(MultiprocessingHPUExecutor,
MultiprocessingGPUExecutorAsync):
Expand Down
17 changes: 17 additions & 0 deletions vllm/executor/multiproc_worker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import vllm.envs as envs
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.triton_utils.importing import HAS_TRITON
from vllm.utils import cuda_is_initialized

Expand Down Expand Up @@ -291,6 +292,22 @@ def set_multiprocessing_worker_envs(parallel_config):
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

if (current_platform.is_hpu()
and parallel_config.distributed_executor_backend == 'mp'
and envs.VLLM_WORKER_MULTIPROC_METHOD == 'fork'):
if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) is not None:
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
"cause application hangs on exit. Using "
"VLLM_WORKER_MULTIPROC_METHOD=fork anyway, "
"as it was explicitly requested.")
else:
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
"cause application hangs on exit. Setting "
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. "
"To override that behavior, please set "
"VLLM_WORKER_MULTIPROC_METHOD=fork explicitly.")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

# Configure thread parallelism if OMP_NUM_THREADS isn't set
#
# Helps to avoid CPU contention. The default of spawning a thread per
Expand Down

0 comments on commit aa46efe

Please sign in to comment.