Skip to content

Commit

Permalink
Fix spawn vs fork issue using approach from #8823
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth committed Nov 26, 2024
1 parent c4fcfce commit bedd593
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
11 changes: 9 additions & 2 deletions vllm/v1/executor/multiproc_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
from vllm.logger import init_logger
from vllm.triton_utils import maybe_set_triton_cache_manager
-from vllm.utils import (get_distributed_init_method, get_open_port,
-get_vllm_instance_id)
+from vllm.utils import (cuda_is_initialized, get_distributed_init_method,
+get_open_port, get_vllm_instance_id)
from vllm.v1.core.scheduler_output import ExecutorMsg, ExecutorMsgType
from vllm.v1.outputs import ModelRunnerOutput
from vllm.v1.worker.gpu_worker import WorkerProc, WorkerProcHandle
Expand Down Expand Up @@ -43,6 +43,13 @@ def __init__(self, vllm_config: VllmConfig) -> None:
f"tensor_parallel_size ({tensor_parallel_size}) -- pipeline "
f"parallelism is not yet implemented in v1")

+if (cuda_is_initialized()
+and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") != "spawn"):
+logger.warning("CUDA was previously initialized. We must use "
+"the `spawn` multiprocessing start method. Setting "
+"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

# Ensure that VLLM_INSTANCE_ID is set, to be inherited by workers
os.environ["VLLM_INSTANCE_ID"] = get_vllm_instance_id()

Expand Down
10 changes: 2 additions & 8 deletions vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""A GPU worker class."""
import gc
-import multiprocessing
import os
import pickle
from dataclasses import dataclass
Expand All @@ -19,6 +18,7 @@
set_custom_all_reduce)
from vllm.distributed.device_communicators.shm_broadcast import (Handle,
MessageQueue)
+from vllm.executor.multiproc_worker_utils import get_mp_context
from vllm.logger import init_logger
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform
Expand Down Expand Up @@ -343,13 +343,7 @@ def make_worker_process(
distributed_init_method: str,
input_shm_handle, # Receive SchedulerOutput
) -> WorkerProcHandle:
-# The current process might have CUDA context,
-# so we need to spawn a new process.
-# NOTE(rob): this is a problem for using EngineCoreProc w/
-# LLM, since we need a if __name__ == "__main__" guard.
-
-# TODO(tms): fix before landing
-context = multiprocessing.get_context("fork")
+context = get_mp_context()

# ZMQ paths to send back and forth to worker process
# Used for initialization.
Expand Down

0 comments on commit bedd593

Please sign in to comment.