[V1] Unify VLLM_ENABLE_V1_MULTIPROCESSING handling in RayExecutor
Signed-off-by: Rui Qiao <[email protected]>
ruisearch42 committed Dec 24, 2024
1 parent a491d6f commit bb43070
Showing 3 changed files with 3 additions and 8 deletions.
tests/basic_correctness/test_basic_correctness.py (0 additions, 5 deletions)
@@ -127,11 +127,6 @@ def test_models_distributed(
     if attention_backend:
         os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend
 
-    # Import VLLM_USE_V1 dynamically to handle patching
-    from vllm.envs import VLLM_USE_V1
-    if VLLM_USE_V1 and distributed_executor_backend != "mp":
-        os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
-
     dtype = "half"
     max_tokens = 5

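For context, the deleted block forced V1 multiprocessing off whenever the executor backend was not "mp", by setting an environment variable before the engine was built. A runnable sketch of that pattern (the helper name maybe_disable_v1_multiprocessing is ours, not vLLM's; the real test inlined this logic):

import os

def maybe_disable_v1_multiprocessing(use_v1: bool, backend: str) -> None:
    # Mirror of the deleted test logic: the env var must be set before
    # the engine is constructed, which is why the test did this up front.
    if use_v1 and backend != "mp":
        os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"

maybe_disable_v1_multiprocessing(use_v1=True, backend="ray")
assert os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] == "0"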
vllm/v1/engine/llm_engine.py (0 additions, 2 deletions)
@@ -21,7 +21,6 @@
 from vllm.v1.engine.detokenizer import Detokenizer
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.executor.ray_utils import initialize_ray_cluster
 
 logger = init_logger(__name__)

@@ -112,7 +111,6 @@ def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
         distributed_executor_backend = (
             vllm_config.parallel_config.distributed_executor_backend)
         if distributed_executor_backend == "ray":
-            initialize_ray_cluster(vllm_config.parallel_config)
             from vllm.v1.executor.ray_executor import RayExecutor
             executor_class = RayExecutor
         elif distributed_executor_backend == "mp":
vllm/v1/executor/ray_executor.py (3 additions, 1 deletion)
@@ -8,7 +8,7 @@
 from vllm.logger import init_logger
 from vllm.utils import get_distributed_init_method, get_ip, get_open_port
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.executor.ray_utils import RayWorkerWrapper, ray
+from vllm.v1.executor.ray_utils import RayWorkerWrapper, ray, initialize_ray_cluster
    [Check failure: GitHub Actions / ruff (3.12), Ruff E501: vllm/v1/executor/ray_executor.py:11:81: Line too long (84 > 80)]
 from vllm.v1.outputs import ModelRunnerOutput
 
 if ray is not None:
@@ -33,7 +33,9 @@ def __init__(self, vllm_config: VllmConfig) -> None:
         if ray_usage != "1":
             os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
 
+        initialize_ray_cluster(self.parallel_config)
         placement_group = self.parallel_config.placement_group
 
         # Create the parallel GPU workers.
         self._init_workers_ray(placement_group)
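
Net effect: Ray cluster initialization moves out of LLMEngine._get_executor_cls and into RayExecutor.__init__, so every RayExecutor performs its own Ray setup. A minimal, runnable sketch of the post-commit flow; the ParallelConfig and initialize_ray_cluster stubs below are ours and model only what the diff shows (the real initialize_ray_cluster lives in vllm/v1/executor/ray_utils):

from dataclasses import dataclass
from typing import Optional

@dataclass
class ParallelConfig:
    distributed_executor_backend: str = "ray"
    placement_group: Optional[object] = None

def initialize_ray_cluster(parallel_config: ParallelConfig) -> None:
    # Stub for vllm.v1.executor.ray_utils.initialize_ray_cluster: per the
    # diff, it is expected to populate parallel_config.placement_group.
    parallel_config.placement_group = object()

class RayExecutor:
    def __init__(self, parallel_config: ParallelConfig) -> None:
        self.parallel_config = parallel_config
        # Unified here: the executor runs Ray setup itself instead of
        # relying on the engine to have called initialize_ray_cluster.
        initialize_ray_cluster(self.parallel_config)
        placement_group = self.parallel_config.placement_group
        assert placement_group is not None  # workers would be created next

executor = RayExecutor(ParallelConfig())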

