From 9cdba9669cb32191aa0ae6782c0648be3e0e44ed Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Mon, 11 Nov 2024 20:55:09 -0500
Subject: [PATCH] [Doc] Update help text for `--distributed-executor-backend`
 (#10231)

Signed-off-by: Russell Bryant
---
 vllm/config.py           |  9 ++++++---
 vllm/engine/arg_utils.py | 11 ++++++++---
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index dc9c06d7fb16e..bb9fee30c8445 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -951,9 +951,12 @@ class ParallelConfig:
             https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
         placement_group: ray distributed model workers placement group.
         distributed_executor_backend: Backend to use for distributed model
-            workers, either "ray" or "mp" (multiprocessing). If either
-            pipeline_parallel_size or tensor_parallel_size is greater than 1,
-            will default to "ray" if Ray is installed or "mp" otherwise.
+            workers, either "ray" or "mp" (multiprocessing). If the product
+            of pipeline_parallel_size and tensor_parallel_size is less than
+            or equal to the number of GPUs available, "mp" will be used to
+            keep processing on a single host. Otherwise, this will default
+            to "ray" if Ray is installed and fail otherwise. Note that tpu
+            and hpu only support Ray for distributed inference.
     """
 
     def __init__(
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 02e67f89e5a8d..1591059a89f92 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -369,9 +369,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             '--distributed-executor-backend',
             choices=['ray', 'mp'],
             default=EngineArgs.distributed_executor_backend,
-            help='Backend to use for distributed serving. When more than 1 GPU '
-            'is used, will be automatically set to "ray" if installed '
-            'or "mp" (multiprocessing) otherwise.')
+            help='Backend to use for distributed model '
+            'workers, either "ray" or "mp" (multiprocessing). If the product '
+            'of pipeline_parallel_size and tensor_parallel_size is less than '
+            'or equal to the number of GPUs available, "mp" will be used to '
+            'keep processing on a single host. Otherwise, this will default '
+            'to "ray" if Ray is installed and fail otherwise. Note that tpu '
+            'and hpu only support Ray for distributed inference.')
+
         parser.add_argument(
             '--worker-use-ray',
             action='store_true',