From 290e015c6c328d08d6074827c5c43371017b27ab Mon Sep 17 00:00:00 2001 From: Suhong Moon <46987248+SuhongMoon@users.noreply.github.com> Date: Mon, 18 Dec 2023 14:33:24 -0500 Subject: [PATCH] Update Help Text for --gpu-memory-utilization Argument (#2183) --- vllm/engine/arg_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 75b198e115905..7e58069e2c22d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -156,11 +156,13 @@ def add_cli_args( type=int, default=EngineArgs.swap_space, help='CPU swap space size (GiB) per GPU') - parser.add_argument('--gpu-memory-utilization', - type=float, - default=EngineArgs.gpu_memory_utilization, - help='the percentage of GPU memory to be used for ' - 'the model executor') + parser.add_argument( + '--gpu-memory-utilization', + type=float, + default=EngineArgs.gpu_memory_utilization, + help='the fraction of GPU memory to be used for ' + 'the model executor, which can range from 0 to 1. ' + 'If unspecified, will use the default value of 0.9.') parser.add_argument('--max-num-batched-tokens', type=int, default=EngineArgs.max_num_batched_tokens,