diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 75b198e115905..7e58069e2c22d 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -156,11 +156,13 @@ def add_cli_args(
                             type=int,
                             default=EngineArgs.swap_space,
                             help='CPU swap space size (GiB) per GPU')
-        parser.add_argument('--gpu-memory-utilization',
-                            type=float,
-                            default=EngineArgs.gpu_memory_utilization,
-                            help='the percentage of GPU memory to be used for '
-                            'the model executor')
+        parser.add_argument(
+            '--gpu-memory-utilization',
+            type=float,
+            default=EngineArgs.gpu_memory_utilization,
+            help='the fraction of GPU memory to be used for '
+            'the model executor, which can range from 0 to 1. '
+            'If unspecified, will use the default value of 0.9.')
         parser.add_argument('--max-num-batched-tokens',
                             type=int,
                             default=EngineArgs.max_num_batched_tokens,