From a1d1b8b566d03db5d5d91fc565e81c761666a4e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Sch=C3=B6nnenbeck?= Date: Mon, 30 Sep 2024 08:40:02 +0200 Subject: [PATCH 1/4] Make policy settable from EngineArgs --- vllm/engine/arg_utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 0efb0cbbf8bec..6f00b97868c1d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -2,8 +2,8 @@ import dataclasses import json from dataclasses import dataclass -from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, - Type, Union) +from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional, + Tuple, Type, Union) import torch @@ -177,6 +177,7 @@ class EngineArgs: disable_async_output_proc: bool = False override_neuron_config: Optional[Dict[str, Any]] = None mm_processor_kwargs: Optional[Dict[str, Any]] = None + scheduling_policy: Literal["fcfs", "priority"] = "fcfs" def __post_init__(self): if self.tokenizer is None: @@ -797,6 +798,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: default=None, help="override or set neuron device configuration.") + parser.add_argument( + '--scheduling-policy', + choices=['fcfs', 'priority'], + default="fcfs", + help='The scheduling policy to use. "fcfs" (default) or "priority"' + ) + return parser @classmethod @@ -1011,6 +1019,7 @@ def create_engine_config(self) -> EngineConfig: multi_step_stream_outputs=self.multi_step_stream_outputs, send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER and parallel_config.use_ray), + policy=self.scheduling_policy, ) lora_config = LoRAConfig( max_lora_rank=self.max_lora_rank, From cff3fdeaf6ea77d8c4b5c00dc7bca3d7aa28d0fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Sch=C3=B6nnenbeck?= Date: Mon, 30 Sep 2024 12:07:11 +0200 Subject: [PATCH 2/4] Improved documentation --- vllm/engine/arg_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6f00b97868c1d..7bee562081552 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -802,8 +802,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: '--scheduling-policy', choices=['fcfs', 'priority'], default="fcfs", - help='The scheduling policy to use. "fcfs" (default) or "priority"' - ) + help='The scheduling policy to use. "fcfs" (first come first serve' + ', i.e. requests are handled in order of arrival, this is the ' + 'default) or "priority" (requests are handled based on given ' + 'priority (lower value means earlier handling) and time of ' + 'arrival deciding any ties).') return parser From 1781cb7d4634449582b0da42f9db9734194ddaf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Sch=C3=B6nnenbeck?= Date: Mon, 30 Sep 2024 12:07:25 +0200 Subject: [PATCH 3/4] Improved documentation --- vllm/engine/arg_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7bee562081552..2b61c138d972d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -803,8 +803,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: choices=['fcfs', 'priority'], default="fcfs", help='The scheduling policy to use. "fcfs" (first come first serve' - ', i.e. requests are handled in order of arrival, this is the ' - 'default) or "priority" (requests are handled based on given ' + ', i.e. requests are handled in order of arrival; default) ' + 'or "priority" (requests are handled based on given ' 'priority (lower value means earlier handling) and time of ' 'arrival deciding any ties).') From 5d44fd87a75bdbd6465862c1b43132097cb6c37b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Sch=C3=B6nnenbeck?= Date: Mon, 30 Sep 2024 12:08:29 +0200 Subject: [PATCH 4/4] typo --- vllm/engine/arg_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 2b61c138d972d..208766a18e99c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -802,7 +802,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: '--scheduling-policy', choices=['fcfs', 'priority'], default="fcfs", - help='The scheduling policy to use. "fcfs" (first come first serve' + help='The scheduling policy to use. "fcfs" (first come first served' ', i.e. requests are handled in order of arrival; default) ' 'or "priority" (requests are handled based on given ' 'priority (lower value means earlier handling) and time of '