From aa98eacab6baa60a9272c40b609cfdfcae38cf51 Mon Sep 17 00:00:00 2001
From: Linkun Chen
Date: Fri, 25 Oct 2024 12:14:54 -0700
Subject: [PATCH] Refactor benchmark_throughput.py to pass TextPrompt instead of string

* This is preparation to support multi-modality input, by reusing existing TextPrompt structure
* no significant metrics diff, see below
  - before: Throughput: 13.99 requests/s, 2933.11 total tokens/s, 2758.10 output tokens/s
  - after: Throughput: 13.99 requests/s, 2932.69 total tokens/s, 2757.70 output tokens/s
  - test command: `python benchmarks/benchmark_throughput.py --model mistral-community/pixtral-12b --max-model-len=8192 --dataset ../sharegpt4v_instruct_gpt4-vision_cap100k.json`
---
 benchmarks/benchmark_throughput.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index c4b8c3822edd2..21a4a71b3d1d2 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -15,7 +15,7 @@
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import (
     build_async_engine_client_from_engine_args)
-from vllm.inputs import SingletonPrompt
+from vllm.inputs import TextPrompt
 from vllm.multimodal import MultiModalDataDict
 from vllm.sampling_params import BeamSearchParams
 from vllm.utils import FlexibleArgumentParser, merge_async_iterators
@@ -95,10 +95,10 @@ def run_vllm(
     llm = LLM(**dataclasses.asdict(engine_args))
 
     # Add the requests to the engine.
-    prompts: List[SingletonPrompt] = []
+    prompts: List[TextPrompt] = []
     sampling_params: List[SamplingParams] = []
     for request in requests:
-        prompts.append(request.prompt)
+        prompts.append(TextPrompt(prompt=request.prompt))
         sampling_params.append(
             SamplingParams(
                 n=n,
@@ -144,10 +144,10 @@ async def run_vllm_async(
             engine_args, disable_frontend_multiprocessing) as llm:
 
         # Add the requests to the engine.
-        prompts: List[SingletonPrompt] = []
+        prompts: List[TextPrompt] = []
         sampling_params: List[SamplingParams] = []
         for request in requests:
-            prompts.append(request.prompt)
+            prompts.append(TextPrompt(prompt=request.prompt))
             sampling_params.append(
                 SamplingParams(
                     n=n,