
Refactor benchmark_throughput.py to pass TextPrompt instead of string
* This is preparation for supporting multi-modal input, by reusing the existing TextPrompt structure
* No significant metrics difference; see below:
 - before: Throughput: 13.99 requests/s, 2933.11 total tokens/s, 2758.10 output tokens/s
 - after: Throughput: 13.99 requests/s, 2932.69 total tokens/s, 2757.70 output tokens/s
 - test command: `python benchmarks/benchmark_throughput.py --model mistral-community/pixtral-12b --max-model-len=8192 --dataset ../sharegpt4v_instruct_gpt4-vision_cap100k.json`
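
For context, a minimal sketch of the multi-modal direction this enables. It assumes `TextPrompt` (a TypedDict from `vllm.inputs`) exposes an optional `multi_modal_data` field typed as the `MultiModalDataDict` this benchmark already imports; the image file name is illustrative:

```python
from PIL import Image

from vllm.inputs import TextPrompt

# This commit: text-only, wrapping what used to be a plain string.
prompt = TextPrompt(prompt="What is shown in this image?")

# Follow-up work: the same TypedDict can carry image data alongside the
# text via its optional multi_modal_data field.
image_prompt = TextPrompt(
    prompt="What is shown in this image?",
    multi_modal_data={"image": Image.open("example.jpg")},
)
```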
Linkun Chen committed Oct 25, 2024
1 parent 84a579e commit aa98eac
10 changes: 5 additions & 5 deletions benchmarks/benchmark_throughput.py
@@ -15,7 +15,7 @@
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import (
     build_async_engine_client_from_engine_args)
-from vllm.inputs import SingletonPrompt
+from vllm.inputs import TextPrompt
 from vllm.multimodal import MultiModalDataDict
 from vllm.sampling_params import BeamSearchParams
 from vllm.utils import FlexibleArgumentParser, merge_async_iterators
@@ -95,10 +95,10 @@ def run_vllm(
     llm = LLM(**dataclasses.asdict(engine_args))
 
     # Add the requests to the engine.
-    prompts: List[SingletonPrompt] = []
+    prompts: List[TextPrompt] = []
     sampling_params: List[SamplingParams] = []
     for request in requests:
-        prompts.append(request.prompt)
+        prompts.append(TextPrompt(prompt=request.prompt))
         sampling_params.append(
             SamplingParams(
                 n=n,
@@ -144,10 +144,10 @@ async def run_vllm_async(
             engine_args, disable_frontend_multiprocessing) as llm:
 
         # Add the requests to the engine.
-        prompts: List[SingletonPrompt] = []
+        prompts: List[TextPrompt] = []
         sampling_params: List[SamplingParams] = []
         for request in requests:
-            prompts.append(request.prompt)
+            prompts.append(TextPrompt(prompt=request.prompt))
             sampling_params.append(
                 SamplingParams(
                     n=n,
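
End to end, a minimal usage sketch of what the refactored loop produces (`SingletonPrompt` is a union that already covers both plain strings and `TextPrompt`, so the narrower annotation stays compatible). The model name comes from the test command above; prompt texts and sampling values are illustrative:

```python
from vllm import LLM, SamplingParams
from vllm.inputs import TextPrompt

llm = LLM(model="mistral-community/pixtral-12b", max_model_len=8192)

# One TextPrompt and one SamplingParams per request, mirroring the loop
# in the diff above.
prompts = [TextPrompt(prompt="Hello, how are you?"),
           TextPrompt(prompt="Write a haiku about benchmarks.")]
sampling_params = [SamplingParams(n=1, temperature=1.0, max_tokens=128)
                   for _ in prompts]

outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(output.outputs[0].text)
```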
