From aa98eacab6baa60a9272c40b609cfdfcae38cf51 Mon Sep 17 00:00:00 2001
From: Linkun Chen <lkchen@github.com>
Date: Fri, 25 Oct 2024 12:14:54 -0700
Subject: [PATCH] Refactor benchmark_throughput.py to pass TextPrompt instead
 of string

* This is preparation for supporting multi-modal input by reusing the existing TextPrompt structure.
* No significant difference in throughput metrics; see below:
 - before: Throughput: 13.99 requests/s, 2933.11 total tokens/s, 2758.10 output tokens/s
 - after: Throughput: 13.99 requests/s, 2932.69 total tokens/s, 2757.70 output tokens/s
 - test command: `python benchmarks/benchmark_throughput.py --model mistral-community/pixtral-12b  --max-model-len=8192 --dataset ../sharegpt4v_instruct_gpt4-vision_cap100k.json`
---
 benchmarks/benchmark_throughput.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index c4b8c3822edd2..21a4a71b3d1d2 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -15,7 +15,7 @@
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import (
     build_async_engine_client_from_engine_args)
-from vllm.inputs import SingletonPrompt
+from vllm.inputs import TextPrompt
 from vllm.multimodal import MultiModalDataDict
 from vllm.sampling_params import BeamSearchParams
 from vllm.utils import FlexibleArgumentParser, merge_async_iterators
@@ -95,10 +95,10 @@ def run_vllm(
     llm = LLM(**dataclasses.asdict(engine_args))
 
     # Add the requests to the engine.
-    prompts: List[SingletonPrompt] = []
+    prompts: List[TextPrompt] = []
     sampling_params: List[SamplingParams] = []
     for request in requests:
-        prompts.append(request.prompt)
+        prompts.append(TextPrompt(prompt=request.prompt))
         sampling_params.append(
             SamplingParams(
                 n=n,
@@ -144,10 +144,10 @@ async def run_vllm_async(
             engine_args, disable_frontend_multiprocessing) as llm:
 
         # Add the requests to the engine.
-        prompts: List[SingletonPrompt] = []
+        prompts: List[TextPrompt] = []
         sampling_params: List[SamplingParams] = []
         for request in requests:
-            prompts.append(request.prompt)
+            prompts.append(TextPrompt(prompt=request.prompt))
             sampling_params.append(
                 SamplingParams(
                     n=n,