From 4e730ab6108ddead8cc26351402dabff15af91dc Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 2 Dec 2024 16:16:29 -0500
Subject: [PATCH] Revert offline_inference_vision_language.py

---
 examples/offline_inference_vision_language.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
index e2afe64d2021b..f08f22eec164a 100644
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -5,8 +5,6 @@
 For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
-import time
-
 from transformers import AutoTokenizer
 
 from vllm import LLM, SamplingParams
@@ -26,7 +24,6 @@ def run_llava(question: str, modality: str):
     prompt = f"USER: <image>\n{question}\nASSISTANT:"
 
     llm = LLM(model="llava-hf/llava-1.5-7b-hf", max_model_len=4096)
-
     stop_token_ids = None
     return llm, prompt, stop_token_ids
 
@@ -517,10 +514,7 @@ def main(args):
         },
     } for _ in range(args.num_prompts)]
 
-    start_time = time.time()
     outputs = llm.generate(inputs, sampling_params=sampling_params)
-    elapsed_time = time.time() - start_time
-    print("generate time = {}".format(elapsed_time))
 
     for o in outputs:
         generated_text = o.outputs[0].text