From f344924ca54839365e5b5e9e9b2e2cb6069cd3b6 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov"
Date: Sat, 27 Jul 2024 01:15:31 +0000
Subject: [PATCH] tests: re-wrap long lines in fp8_offline_inference.py and
 drop unused pytest import

---
 tests/fp8_offline_inference.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/fp8_offline_inference.py b/tests/fp8_offline_inference.py
index 9f4e306e9df5e..578e40970de86 100644
--- a/tests/fp8_offline_inference.py
+++ b/tests/fp8_offline_inference.py
@@ -1,4 +1,3 @@
-import pytest
 from vllm import LLM, SamplingParams
 
 
@@ -10,7 +9,8 @@ def test_fp8_offline_inference():
     llm = LLM(
         model="/data/models/llama-2-7b-chat-hf",
         kv_cache_dtype="fp8",
-        quantization_param_path="./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
+        quantization_param_path = \
+            "./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
     )
 
     prompt = "London is the capital of"
@@ -18,6 +18,7 @@ def test_fp8_offline_inference():
 
     # Generate model response
     out = llm.generate(prompt, sampling_params)[0].outputs[0].text
 
-    assert out == " England and the United Kingdom. It is located in the southeastern part of"
+    assert out == ( " England and the United Kingdom."
+        " It is located in the southeastern part of")
     #print(out)