From f344924ca54839365e5b5e9e9b2e2cb6069cd3b6 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov"
Date: Sat, 27 Jul 2024 01:15:31 +0000
Subject: [PATCH] tests: re-wrap long lines in fp8_offline_inference.py and
 drop unused pytest import

---
 tests/fp8_offline_inference.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/fp8_offline_inference.py b/tests/fp8_offline_inference.py
index 9f4e306e9df5e..578e40970de86 100644
--- a/tests/fp8_offline_inference.py
+++ b/tests/fp8_offline_inference.py
@@ -1,4 +1,3 @@
-import pytest
 from vllm import LLM, SamplingParams
 
 
@@ -10,7 +9,8 @@ def test_fp8_offline_inference():
     llm = LLM(
         model="/data/models/llama-2-7b-chat-hf",
         kv_cache_dtype="fp8",
-        quantization_param_path="./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
+        quantization_param_path = \
+            "./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
     )
 
     prompt = "London is the capital of"
@@ -18,6 +18,7 @@ def test_fp8_offline_inference():
 
     # Generate model response
     out = llm.generate(prompt, sampling_params)[0].outputs[0].text
 
-    assert out == " England and the United Kingdom. It is located in the southeastern part of"
+    assert out == ( " England and the United Kingdom."
+        " It is located in the southeastern part of")
     #print(out)