
Commit f344924

Alexei-V-Ivanov-AMD committed Jul 27, 2024
1 parent fe5828d commit f344924
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions tests/fp8_offline_inference.py
@@ -1,4 +1,3 @@
-import pytest
 from vllm import LLM, SamplingParams
 
 
@@ -10,14 +9,16 @@ def test_fp8_offline_inference():
     llm = LLM(
         model="/data/models/llama-2-7b-chat-hf",
         kv_cache_dtype="fp8",
-        quantization_param_path="./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
+        quantization_param_path = \
+            "./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
     )
 
     prompt = "London is the capital of"
 
     # Generate model response
     out = llm.generate(prompt, sampling_params)[0].outputs[0].text
 
-    assert out == " England and the United Kingdom. It is located in the southeastern part of"
+    assert out == (" England and the United Kingdom."
+        " It is located in the southeastern part of")
     #print(out)
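
For reference, below is a minimal sketch of how tests/fp8_offline_inference.py reads after this commit. The collapsed context lines are not shown in the diff, so the sampling_params definition used here (greedy decoding via SamplingParams(temperature=0.0, max_tokens=16)) is an assumption added so the exact-string assert is reproducible; it is not specified by this commit.

# Sketch of the post-commit test. The sampling_params value is assumed
# (it lives in the collapsed context lines of the diff); greedy decoding
# keeps the asserted completion deterministic.
from vllm import LLM, SamplingParams


def test_fp8_offline_inference():
    # Assumed: deterministic sampling so the exact-string assert can pass.
    sampling_params = SamplingParams(temperature=0.0, max_tokens=16)

    llm = LLM(
        model="/data/models/llama-2-7b-chat-hf",
        kv_cache_dtype="fp8",
        quantization_param_path = \
            "./tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json"
    )

    prompt = "London is the capital of"

    # Generate model response
    out = llm.generate(prompt, sampling_params)[0].outputs[0].text

    assert out == (" England and the United Kingdom."
                   " It is located in the southeastern part of")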
