From 4294bffc62ac2e3c8fd1d36144df8989bba834e3 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Thu, 26 Oct 2023 06:52:58 +0000 Subject: [PATCH] fix `vocab_size` --- src/turbomind/models/llama/LlamaBatch.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/turbomind/models/llama/LlamaBatch.cc b/src/turbomind/models/llama/LlamaBatch.cc index 9f34f5a383..3ca687e885 100644 --- a/src/turbomind/models/llama/LlamaBatch.cc +++ b/src/turbomind/models/llama/LlamaBatch.cc @@ -525,7 +525,7 @@ bool LlamaBatch::generate() decoder_output_buf_, batch_size_); - CheckValues(logits_buf_, batch_size_ * llama_->vocab_size_, "post_decode_embedding", stream_); + CheckValues(logits_buf_, batch_size_ * llama_->vocab_size_padded_, "post_decode_embedding", stream_); // stop-words & bad-words require the matched tokens to be contiguous, so item size > 1 is // not supported yet.