From 637bfff367a27c5e85bc5b1577b9084f8f858923 Mon Sep 17 00:00:00 2001 From: AllentDan Date: Fri, 22 Sep 2023 15:27:57 +0800 Subject: [PATCH] remove eoa of turbomind --- lmdeploy/turbomind/turbomind.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py index bc630430a7..f8a7444546 100644 --- a/lmdeploy/turbomind/turbomind.py +++ b/lmdeploy/turbomind/turbomind.py @@ -168,6 +168,8 @@ def __init__(self, tm_model, cuda_stream_id=0): self.gpu_count = tm_model.gpu_count self.stop_words = tm_model.stop_words + self.stop_tokens = [] if self.stop_words is None else \ + self.stop_words.flatten().tolist() self.eos_id = tm_model.eos_id self.session_len = tm_model.session_len @@ -352,6 +354,8 @@ def _broadcast_np(data, dtype, shape=(batch_size, )): output, len_ = output, len_.item() if len(output) > 0 and output[-1].item() == self.eos_id: outputs.append((output[:-1], len_ - 1)) + elif len(output) > 0 and output[-1].item() in self.stop_tokens: + outputs.append((output[:-1], len_)) else: outputs.append((output, len_))