diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index 5c504e0f0217d..b00dd136d4a47 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -140,7 +140,12 @@ async def beam_search( best_beams = sorted_completed[:beam_width] for beam in best_beams: - beam.text = tokenizer.decode(beam.tokens[tokenized_length:]) + if (beam.tokens[-1] == tokenizer.eos_token_id and not ignore_eos): + # Skip the eos token in the text. + tokens = beam.tokens[tokenized_length:-1] + else: + tokens = beam.tokens[tokenized_length:] + beam.text = tokenizer.decode(tokens) beam_search_output = RequestOutput( request_id=request_id,