diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index fcb1df1d27928..dcbb7b7825aa5 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -395,7 +395,6 @@ def _construct_prompt_tokens_tensor( # TODO - Add a method in vllm/utils.py to pad a numpy array similar # to make_tensor_with_pad which takes a list and move the logic # there. - padded_prompts = np.full((self.num_reqs, max_prompt_len), vocab_size, dtype=np.int64)