diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 6a5234bc848d0..e0adde8f5d5f4 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -522,8 +522,8 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], pin_memory=pin_memory, ) if do_penalties: - prompt_tensor = torch.from_numpy(prompt_padded_tokens) - output_tensor = torch.from_numpy(output_padded_tokens) + prompt_tensor = torch.from_numpy(prompt_padded_tokens).pin_memory() + output_tensor = torch.from_numpy(output_padded_tokens).pin_memory() else: prompt_tensor = None output_tensor = None