Add pin_memory() calls for the prompt and output token tensors

vllm-project · Jul 16, 2024 · 9320301 · 9320301
1 parent 550753b
commit 9320301
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py
@@ -522,8 +522,8 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float],
             pin_memory=pin_memory,
         )
         if do_penalties:
-            prompt_tensor = torch.from_numpy(prompt_padded_tokens)
-            output_tensor = torch.from_numpy(output_padded_tokens)
+            prompt_tensor = torch.from_numpy(prompt_padded_tokens).pin_memory()
+            output_tensor = torch.from_numpy(output_padded_tokens).pin_memory()
         else:
             prompt_tensor = None
             output_tensor = None