
Commit b151de4

ixlmar and Funatiq authored
[TRTLLM-8377][test] unit tests for TorchSampler batched sampling (#9012)
Signed-off-by: ixlmar <[email protected]>
Co-authored-by: Robin Kobus <[email protected]>
1 parent b894dc2 commit b151de4

File tree

3 files changed: +1375 −18 lines


tensorrt_llm/_torch/pyexecutor/sampler.py

Lines changed: 4 additions & 2 deletions
@@ -673,15 +673,17 @@ def get_generator(self, device: torch.device) -> torch.Generator:
         assert self._generator.device == device
         return self._generator
 
-    def get_spec_tree_manager(self, resource_manager: ResourceManager) -> Optional[SpecTreeManager]:
+    def get_spec_tree_manager(
+        self, resource_manager: Optional[ResourceManager]
+    ) -> Optional[SpecTreeManager]:
         if resource_manager is None:
             return None
         spec_resource_manager = resource_manager.get_resource_manager(
             ResourceManagerType.SPEC_RESOURCE_MANAGER
         )
         if spec_resource_manager is None or not hasattr(spec_resource_manager, "spec_tree_manager"):
             return None
-        return spec_resource_manager.spec_tree_manager
+        return spec_resource_manager.spec_tree_manager  # type: ignore
 
     @staticmethod
     def _meet_max_token_stop_criteria(request: LlmRequest, max_seq_len: int):
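
For context, here is a self-contained sketch of the None-tolerant accessor pattern the widened signature enables; the stand-in classes below are hypothetical simplifications, not the real tensorrt_llm types:

from typing import Optional

# Hypothetical stand-ins, just to make the sketch runnable on its own.
class SpecTreeManager:
    pass

class SpecResourceManager:
    spec_tree_manager = SpecTreeManager()

class ResourceManager:
    def get_resource_manager(self, kind: str) -> Optional[SpecResourceManager]:
        return SpecResourceManager()

def get_spec_tree_manager(
    resource_manager: Optional[ResourceManager],
) -> Optional[SpecTreeManager]:
    # Mirrors the diff: a None resource manager short-circuits to None
    # instead of violating the annotated parameter type.
    if resource_manager is None:
        return None
    spec_resource_manager = resource_manager.get_resource_manager("SPEC_RESOURCE_MANAGER")
    if spec_resource_manager is None or not hasattr(spec_resource_manager, "spec_tree_manager"):
        return None
    return spec_resource_manager.spec_tree_manager

print(get_spec_tree_manager(None))               # None
print(get_spec_tree_manager(ResourceManager()))  # SpecTreeManager instance

Accepting Optional[ResourceManager] matches the body, which already handled None; the signature change makes that contract visible to callers such as unit tests that construct a sampler without a resource manager.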

tensorrt_llm/_torch/pyexecutor/sampling_utils_flashinfer.py

Lines changed: 10 additions & 13 deletions
@@ -91,17 +91,14 @@ def _make_tensor(data: list, dtype: torch.dtype, device: torch.device) -> torch.
     def _prepare_logits_with_temperature(
         logits: torch.Tensor,
         group_logit_indices: Optional[torch.Tensor],
-        temperature: Optional[torch.Tensor],
+        temperature: torch.Tensor,
     ) -> torch.Tensor:
-        if temperature is not None:
-            temperature = temperature.unsqueeze(-1)
-            if group_logit_indices is not None:
-                logits = torch.index_select(logits, 0, group_logit_indices)  # ensures copy
-                logits /= temperature
-            else:
-                logits = logits / temperature  # not inplace
-        elif group_logit_indices is not None:
-            logits = logits[group_logit_indices]
+        temperature = temperature.unsqueeze(-1)
+        if group_logit_indices is not None:
+            logits = torch.index_select(logits, 0, group_logit_indices)  # ensures copy
+            logits /= temperature
+        else:
+            logits = logits / temperature  # not inplace
         return logits
 
     @staticmethod
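
A minimal torch-only sketch of the semantics this hunk preserves, assuming hypothetical example shapes:

import torch

# `temperature` is now required, so the old `if temperature is not None`
# branch is gone. index_select returns a copy, which makes the in-place
# division safe; without indices, the out-of-place division allocates a
# fresh tensor. Either way the caller's logits are never mutated.
logits = torch.randn(4, 8)                   # [num_tokens, vocab_size]
temperature = torch.tensor([1.0, 0.5, 2.0])  # one value per selected row
group_logit_indices = torch.tensor([0, 2, 3])

selected = torch.index_select(logits, 0, group_logit_indices)  # copy
selected /= temperature.unsqueeze(-1)        # safe in-place on the copy

expected = logits[group_logit_indices] / temperature.unsqueeze(-1)
assert torch.allclose(selected, expected)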
@@ -112,12 +109,12 @@ def _prepare_probs_with_temperature(
     ) -> torch.Tensor:
         if group_logit_indices is not None:
             logits = logits[group_logit_indices]
-        logits = flashinfer.sampling.softmax(
+        probs = flashinfer.sampling.softmax(
             logits,
             temperature,
             enable_pdl=ENABLE_PDL,
         )
-        return logits
+        return probs
 
     @classmethod
     def _sample_from_probs(
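
The rename clarifies that the return value is a probability distribution, not logits. Expressed with plain torch, the renamed `probs` holds a softmax over temperature-scaled logits; that flashinfer.sampling.softmax with a per-row temperature matches this reference is an assumption read off the call site above:

import torch

logits = torch.randn(2, 16)
temperature = torch.tensor([0.7, 1.3])

# Reference for what `probs` should contain after the fused kernel runs.
probs = torch.softmax(logits / temperature.unsqueeze(-1), dim=-1)
assert torch.allclose(probs.sum(dim=-1), torch.ones(2))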
@@ -151,7 +148,7 @@ def _sample_with_probs(
         group_logit_indices: Optional[torch.Tensor],
         top_k: Optional[torch.Tensor],
         top_p: Optional[torch.Tensor],
-        temperature: Optional[torch.Tensor],
+        temperature: torch.Tensor,
         generator: Optional[torch.Generator],
     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
         if top_k is not None:
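
The third changed file, the new unit tests, is not shown in this diff view. As a flavor of what a batched-sampling unit test can assert, here is a hedged torch-only sketch checking that sampling a batch from identical probability rows is reproducible under a fixed generator seed; torch.multinomial stands in for the sampler's actual kernel and the real tests exercise TorchSampler itself:

import torch

def test_batched_sampling_reproducible() -> None:
    probs = torch.full((4, 8), 1.0 / 8)  # four uniform rows over 8 tokens
    gen_a = torch.Generator().manual_seed(1234)
    gen_b = torch.Generator().manual_seed(1234)
    tokens_a = torch.multinomial(probs, num_samples=1, generator=gen_a)
    tokens_b = torch.multinomial(probs, num_samples=1, generator=gen_b)
    assert torch.equal(tokens_a, tokens_b)

test_batched_sampling_reproducible()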
