Commit 06c354b

remove unused functions
1 parent 163b097 commit 06c354b

File tree

2 files changed: +0 additions, -73 deletions


tensorrt_llm/_torch/models/modeling_gemma3vl.py

Lines changed: 0 additions & 42 deletions
@@ -107,48 +107,6 @@ def __call__(
         }
 
 
-def get_gemma3_causal_mask(
-        input_ids: torch.Tensor,
-        image_token_index: int,
-        sliding_window: Optional[int] = None,
-):
-    print("[get_gemma3_causal_mask] input_ids: ", input_ids)
-    assert input_ids.ndim == 1, "input_ids should be a 1D tensor."
-    # Get token type ids. 0 corresponds to text tokens, 1 corresponds to image tokens.
-    token_type_ids = torch.zeros_like(input_ids, device=input_ids.device)
-    image_token_mask = (input_ids == image_token_index).to(
-        device=input_ids.device, dtype=torch.bool)
-    token_type_ids[image_token_mask] = 1
-
-    sequence_length = input_ids.shape[-1]
-    # TODO: Use causal when sliding_window is larger than sequence_length.
-    if sliding_window is None:
-        causal_mask = torch.arange(
-            sequence_length,
-            device=input_ids.device).unsqueeze(0) <= torch.arange(
-                sequence_length, device=input_ids.device).unsqueeze(1)
-    else:
-        attention_mask_1 = torch.arange(
-            sequence_length,
-            device=input_ids.device).unsqueeze(0) <= torch.arange(
-                sequence_length, device=input_ids.device).unsqueeze(1)
-        attention_mask_2 = torch.arange(
-            sequence_length,
-            device=input_ids.device).unsqueeze(0) > torch.arange(
-                sequence_length,
-                device=input_ids.device).unsqueeze(1) - sliding_window
-        causal_mask = attention_mask_1 & attention_mask_2
-
-    # Apply a bidirectional mask for image tokens.
-    if token_type_ids is not None:
-        token_type_mask = token_type_ids.unsqueeze(
-            0) == token_type_ids.unsqueeze(1)
-        # If text token, do not change anything.
-        token_type_mask[token_type_ids == 0] = False
-        causal_mask = causal_mask.masked_fill(token_type_mask, True)
-    return causal_mask
-
-
 @register_auto_model("Gemma3ForConditionalGeneration")
 @register_input_processor(Gemma3InputProcessor, model_type="gemma3")
 class Gemma3Model(PreTrainedModel):
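
For reference, the deleted helper combined a causal (and optionally sliding-window) mask with bidirectional attention among image tokens. Below is a minimal standalone sketch of that logic on a toy input; the IMAGE_TOKEN_ID value and the toy sequence are hypothetical, and this is not wired into the TRT-LLM attention path.

import torch
from typing import Optional

IMAGE_TOKEN_ID = 99  # hypothetical image-token id, for illustration only


def toy_gemma3_mask(input_ids: torch.Tensor,
                    sliding_window: Optional[int] = None) -> torch.Tensor:
    """Causal (optionally sliding-window) mask with bidirectional attention
    among image tokens, mirroring the logic removed above."""
    positions = torch.arange(input_ids.shape[-1], device=input_ids.device)
    # Causal part: query i may attend to keys j <= i.
    mask = positions.unsqueeze(0) <= positions.unsqueeze(1)
    if sliding_window is not None:
        # Sliding-window part: only the last `sliding_window` keys
        # (including the current position) remain visible.
        mask = mask & (positions.unsqueeze(0) >
                       positions.unsqueeze(1) - sliding_window)
    # Image tokens attend to each other bidirectionally.
    is_image = input_ids == IMAGE_TOKEN_ID
    mask = mask | (is_image.unsqueeze(0) & is_image.unsqueeze(1))
    return mask


# Toy sequence: a text token, three image tokens, then more text.
ids = torch.tensor([1, 99, 99, 99, 2, 3])
print(toy_gemma3_mask(ids, sliding_window=2).int())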

tests/unittest/_torch/modeling/test_modeling_gemma3.py

Lines changed: 0 additions & 31 deletions
@@ -212,37 +212,6 @@ def test_gemma3_sanity(self):
 
         kv_cache_manager.shutdown()
 
-    def generate_causal_mask(self,
-                             batch_size,
-                             target_length,
-                             sequence_length,
-                             device=None):
-        mask = torch.tril(
-            torch.ones((target_length, sequence_length),
-                       dtype=torch.bool,
-                       device=device))
-        # Expand to (batch_size, 1, target_length, sequence_length)
-        mask = mask.unsqueeze(0).unsqueeze(1).expand(batch_size, 1,
-                                                     target_length,
-                                                     sequence_length)
-        return mask
-
-    def generate_sliding_window_mask(self, batch_size: int, target_length: int,
-                                     cache_position: torch.Tensor,
-                                     device: torch.device,
-                                     attention_window_size: int):
-        # TRTLLM's sliding window attention is inclusive.
-        effective_window_size = attention_window_size + 1
-        attention_mask_1 = torch.arange(
-            target_length,
-            device=device).unsqueeze(0) <= cache_position.unsqueeze(-1)
-        attention_mask_2 = torch.arange(target_length, device=device).unsqueeze(
-            0) > cache_position.unsqueeze(-1) - effective_window_size
-        attention_mask = attention_mask_1 & attention_mask_2
-        attention_mask = attention_mask[None, None, :, :].expand(
-            batch_size, 1, -1, -1)
-        return attention_mask
-
     @parameterized.expand([
         Scenario(backend="TRTLLM", config_name="1B"),
         Scenario(backend="VANILLA", config_name="1B"),
