This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Fix SQ bloom #1636

Merged · 1 commit · Jun 27, 2024

Changes from all commits

@@ -38,8 +38,7 @@ def _reorder_cache(

         This is required to match `past_key_values` with the correct beam_idx at every generation step.
         """
-        if self.config.model_type == "bloom":
-            return self._reorder_cache_bloom(past_key_values, beam_idx)

         if self.config.model_type == "chatglm":
             return tuple(
                 tuple(
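
For orientation (not part of this PR's diff): a minimal, self-contained sketch of the generic beam-search cache reordering that `_reorder_cache`-style helpers typically perform. The function name, tensor shapes, and dummy values below are illustrative assumptions only.

# Sketch of generic beam-search cache reordering; not the PR's code, and the
# shapes below are illustrative assumptions only.
import torch

def reorder_cache_generic(past_key_values, beam_idx):
    # Select the surviving beams from every cached key/value tensor so the
    # cache stays aligned with the reordered hypotheses.
    return tuple(
        tuple(past_state.index_select(0, beam_idx) for past_state in layer_past)
        for layer_past in past_key_values
    )

# Dummy cache: 2 layers, each a (key, value) pair of shape (num_beams, heads, seq, head_dim).
past_key_values = tuple(
    (torch.randn(4, 2, 3, 8), torch.randn(4, 2, 3, 8)) for _ in range(2)
)
beam_idx = torch.tensor([2, 0, 1, 3])
reordered = reorder_cache_generic(past_key_values, beam_idx)
print(reordered[0][0].shape)  # torch.Size([4, 2, 3, 8])
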
@@ -946,11 +946,7 @@ def collate_batch(batch):
             )

             last_ind.append(input_ids.shape[0] - 1)
-            if model_type in ["bloom"]:
-                attention_mask = torch.ones(len(input_ids) + 1)
-                attention_mask[0] = 0
-            else:
-                attention_mask = torch.ones(len(input_ids))
+            attention_mask = torch.ones(len(input_ids))
             position_ids = torch.arange(len(input_ids))
             input_ids_padded.append(input_ids)
             attention_mask_padded.append(attention_mask)
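
A quick sketch of what this hunk changes in collate_batch (illustrative only, not code from the PR): bloom inputs previously got an attention mask one position longer than `input_ids` with the first slot zeroed out, while every other model type got a plain all-ones mask; after the change the plain mask is built for all model types.

# Before/after attention-mask construction for a single sample;
# `input_ids` here is a made-up example, not data from the PR.
import torch

input_ids = torch.tensor([101, 2023, 2003, 102])

# Old behaviour, only when model_type was "bloom": one extra position, first slot masked.
old_bloom_mask = torch.ones(len(input_ids) + 1)
old_bloom_mask[0] = 0
print(old_bloom_mask)   # tensor([0., 1., 1., 1., 1.])

# Behaviour after this PR, for every model type including bloom.
attention_mask = torch.ones(len(input_ids))
position_ids = torch.arange(len(input_ids))
print(attention_mask)   # tensor([1., 1., 1., 1.])
print(position_ids)     # tensor([0, 1, 2, 3])
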
@@ -375,11 +375,7 @@ def get_example_inputs(model_config, batch_size=1, tokenizer=None, num_beams=4):
     past_key_values = generate_dummy_past_key_values(config=model_config, input_bs=batch_size)

     input_ids = input_ids[:, :512]
-    if model_type in ["bloom", "qwen"]:
-        attention_mask = torch.ones(input_ids.shape[0], input_ids.shape[1] + 1)
-        attention_mask[:,0] = 0
-    else:
-        attention_mask = torch.ones(input_ids.shape)
+    attention_mask = torch.ones(input_ids.shape)
     position_ids = torch.arange(input_ids.shape[1]).repeat(batch_size, 1)

     if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
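
The same simplification, applied here to the batched example inputs in get_example_inputs (again a sketch with made-up shapes rather than the PR's code): the bloom/qwen mask used to carry an extra, zeroed-out first column, whereas the mask now simply mirrors the shape of `input_ids`.

# Batched mask shapes before and after the change; batch_size and seq_len
# are illustrative assumptions.
import torch

batch_size, seq_len = 2, 6
input_ids = torch.randint(0, 1000, (batch_size, seq_len))

# Old behaviour for model_type in ["bloom", "qwen"]: seq_len + 1 columns, first column zeroed.
old_mask = torch.ones(input_ids.shape[0], input_ids.shape[1] + 1)
old_mask[:, 0] = 0
print(old_mask.shape)         # torch.Size([2, 7])

# Behaviour after this PR: the mask matches input_ids exactly.
attention_mask = torch.ones(input_ids.shape)
position_ids = torch.arange(input_ids.shape[1]).repeat(batch_size, 1)
print(attention_mask.shape)   # torch.Size([2, 6])
print(position_ids.shape)     # torch.Size([2, 6])
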