Commit ac0f284

updated comment to add new arg
Local Lab User committed Aug 21, 2024
1 parent d5b7e20 commit ac0f284
Showing 2 changed files with 7 additions and 0 deletions.
5 changes: 5 additions & 0 deletions optimum/habana/transformers/models/clip/modeling_clip.py
@@ -84,6 +84,7 @@ def forward(
Copied from CLIPAttention.forward: https://github.com/huggingface/transformers/blob/ab0f050b42d903f34d6eb97f3f8c0c07f0517ad2/src/transformers/models/clip/modeling_clip.py
The only differences are:
- add new arg use_flash_attention to enable FusedSDPA
- add new arg flash_attention_recompute
"""
bsz, tgt_len, embed_dim = hidden_states.size()
attn_weights_reshaped = None
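For context, a minimal sketch of the control flow these two kwargs enable, not the committed code: `fused_sdpa` below is a stand-in stub for Habana's FusedSDPA kernel, whose real import path and signature are not reproduced here.

```python
# Illustrative sketch only. On Gaudi, the real forward would dispatch to the
# FusedSDPA kernel instead of plain scaled_dot_product_attention.
import torch
import torch.nn.functional as F


def fused_sdpa(q, k, v, attn_mask=None, recompute=False):
    # Stub: `recompute` mirrors flash_attention_recompute, which trades extra
    # backward-pass compute for lower activation memory on the fused path.
    return F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)


def attention_core(q, k, v, attn_mask=None,
                   use_flash_attention=False, flash_attention_recompute=False):
    if use_flash_attention:
        # New arg: route through the fused kernel (FusedSDPA on Gaudi).
        return fused_sdpa(q, k, v, attn_mask, recompute=flash_attention_recompute)
    # Default path: eager scaled dot-product attention.
    return F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
```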
@@ -184,6 +185,7 @@ def forward(
Copied from CLIPEncoderLayer.forward: https://github.com/huggingface/transformers/blob/ab0f050b42d903f34d6eb97f3f8c0c07f0517ad2/src/transformers/models/clip/modeling_clip.py
The only differences are:
- add new arg use_flash_attention
- add new arg flash_attention_recompute
"""
residual = hidden_states

@@ -227,6 +229,7 @@ def forward(
Copied from CLIPEncoder.forward: https://github.com/huggingface/transformers/blob/ab0f050b42d903f34d6eb97f3f8c0c07f0517ad2/src/transformers/models/clip/modeling_clip.py
The only differences are:
- add new arg use_flash_attention
- add new arg flash_attention_recompute
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -288,6 +291,7 @@ def forward(
Copied from CLIPVisionTransformer.forward: https://github.com/huggingface/transformers/blob/ab0f050b42d903f34d6eb97f3f8c0c07f0517ad2/src/transformers/models/clip/modeling_clip.py
The only differences are:
- add new arg use_flash_attention
- add new arg flash_attention_recompute
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -339,6 +343,7 @@ def forward(
Copied from CLIPVisionModel.forward: https://github.com/huggingface/transformers/blob/ab0f050b42d903f34d6eb97f3f8c0c07f0517ad2/src/transformers/models/clip/modeling_clip.py
The only differences are:
- add new arg use_flash_attention
- add new arg flash_attention_recompute
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
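A hedged usage sketch of the patched vision stack: once optimum-habana's adapt_transformers_to_gaudi() (assumed entry point) has swapped in the Gaudi forwards shown in this diff, the two kwargs can be passed at call time. The model id and call pattern are illustrative only.

```python
# Usage sketch, assuming the Gaudi-patched CLIP forwards are in effect.
import torch
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from transformers import CLIPVisionModel

adapt_transformers_to_gaudi()  # assumed entry point that installs the Gaudi forwards

model = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14")  # illustrative checkpoint
pixel_values = torch.randn(1, 3, 224, 224)

outputs = model(
    pixel_values=pixel_values,
    use_flash_attention=True,        # enable the FusedSDPA path (patched forward only)
    flash_attention_recompute=True,  # memory-saving recompute variant
)
print(outputs.last_hidden_state.shape)
```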

@@ -61,6 +61,8 @@ def forward(
Inherits from LlavaForConditionalGeneration: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava_next/modeling_llava_next.py#L433
The only differences are:
- add new arg token_idx
- add new arg use_flash_attention
- add new arg flash_attention_recompute
- Moved the process of merging images into inputs_embeds into prepare_inputs_for_generation
"""

