Commit 719fe5a

Clarify enable_gqa support in fx_importer.py
Removed the TODO notes for grouped query attention support from the docstring and comments.
1 parent 4470978 commit 719fe5a

File tree

1 file changed: +1 / -2 lines changed

python/torch_mlir/extras/fx_importer.py

Lines changed: 1 addition & 2 deletions
@@ -1915,7 +1915,7 @@ def _import_hop_flex_attention(
         - score_mod: Optional submodule/callable for score modification (imported as function)
         - block_mask: Optional BlockMask tuple containing mask_mod function and runtime tensors
         - scale: Optional float for attention score scaling
-        - enable_gqa: Boolean for grouped query attention support (TODO: NYI)
+        - enable_gqa: Boolean for grouped query attention support
         - kernel_options: Dict of performance tuning options (TODO: NYI)

     This creates a call to aten.flex_attention with function symbol references for
@@ -1932,7 +1932,6 @@ def _import_hop_flex_attention(
             node.args[:6]
         )

-        # TODO: Add support for enable_gqa (grouped query attention)
         # This is a boolean flag that enables GQA optimization
         enable_gqa = node.args[6] if len(node.args) > 6 else False

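For context on what the enable_gqa flag means, below is a minimal sketch of grouped query attention with PyTorch's flex_attention at the eager level. It assumes a PyTorch build that provides torch.nn.attention.flex_attention; the tensor shapes and head counts are illustrative and not taken from this repository.

# Minimal sketch of grouped query attention (GQA) with flex_attention.
# Assumes a PyTorch build that ships torch.nn.attention.flex_attention;
# shapes and head counts below are illustrative only.
import torch
from torch.nn.attention.flex_attention import flex_attention

B, Hq, Hkv, S, D = 2, 8, 2, 128, 64  # 8 query heads share 2 key/value heads

query = torch.randn(B, Hq, S, D)
key = torch.randn(B, Hkv, S, D)
value = torch.randn(B, Hkv, S, D)

# enable_gqa=True lets attention broadcast each key/value head across its
# group of query heads instead of requiring matching head counts.
out = flex_attention(query, key, value, enable_gqa=True)
print(out.shape)  # torch.Size([2, 8, 128, 64])

This is the same boolean that the importer reads from node.args[6] in the hunk above.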
