most model types now support flash attention 2 regardless of multipack support (#1854)
winglian authored Aug 22, 2024
1 parent b33dc07 commit fefa95e
Showing 2 changed files with 5 additions and 10 deletions.
1 change: 1 addition & 0 deletions src/axolotl/monkeypatch/multipack.py
@@ -17,6 +17,7 @@
     "qwen2_moe",
     "falcon",
     "phi",
+    "phi3",
     "gemma",
     "gemma2",
     "gemmoe",
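For orientation, a minimal sketch of how a list like this is typically consumed. The helper name supports_multipack is hypothetical, and the import assumes the entries above belong to the SUPPORTED_MULTIPACK_MODEL_TYPES constant referenced in the models.py hunk below:

    from axolotl.monkeypatch.multipack import SUPPORTED_MULTIPACK_MODEL_TYPES

    def supports_multipack(model_type: str) -> bool:
        # Membership in this list gates whether axolotl applies its multipack
        # (sample packing) patches for the given architecture.
        return model_type in SUPPORTED_MULTIPACK_MODEL_TYPES

    print(supports_multipack("phi3"))  # True after this commit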
14 changes: 4 additions & 10 deletions src/axolotl/utils/models.py
@@ -591,16 +591,10 @@ def load_model(
                 "flash_attention_2"
             )
         else:
-            if model_config.model_type in SUPPORTED_MULTIPACK_MODEL_TYPES:
-                model_kwargs["attn_implementation"] = "flash_attention_2"
-                model_config._attn_implementation = (  # pylint: disable=protected-access
-                    "flash_attention_2"
-                )
-            else:
-                model_kwargs["attn_implementation"] = "eager"
-                model_config._attn_implementation = (  # pylint: disable=protected-access
-                    "eager"
-                )
+            model_kwargs["attn_implementation"] = "flash_attention_2"
+            model_config._attn_implementation = (  # pylint: disable=protected-access
+                "flash_attention_2"
+            )
     elif cfg.sdp_attention:
         model_kwargs["attn_implementation"] = "sdpa"
         model_config._attn_implementation = "sdpa"  # pylint: disable=protected-access
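The net effect of this hunk: in the branch touched here, every model type now gets attn_implementation="flash_attention_2" instead of only those in SUPPORTED_MULTIPACK_MODEL_TYPES, with all others previously falling back to "eager". A minimal, hedged sketch of what that kwarg ultimately feeds into; the transformers from_pretrained call is the assumed downstream consumer, and the model name is only an example:

    from transformers import AutoModelForCausalLM

    # The attn_implementation value set in model_kwargs above is forwarded to
    # from_pretrained; flash attention 2 additionally requires the flash-attn
    # package and a supported GPU.
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-3-mini-4k-instruct",  # example model; any FA2-capable model works
        attn_implementation="flash_attention_2",
        torch_dtype="auto",
    )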
