From e81bd73a778b4833b8b2781c16b4427b7aa1111c Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Mon, 27 May 2024 16:46:15 +0200
Subject: [PATCH] Fix BT CI (#1872)

* fix bt test failures due to default sdpa attention

* exclude macos13+py3.8

* update tr

* check transformers version
---
 .github/workflows/test_bettertransformer.yml | 54 +++++++++++---------
 optimum/pipelines/pipelines_base.py          |  9 +++-
 tests/bettertransformer/test_encoder.py      |  2 +-
 tests/bettertransformer/testing_utils.py     |  2 +-
 4 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/test_bettertransformer.yml b/.github/workflows/test_bettertransformer.yml
index 6607466dc22..080d8272dfc 100644
--- a/.github/workflows/test_bettertransformer.yml
+++ b/.github/workflows/test_bettertransformer.yml
@@ -2,9 +2,9 @@ name: BetterTransformer / Python - Test

 on:
   push:
-    branches: [ main ]
+    branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -17,29 +17,35 @@ jobs:
       matrix:
         python-version: [3.8, 3.9]
         os: [ubuntu-20.04, macos-13]
+        exclude: [{ python-version: 3.8, os: macos-13 }]
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          pip install .[tests]
-          pip install --no-cache-dir --upgrade torch torchvision torchaudio
-          pip install accelerate
-      - name: Test on pytorch stable
-        working-directory: tests
-        run: |
-          pytest bettertransformer/test_*.py -s -vvvvv
-      - name: Install dependencies 2
-        run: |
-          pip uninstall -y torch torchvision torchaudio
-          pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
-      - name: Test on pytorch nightly
-        working-directory: tests
-        run: |
-          pytest bettertransformer/test_*.py -s -vvvvv
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          pip install .[tests]
+          pip install --no-cache-dir --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install accelerate
+
+      - name: Test with stable pytorch
+        working-directory: tests
+        run: |
+          pytest bettertransformer -s -vvvvv
+
+      - name: Install dependencies 2
+        run: |
+          pip uninstall -y torch torchvision torchaudio
+          pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+
+      - name: Test with nightly pytorch
+        working-directory: tests
+        run: |
+          pytest bettertransformer -s -vvvvv
diff --git a/optimum/pipelines/pipelines_base.py b/optimum/pipelines/pipelines_base.py
index e2046882bd6..cc36e94ef5d 100644
--- a/optimum/pipelines/pipelines_base.py
+++ b/optimum/pipelines/pipelines_base.py
@@ -45,7 +45,7 @@
 from transformers.pipelines import infer_framework_load_model

 from ..bettertransformer import BetterTransformer
-from ..utils import is_onnxruntime_available
+from ..utils import check_if_transformers_greater, is_onnxruntime_available
 from ..utils.file_utils import find_files_matching_pattern


@@ -179,7 +179,12 @@ def load_bettertransformer(
     **kwargs,
 ):
     if model_kwargs is None:
-        model_kwargs = {}
+        # the argument was first introduced in 4.36.0 but most models didn't have an sdpa implementation then
+        # see https://github.com/huggingface/transformers/blob/v4.36.0/src/transformers/modeling_utils.py#L1258
+        if check_if_transformers_greater("4.36.0"):
+            model_kwargs = {"attn_implementation": "eager"}
+        else:
+            model_kwargs = {}

     if model is None:
         model_id = SUPPORTED_TASKS[targeted_task]["default"]
diff --git a/tests/bettertransformer/test_encoder.py b/tests/bettertransformer/test_encoder.py
index cbf4bcbae90..74aacaed58c 100644
--- a/tests/bettertransformer/test_encoder.py
+++ b/tests/bettertransformer/test_encoder.py
@@ -114,7 +114,7 @@ def test_inference_speed(self):
         """
         model_name = "bert-base-uncased"

-        hf_model = AutoModel.from_pretrained(model_name).eval()
+        hf_model = AutoModel.from_pretrained(model_name, attn_implementation="eager").eval()
         bt_model = BetterTransformer.transform(hf_model, keep_original_model=True)

         BATCH_SIZE = 8
diff --git a/tests/bettertransformer/testing_utils.py b/tests/bettertransformer/testing_utils.py
index eb4f0ab9a4d..6e7ff71ddd9 100644
--- a/tests/bettertransformer/testing_utils.py
+++ b/tests/bettertransformer/testing_utils.py
@@ -235,7 +235,7 @@ def _test_logits(self, model_id: str, model_type: str, **preprocessor_kwargs):
         inputs = self.prepare_inputs_for_class(model_id=model_id, model_type=model_type, **preprocessor_kwargs)

         torch.manual_seed(0)
-        hf_random_model = AutoModel.from_pretrained(model_id).eval()
+        hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval()
         random_config = hf_random_model.config

         hf_random_model = hf_random_model.eval()
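
Note, outside the patch itself: a minimal sketch of the load path the changes above rely on, built only from pieces that appear in the diff (check_if_transformers_greater, the attn_implementation="eager" kwarg, and BetterTransformer.transform); the checkpoint and variable names are illustrative, not part of the patch.

    from transformers import AutoModel

    from optimum.bettertransformer import BetterTransformer
    from optimum.utils import check_if_transformers_greater

    model_id = "bert-base-uncased"  # illustrative checkpoint, same one used in test_encoder.py

    # transformers >= 4.36.0 may pick the sdpa attention implementation by default,
    # which BetterTransformer cannot convert, so eager attention is requested explicitly;
    # older versions do not accept the kwarg at all, hence the version gate.
    if check_if_transformers_greater("4.36.0"):
        model_kwargs = {"attn_implementation": "eager"}
    else:
        model_kwargs = {}

    hf_model = AutoModel.from_pretrained(model_id, **model_kwargs).eval()

    # swap the eager attention layers for their BetterTransformer equivalents
    bt_model = BetterTransformer.transform(hf_model, keep_original_model=True)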