From 9b14bd7dcf96ad9b062910ea3e77a5e778bc43dd Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 15:27:27 +0100 Subject: [PATCH 1/9] add transformers 4.36 tests --- .github/workflows/test_onnxruntime.yml | 2 ++ setup.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 0ab95752d01..6ef6c1edc71 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -20,6 +20,8 @@ jobs: transformers-version: ["latest"] os: [ubuntu-20.04, windows-2019, macos-13] include: + - transformers-version: "4.36.*" + os: ubuntu-20.04 - transformers-version: "4.45.*" os: ubuntu-20.04 diff --git a/setup.py b/setup.py index 82892bfcc8c..bdc16df9d62 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ "datasets>=1.2.1", "evaluate", "protobuf>=3.20.1", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "onnxruntime-gpu": [ "onnx", @@ -63,19 +63,19 @@ "evaluate", "protobuf>=3.20.1", "accelerate", # ORTTrainer requires it. 
- "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters": [ "onnx", "onnxruntime", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-gpu": [ "onnx", "onnxruntime-gpu", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-tf": [ "tensorflow>=2.4,<=2.12.1", @@ -86,7 +86,7 @@ "h5py", "numpy<1.24.0", "datasets<=2.16", - "transformers>=4.26,<4.38", + "transformers>=4.36,<4.38", ], "diffusers": ["diffusers"], "intel": "optimum-intel>=1.18.0", From 55d44982ea24722c44c91f6a42da840871fb8dcc Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 18:57:02 +0100 Subject: [PATCH 2/9] add test depending on transformers version --- tests/onnxruntime/test_modeling.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 597eb581e2a..f1d9cb9d000 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2318,7 +2318,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "bloom", "codegen", "falcon", - "gemma", "gpt2", "gpt_bigcode", "gpt_neo", @@ -2330,8 +2329,14 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "opt", ] - if check_if_transformers_greater("4.40"): - SUPPORTED_ARCHITECTURES.extend(["gemma", "phi3", "qwen2"]) + if check_if_transformers_greater("4.37"): + SUPPORTED_ARCHITECTURES.append("qwen2") + + if check_if_transformers_greater("4.38"): + SUPPORTED_ARCHITECTURES.append("gemma") + + if check_if_transformers_greater("4.41"): + SUPPORTED_ARCHITECTURES.append("phi3") FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, From c0ddb692fa945561792d743e512c5aad40658b29 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 18:57:17 +0100 Subject: [PATCH 3/9] add min transformers required version for gemma --- optimum/exporters/onnx/model_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 9e57128c272..82486524ce0 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -295,7 +295,7 @@ class Qwen2OnnxConfig(LlamaOnnxConfig): class GemmaOnnxConfig(LlamaOnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator) DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator - pass + MIN_TRANSFORMERS_VERSION = version.parse("4.38.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From 30d69a52aa05cef0caa99268ac236ff2b12b48c0 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 11:00:30 +0100 Subject: [PATCH 4/9] update macos --- .github/workflows/test_onnxruntime.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 6ef6c1edc71..d2cad279ac0 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: transformers-version: ["latest"] - os: [ubuntu-20.04, windows-2019, macos-13] + os: [ubuntu-20.04, windows-2019, macos-15] include: - transformers-version: "4.36.*" os: ubuntu-20.04 From f359ec7fe0ebda14b77002820db4fd8a6a475b3a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 15:04:00 +0100 Subject: [PATCH 5/9] fix whisper test --- tests/onnxruntime/test_modeling.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index f1d9cb9d000..d9fb55a223e 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2326,7 +2326,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "llama", "mistral", "mpt", - "opt", ] if check_if_transformers_greater("4.37"): @@ -4602,14 +4601,14 @@ def 
test_compare_with_and_without_past_key_values(self, model_arch: str): ) self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) - self.assertEqual( - outputs_model_with_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) - self.assertEqual( - outputs_model_without_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) + + if model_arch == "whisper" and check_if_transformers_greater("4.43"): + gen_length = self.GENERATION_LENGTH + 2 + else: + gen_length = self.GENERATION_LENGTH + 1 + + self.assertEqual(outputs_model_with_pkv.shape[1], gen_length) + self.assertEqual(outputs_model_without_pkv.shape[1], gen_length) self.GENERATION_LENGTH = generation_length if os.environ.get("TEST_LEVEL", 0) == "1": From 64e6d5be59d449c39bc258ca218d624cb277369b Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 16:47:14 +0100 Subject: [PATCH 6/9] add opt --- tests/onnxruntime/test_modeling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index d9fb55a223e..9187b851fc0 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2326,6 +2326,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "llama", "mistral", "mpt", + "opt", ] if check_if_transformers_greater("4.37"): From 067587ce1bc2b039a069538a76200aa524d04e95 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 17:08:40 +0100 Subject: [PATCH 7/9] fix mpt --- optimum/exporters/onnx/model_configs.py | 1 + tests/onnxruntime/test_modeling.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 82486524ce0..10d38fae76d 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -343,6 +343,7 @@ def 
patch_model_for_export( class MPTOnnxConfig(TextDecoderOnnxConfig): # MPT does not require position_ids input. DEFAULT_ONNX_OPSET = 13 + MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args( num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers" ) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 9187b851fc0..e90bb4be758 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2325,7 +2325,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gptj", "llama", "mistral", - "mpt", "opt", ] @@ -2335,8 +2334,9 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): if check_if_transformers_greater("4.38"): SUPPORTED_ARCHITECTURES.append("gemma") + # TODO: fix "mpt" for which inference fails for transformers < v4.41 if check_if_transformers_greater("4.41"): - SUPPORTED_ARCHITECTURES.append("phi3") + SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, @@ -2449,7 +2449,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach transformers_model = AutoModelForCausalLM.from_pretrained(model_id) transformers_model = transformers_model.eval() tokenizer = get_preprocessor(model_id) - tokens = tokenizer("This is a sample output", return_tensors="pt") + tokens = tokenizer("This is a sample input", return_tensors="pt") position_ids = None if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS: input_shape = tokens["input_ids"].shape @@ -2471,7 +2471,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach # Compare batched generation. 
tokenizer.pad_token_id = tokenizer.eos_token_id tokenizer.padding_side = "left" - tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) + tokens = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True) onnx_model.generation_config.eos_token_id = None transformers_model.generation_config.eos_token_id = None onnx_model.config.eos_token_id = None From 1917d1e50480682d89ae65a5fa55e421f1fb3eac Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 15 Nov 2024 17:12:55 +0100 Subject: [PATCH 8/9] add comment --- optimum/exporters/onnx/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 10d38fae76d..df7754c3769 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -343,6 +343,7 @@ def patch_model_for_export( class MPTOnnxConfig(TextDecoderOnnxConfig): # MPT does not require position_ids input. 
DEFAULT_ONNX_OPSET = 13 + # TODO: fix inference for transformers < v4.41 for beam_search > 1 MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args( num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers" ) From 82bfbbfc310a0ac02d558d3ca99b9cc7a8ddb9ae Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 18 Nov 2024 11:42:46 +0100 Subject: [PATCH 9/9] add granite test when supported by transformers --- tests/onnxruntime/test_modeling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 9aacdd7d968..84ac27029f9 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2323,7 +2323,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gpt_neo", "gpt_neox", "gptj", - "granite", "llama", "mistral", "opt", ] @@ -2339,6 +2338,9 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): if check_if_transformers_greater("4.41"): SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES.append("granite") + FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [False, True],