From 9b14bd7dcf96ad9b062910ea3e77a5e778bc43dd Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 15:27:27 +0100 Subject: [PATCH 1/9] add transformers 4.36 tests --- .github/workflows/test_onnxruntime.yml | 2 ++ setup.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 0ab95752d01..6ef6c1edc71 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -20,6 +20,8 @@ jobs: transformers-version: ["latest"] os: [ubuntu-20.04, windows-2019, macos-13] include: + - transformers-version: "4.36.*" + os: ubuntu-20.04 - transformers-version: "4.45.*" os: ubuntu-20.04 diff --git a/setup.py b/setup.py index 82892bfcc8c..bdc16df9d62 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ "datasets>=1.2.1", "evaluate", "protobuf>=3.20.1", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "onnxruntime-gpu": [ "onnx", @@ -63,19 +63,19 @@ "evaluate", "protobuf>=3.20.1", "accelerate", # ORTTrainer requires it. 
- "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters": [ "onnx", "onnxruntime", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-gpu": [ "onnx", "onnxruntime-gpu", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-tf": [ "tensorflow>=2.4,<=2.12.1", @@ -86,7 +86,7 @@ "h5py", "numpy<1.24.0", "datasets<=2.16", - "transformers>=4.26,<4.38", + "transformers>=4.36,<4.38", ], "diffusers": ["diffusers"], "intel": "optimum-intel>=1.18.0", From 55d44982ea24722c44c91f6a42da840871fb8dcc Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 18:57:02 +0100 Subject: [PATCH 2/9] add test depending on transformers version --- tests/onnxruntime/test_modeling.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 597eb581e2a..f1d9cb9d000 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2318,7 +2318,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "bloom", "codegen", "falcon", - "gemma", "gpt2", "gpt_bigcode", "gpt_neo", @@ -2330,8 +2329,14 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "opt", ] - if check_if_transformers_greater("4.40"): - SUPPORTED_ARCHITECTURES.extend(["gemma", "phi3", "qwen2"]) + if check_if_transformers_greater("4.37"): + SUPPORTED_ARCHITECTURES.append("qwen2") + + if check_if_transformers_greater("4.38"): + SUPPORTED_ARCHITECTURES.append("gemma") + + if check_if_transformers_greater("4.41"): + SUPPORTED_ARCHITECTURES.append("phi3") FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, From c0ddb692fa945561792d743e512c5aad40658b29 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 30 Oct 2024 18:57:17 +0100 Subject: [PATCH 3/9] add min transformers required version for gemma --- optimum/exporters/onnx/model_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 9e57128c272..82486524ce0 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -295,7 +295,7 @@ class Qwen2OnnxConfig(LlamaOnnxConfig): class GemmaOnnxConfig(LlamaOnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator) DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator - pass + MIN_TRANSFORMERS_VERSION = version.parse("4.38.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From 30d69a52aa05cef0caa99268ac236ff2b12b48c0 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 11:00:30 +0100 Subject: [PATCH 4/9] update macos --- .github/workflows/test_onnxruntime.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 6ef6c1edc71..d2cad279ac0 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: transformers-version: ["latest"] - os: [ubuntu-20.04, windows-2019, macos-13] + os: [ubuntu-20.04, windows-2019, macos-15] include: - transformers-version: "4.36.*" os: ubuntu-20.04 From f359ec7fe0ebda14b77002820db4fd8a6a475b3a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 15:04:00 +0100 Subject: [PATCH 5/9] fix whisper test --- tests/onnxruntime/test_modeling.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index f1d9cb9d000..d9fb55a223e 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2326,7 +2326,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "llama", "mistral", "mpt", - "opt", ] if check_if_transformers_greater("4.37"): @@ -4602,14 +4601,14 @@ def 
test_compare_with_and_without_past_key_values(self, model_arch: str): ) self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) - self.assertEqual( - outputs_model_with_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) - self.assertEqual( - outputs_model_without_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) + + if model_arch == "whisper" and check_if_transformers_greater("4.43"): + gen_length = self.GENERATION_LENGTH + 2 + else: + gen_length = self.GENERATION_LENGTH + 1 + + self.assertEqual(outputs_model_with_pkv.shape[1], gen_length) + self.assertEqual(outputs_model_without_pkv.shape[1], gen_length) self.GENERATION_LENGTH = generation_length if os.environ.get("TEST_LEVEL", 0) == "1": From 64e6d5be59d449c39bc258ca218d624cb277369b Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 16:47:14 +0100 Subject: [PATCH 6/9] add opt --- tests/onnxruntime/test_modeling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index d9fb55a223e..9187b851fc0 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2326,6 +2326,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "llama", "mistral", "mpt", + "opt", ] if check_if_transformers_greater("4.37"): From 067587ce1bc2b039a069538a76200aa524d04e95 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 31 Oct 2024 17:08:40 +0100 Subject: [PATCH 7/9] fix mpt --- optimum/exporters/onnx/model_configs.py | 1 + tests/onnxruntime/test_modeling.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 82486524ce0..10d38fae76d 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -343,6 +343,7 @@ def 
patch_model_for_export( class MPTOnnxConfig(TextDecoderOnnxConfig): # MPT does not require position_ids input. DEFAULT_ONNX_OPSET = 13 + MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args( num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers" ) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 9187b851fc0..e90bb4be758 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2325,7 +2325,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gptj", "llama", "mistral", - "mpt", "opt", ] @@ -2335,8 +2334,9 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): if check_if_transformers_greater("4.38"): SUPPORTED_ARCHITECTURES.append("gemma") + # TODO: fix "mpt" for which inference fails for transformers < v4.41 if check_if_transformers_greater("4.41"): - SUPPORTED_ARCHITECTURES.append("phi3") + SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, @@ -2449,7 +2449,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach transformers_model = AutoModelForCausalLM.from_pretrained(model_id) transformers_model = transformers_model.eval() tokenizer = get_preprocessor(model_id) - tokens = tokenizer("This is a sample output", return_tensors="pt") + tokens = tokenizer("This is a sample input", return_tensors="pt") position_ids = None if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS: input_shape = tokens["input_ids"].shape @@ -2471,7 +2471,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach # Compare batched generation. 
tokenizer.pad_token_id = tokenizer.eos_token_id tokenizer.padding_side = "left" - tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) + tokens = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True) onnx_model.generation_config.eos_token_id = None transformers_model.generation_config.eos_token_id = None onnx_model.config.eos_token_id = None From 1917d1e50480682d89ae65a5fa55e421f1fb3eac Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 15 Nov 2024 17:12:55 +0100 Subject: [PATCH 8/9] add comment --- optimum/exporters/onnx/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 10d38fae76d..df7754c3769 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -343,6 +343,7 @@ def patch_model_for_export( class MPTOnnxConfig(TextDecoderOnnxConfig): # MPT does not require position_ids input. 
DEFAULT_ONNX_OPSET = 13 + # TODO: fix inference for transformers < v4.41 for beam_search > 1 MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args( num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers" ) From 82bfbbfc310a0ac02d558d3ca99b9cc7a8ddb9ae Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 18 Nov 2024 11:42:46 +0100 Subject: [PATCH 9/9] add granite test when supported by transformers --- tests/onnxruntime/test_modeling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 9aacdd7d968..84ac27029f9 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2323,7 +2323,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gpt_neo", "gpt_neox", "gptj", - "granite", "llama", "mistral", "opt", ] @@ -2339,6 +2338,9 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): if check_if_transformers_greater("4.41"): SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES.append("granite") + FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [False, True],