From f0c76d496cb460e8c89af95c2f6eebef584011b4 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Tue, 17 Dec 2024 20:27:33 -0800
Subject: [PATCH] Split modeling tests to separate files and CI jobs

---
 .github/workflows/test_executorch_runtime.yml |   9 +-
 tests/executorch/runtime/test_modeling.py     | 142 ------------------
 .../executorch/runtime/test_modeling_gemma.py |  56 +++++++
 .../runtime/test_modeling_gemma2.py           |  58 +++++++
 .../executorch/runtime/test_modeling_llama.py |  84 +++++++++++
 .../executorch/runtime/test_modeling_olmo.py  |  56 +++++++
 .../executorch/runtime/test_modeling_qwen2.py |  54 +++++++
 7 files changed, 316 insertions(+), 143 deletions(-)
 create mode 100644 tests/executorch/runtime/test_modeling_gemma.py
 create mode 100644 tests/executorch/runtime/test_modeling_gemma2.py
 create mode 100644 tests/executorch/runtime/test_modeling_llama.py
 create mode 100644 tests/executorch/runtime/test_modeling_olmo.py
 create mode 100644 tests/executorch/runtime/test_modeling_qwen2.py

diff --git a/.github/workflows/test_executorch_runtime.yml b/.github/workflows/test_executorch_runtime.yml
index 3aea14f4ee..d5bbc0f8ea 100644
--- a/.github/workflows/test_executorch_runtime.yml
+++ b/.github/workflows/test_executorch_runtime.yml
@@ -17,6 +17,13 @@ jobs:
       matrix:
         python-version: ['3.10', '3.11', '3.12']
         os: [macos-15]
+        test-modeling:
+          - test_modeling_gemma2.py
+          - test_modeling_gemma.py
+          - test_modeling_llama.py
+          - test_modeling_olmo.py
+          - test_modeling.py
+          - test_modeling_qwen2.py
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -32,4 +39,4 @@ jobs:
       - name: Run tests
         working-directory: tests
         run: |
-          RUN_SLOW=1 pytest executorch/runtime/test_*.py -s -vvvv --durations=0
+          RUN_SLOW=1 pytest executorch/runtime/${{ matrix.test-modeling }} -s -vvvv --durations=0

diff --git a/tests/executorch/runtime/test_modeling.py b/tests/executorch/runtime/test_modeling.py
index d8c6e1bb49..6593da7a8c 100644
--- a/tests/executorch/runtime/test_modeling.py
+++ b/tests/executorch/runtime/test_modeling.py
@@ -69,145 +69,3 @@ def test_load_model_from_local_path(self):
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_llama3_2_1b_text_generation_with_xnnpack(self):
-        model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_llama3_2_3b_text_generation_with_xnnpack(self):
-        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Simply put, the theory of relativity states that time is relative and can be affected "
-            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
-            "century. The theory has two parts"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_qwen2_5_text_generation_with_xnnpack(self):
-        model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "My favourite condiment is iced tea. I love it with my breakfast, my lunch"
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="My favourite condiment is ",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_gemma2_text_generation_with_xnnpack(self):
-        # model_id = "google/gemma-2-2b"
-        model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Hello I am doing a project for my school",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_gemma_text_generation_with_xnnpack(self):
-        # model_id = "google/gemma-2b"
-        model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Hello I am doing a project for my school",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_olmo_text_generation_with_xnnpack(self):
-        model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Simply put, the theory of relativity states that the speed of light is the same in all directions."
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_gemma.py b/tests/executorch/runtime/test_modeling_gemma.py
new file mode 100644
index 0000000000..08f80d4e57
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_gemma.py
@@ -0,0 +1,56 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_gemma_text_generation_with_xnnpack(self):
+        # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "google/gemma-2b"
+        model_id = "weqweasdas/RM-Gemma-2B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Hello I am doing a project for my school",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_gemma2.py b/tests/executorch/runtime/test_modeling_gemma2.py
new file mode 100644
index 0000000000..6878daa774
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_gemma2.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_gemma2_text_generation_with_xnnpack(self):
+        # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "google/gemma-2-2b"
+        model_id = "unsloth/gemma-2-2b-it"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Hello I am doing a project for my school",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_llama.py b/tests/executorch/runtime/test_modeling_llama.py
new file mode 100644
index 0000000000..1834ee162d
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_llama.py
@@ -0,0 +1,84 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_llama3_2_1b_text_generation_with_xnnpack(self):
+        # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "meta-llama/Llama-3.2-1B"
+        model_id = "NousResearch/Llama-3.2-1B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_llama3_2_3b_text_generation_with_xnnpack(self):
+        # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "meta-llama/Llama-3.2-3B"
+        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Simply put, the theory of relativity states that time is relative and can be affected "
+            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
+            "century. The theory has two parts"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_olmo.py b/tests/executorch/runtime/test_modeling_olmo.py
new file mode 100644
index 0000000000..65c3045ad8
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_olmo.py
@@ -0,0 +1,56 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_olmo_text_generation_with_xnnpack(self):
+        model_id = "allenai/OLMo-1B-hf"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Simply put, the theory of relativity states that the speed of light is the same in all directions."
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_qwen2.py b/tests/executorch/runtime/test_modeling_qwen2.py
new file mode 100644
index 0000000000..d80a286b72
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_qwen2.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_qwen2_5_text_generation_with_xnnpack(self):
+        model_id = "Qwen/Qwen2.5-0.5B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "My favourite condiment is iced tea. I love it with my breakfast, my lunch"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="My favourite condiment is ",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)