From f0c76d496cb460e8c89af95c2f6eebef584011b4 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Tue, 17 Dec 2024 20:27:33 -0800
Subject: [PATCH] Split modeling tests to separate files and CI jobs

---
 .github/workflows/test_executorch_runtime.yml |   9 +-
 tests/executorch/runtime/test_modeling.py     | 142 ------------------
 .../executorch/runtime/test_modeling_gemma.py |  56 +++++++
 .../runtime/test_modeling_gemma2.py           |  58 +++++++
 .../executorch/runtime/test_modeling_llama.py |  84 +++++++++++
 .../executorch/runtime/test_modeling_olmo.py  |  56 +++++++
 .../executorch/runtime/test_modeling_qwen2.py |  54 +++++++
 7 files changed, 316 insertions(+), 143 deletions(-)
 create mode 100644 tests/executorch/runtime/test_modeling_gemma.py
 create mode 100644 tests/executorch/runtime/test_modeling_gemma2.py
 create mode 100644 tests/executorch/runtime/test_modeling_llama.py
 create mode 100644 tests/executorch/runtime/test_modeling_olmo.py
 create mode 100644 tests/executorch/runtime/test_modeling_qwen2.py

diff --git a/.github/workflows/test_executorch_runtime.yml b/.github/workflows/test_executorch_runtime.yml
index 3aea14f4ee..d5bbc0f8ea 100644
--- a/.github/workflows/test_executorch_runtime.yml
+++ b/.github/workflows/test_executorch_runtime.yml
@@ -17,6 +17,13 @@ jobs:
       matrix:
         python-version: ['3.10', '3.11', '3.12']
         os: [macos-15]
+        test-modeling:
+          - test_modeling_gemma2.py
+          - test_modeling_gemma.py
+          - test_modeling_llama.py
+          - test_modeling_olmo.py
+          - test_modeling.py
+          - test_modeling_qwen2.py
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -32,4 +39,4 @@ jobs:
       - name: Run tests
         working-directory: tests
         run: |
-          RUN_SLOW=1 pytest executorch/runtime/test_*.py -s -vvvv --durations=0
+          RUN_SLOW=1 pytest executorch/runtime/${{ matrix.test-modeling }} -s -vvvv --durations=0

diff --git a/tests/executorch/runtime/test_modeling.py b/tests/executorch/runtime/test_modeling.py
index d8c6e1bb49..6593da7a8c 100644
--- a/tests/executorch/runtime/test_modeling.py
+++ b/tests/executorch/runtime/test_modeling.py
@@ -69,145 +69,3 @@ def test_load_model_from_local_path(self):
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_llama3_2_1b_text_generation_with_xnnpack(self):
-        model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_llama3_2_3b_text_generation_with_xnnpack(self):
-        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Simply put, the theory of relativity states that time is relative and can be affected "
-            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
-            "century. The theory has two parts"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_qwen2_5_text_generation_with_xnnpack(self):
-        model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "My favourite condiment is iced tea. I love it with my breakfast, my lunch"
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="My favourite condiment is ",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_gemma2_text_generation_with_xnnpack(self):
-        # model_id = "google/gemma-2-2b"
-        model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Hello I am doing a project for my school",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_gemma_text_generation_with_xnnpack(self):
-        # model_id = "google/gemma-2b"
-        model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Hello I am doing a project for my school",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
-
-    @slow
-    @pytest.mark.run_slow
-    def test_olmo_text_generation_with_xnnpack(self):
-        model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            task="text-generation",
-            recipe="xnnpack",
-        )
-        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
-        self.assertIsInstance(model.model, ExecuTorchModule)
-
-        EXPECTED_GENERATED_TEXT = (
-            "Simply put, the theory of relativity states that the speed of light is the same in all directions."
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        generated_text = model.text_generation(
-            tokenizer=tokenizer,
-            prompt="Simply put, the theory of relativity states that",
-            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
-        )
-        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_gemma.py b/tests/executorch/runtime/test_modeling_gemma.py
new file mode 100644
index 0000000000..08f80d4e57
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_gemma.py
@@ -0,0 +1,56 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_gemma_text_generation_with_xnnpack(self):
+        # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "google/gemma-2b"
+        model_id = "weqweasdas/RM-Gemma-2B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Hello I am doing a project for my school",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_gemma2.py b/tests/executorch/runtime/test_modeling_gemma2.py
new file mode 100644
index 0000000000..6878daa774
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_gemma2.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_gemma2_text_generation_with_xnnpack(self):
+        # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "google/gemma-2-2b"
+        model_id = "unsloth/gemma-2-2b-it"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Hello I am doing a project for my school",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_llama.py b/tests/executorch/runtime/test_modeling_llama.py
new file mode 100644
index 0000000000..1834ee162d
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_llama.py
@@ -0,0 +1,84 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_llama3_2_1b_text_generation_with_xnnpack(self):
+        # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "meta-llama/Llama-3.2-1B"
+        model_id = "NousResearch/Llama-3.2-1B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_llama3_2_3b_text_generation_with_xnnpack(self):
+        # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
+        # model_id = "meta-llama/Llama-3.2-3B"
+        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Simply put, the theory of relativity states that time is relative and can be affected "
+            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
+            "century. The theory has two parts"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_olmo.py b/tests/executorch/runtime/test_modeling_olmo.py
new file mode 100644
index 0000000000..65c3045ad8
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_olmo.py
@@ -0,0 +1,56 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_olmo_text_generation_with_xnnpack(self):
+        model_id = "allenai/OLMo-1B-hf"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = (
+            "Simply put, the theory of relativity states that the speed of light is the same in all directions."
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="Simply put, the theory of relativity states that",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

diff --git a/tests/executorch/runtime/test_modeling_qwen2.py b/tests/executorch/runtime/test_modeling_qwen2.py
new file mode 100644
index 0000000000..d80a286b72
--- /dev/null
+++ b/tests/executorch/runtime/test_modeling_qwen2.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+
+import pytest
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from transformers import AutoTokenizer
+from transformers.testing_utils import (
+    slow,
+)
+
+from optimum.executorchruntime import ExecuTorchModelForCausalLM
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @slow
+    @pytest.mark.run_slow
+    def test_qwen2_5_text_generation_with_xnnpack(self):
+        model_id = "Qwen/Qwen2.5-0.5B"
+        model = ExecuTorchModelForCausalLM.from_pretrained(
+            model_name_or_path=model_id,
+            export=True,
+            task="text-generation",
+            recipe="xnnpack",
+        )
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        EXPECTED_GENERATED_TEXT = "My favourite condiment is iced tea. I love it with my breakfast, my lunch"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt="My favourite condiment is ",
+            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
+        )
+        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)