Split modeling tests to separate files and CI jobs
Github Executorch committed Dec 18, 2024
1 parent c36e2e2 commit f0c76d4
Showing 7 changed files with 316 additions and 143 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/test_executorch_runtime.yml
@@ -17,6 +17,13 @@ jobs:
      matrix:
        python-version: ['3.10', '3.11', '3.12']
        os: [macos-15]
        test-modeling:
          - test_modeling_gemma2.py
          - test_modeling_gemma.py
          - test_modeling_llama.py
          - test_modeling_olmo.py
          - test_modeling.py
          - test_modeling_qwen2.py

    runs-on: ${{ matrix.os }}
    steps:
@@ -32,4 +39,4 @@
      - name: Run tests
        working-directory: tests
        run: |
          RUN_SLOW=1 pytest executorch/runtime/test_*.py -s -vvvv --durations=0
          RUN_SLOW=1 pytest executorch/runtime/${{ matrix.test-modeling }} -s -vvvv --durations=0
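
Note: each test-modeling entry in the matrix above fans out into its own CI job that runs a single test file. As an illustration only (substituting the test_modeling_llama.py entry by hand, from the tests working directory), the run step for one such job expands to roughly:

RUN_SLOW=1 pytest executorch/runtime/test_modeling_llama.py -s -vvvv --durations=0
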
142 changes: 0 additions & 142 deletions tests/executorch/runtime/test_modeling.py
@@ -69,145 +69,3 @@ def test_load_model_from_local_path(self):
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

    @slow
    @pytest.mark.run_slow
    def test_llama3_2_1b_text_generation_with_xnnpack(self):
        model_id = "NousResearch/Llama-3.2-1B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_llama3_2_3b_text_generation_with_xnnpack(self):
        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = (
            "Simply put, the theory of relativity states that time is relative and can be affected "
            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
            "century. The theory has two parts"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_qwen2_5_text_generation_with_xnnpack(self):
        model_id = "Qwen/Qwen2.5-0.5B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = "My favourite condiment is iced tea. I love it with my breakfast, my lunch"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="My favourite condiment is ",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_gemma2_text_generation_with_xnnpack(self):
        # model_id = "google/gemma-2-2b"
        model_id = "unsloth/gemma-2-2b-it"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = (
            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Hello I am doing a project for my school",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_gemma_text_generation_with_xnnpack(self):
        # model_id = "google/gemma-2b"
        model_id = "weqweasdas/RM-Gemma-2B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Hello I am doing a project for my school",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_olmo_text_generation_with_xnnpack(self):
        model_id = "allenai/OLMo-1B-hf"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = (
            "Simply put, the theory of relativity states that the speed of light is the same in all directions."
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
56 changes: 56 additions & 0 deletions tests/executorch/runtime/test_modeling_gemma.py
@@ -0,0 +1,56 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import unittest

import pytest
from executorch.extension.pybindings.portable_lib import ExecuTorchModule
from transformers import AutoTokenizer
from transformers.testing_utils import (
    slow,
)

from optimum.executorchruntime import ExecuTorchModelForCausalLM


class ExecuTorchModelIntegrationTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @slow
    @pytest.mark.run_slow
    def test_gemma_text_generation_with_xnnpack(self):
        # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
        # model_id = "google/gemma-2b"
        model_id = "weqweasdas/RM-Gemma-2B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = "Hello I am doing a project for my school and I need to write a report on the history of the United States."
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Hello I am doing a project for my school",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
58 changes: 58 additions & 0 deletions tests/executorch/runtime/test_modeling_gemma2.py
@@ -0,0 +1,58 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import unittest

import pytest
from executorch.extension.pybindings.portable_lib import ExecuTorchModule
from transformers import AutoTokenizer
from transformers.testing_utils import (
    slow,
)

from optimum.executorchruntime import ExecuTorchModelForCausalLM


class ExecuTorchModelIntegrationTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @slow
    @pytest.mark.run_slow
    def test_gemma2_text_generation_with_xnnpack(self):
        # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
        # model_id = "google/gemma-2-2b"
        model_id = "unsloth/gemma-2-2b-it"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = (
            "Hello I am doing a project for my school and I need to make sure it is a great to be creative and I can!"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Hello I am doing a project for my school",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)
84 changes: 84 additions & 0 deletions tests/executorch/runtime/test_modeling_llama.py
@@ -0,0 +1,84 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import unittest

import pytest
from executorch.extension.pybindings.portable_lib import ExecuTorchModule
from transformers import AutoTokenizer
from transformers.testing_utils import (
    slow,
)

from optimum.executorchruntime import ExecuTorchModelForCausalLM


class ExecuTorchModelIntegrationTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @slow
    @pytest.mark.run_slow
    def test_llama3_2_1b_text_generation_with_xnnpack(self):
        # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
        # model_id = "meta-llama/Llama-3.2-1B"
        model_id = "NousResearch/Llama-3.2-1B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = "Simply put, the theory of relativity states that the laws of physics are the same in all inertial frames of reference."
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)

    @slow
    @pytest.mark.run_slow
    def test_llama3_2_3b_text_generation_with_xnnpack(self):
        # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
        # model_id = "meta-llama/Llama-3.2-3B"
        model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
        model = ExecuTorchModelForCausalLM.from_pretrained(
            model_name_or_path=model_id,
            export=True,
            task="text-generation",
            recipe="xnnpack",
        )
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        EXPECTED_GENERATED_TEXT = (
            "Simply put, the theory of relativity states that time is relative and can be affected "
            "by an object's speed. This theory was developed by Albert Einstein in the early 20th "
            "century. The theory has two parts"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=len(tokenizer.encode(EXPECTED_GENERATED_TEXT)),
        )
        self.assertEqual(generated_text, EXPECTED_GENERATED_TEXT)