From 04468744692129619b39b317ae001d8def66f16a Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Sat, 25 Mar 2023 01:20:16 +0300
Subject: [PATCH 1/9] Add config for Llama

---
 optimum/exporters/onnx/model_configs.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 6b83511b3b..665f82c8b9 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -208,6 +208,11 @@ class OPTOnnxConfig(TextDecoderOnnxConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
 
 
+class LlamaOnnxConfig(TextDecoderOnnxConfig):
+    DEFAULT_ONNX_OPSET = 13
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
 class BloomDummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
     def generate(self, input_name: str, framework: str = "pt"):
         past_key_shape = (

From 168d4cb57045594bca663f07957b65e7f76283ac Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Sat, 25 Mar 2023 01:20:52 +0300
Subject: [PATCH 2/9] Register Llama in tasks

---
 optimum/exporters/tasks.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 4922f80900..d4423d1391 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -621,6 +621,14 @@ class TasksManager:
             "sequence-classification",
             onnx="OPTOnnxConfig",
         ),
+        "llama": supported_tasks_mapping(
+            "default",
+            "default-with-past",
+            "causal-lm",
+            "causal-lm-with-past",
+            "sequence-classification",
+            onnx="LlamaOnnxConfig",
+        ),
         "pegasus": supported_tasks_mapping(
             "default",
             "default-with-past",

From ab4a8bf08ca08f39e1f3298138fea57c9199b710 Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Sat, 25 Mar 2023 01:21:20 +0300
Subject: [PATCH 3/9] Add llama and its corresponding tiny-random model from hf into tests

---
 tests/exporters/exporters_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 6fc3268390..bc279c64ce 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -75,7 +75,8 @@
     "mpnet": "hf-internal-testing/tiny-random-MPNetModel",
     "mt5": "lewtun/tiny-random-mt5",
     "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel",
-    "opt": "hf-internal-testing/tiny-random-OPTModel",
+    "opt": "hf-internal-testing/tiny-random-llama",
+    "llama": "hf-internal-testing/tiny-random-OPTModel",
     # "owlvit": "google/owlvit-base-patch32",
     "pegasus": "hf-internal-testing/tiny-random-PegasusModel",
     "perceiver": {

From 7e06da1c0480a5e2817d224998de6207a43f71d5 Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Sat, 25 Mar 2023 01:32:04 +0300
Subject: [PATCH 4/9] Add tests for modeling and exporters

---
 tests/exporters/exporters_utils.py           | 1 +
 tests/onnxruntime/test_modeling.py           | 1 +
 tests/onnxruntime/utils_onnxruntime_tests.py | 1 +
 3 files changed, 3 insertions(+)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index bc279c64ce..44eb7a89d9 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -164,6 +164,7 @@
     "levit": "facebook/levit-128S",
     "layoutlm": "microsoft/layoutlm-base-uncased",
     "layoutlmv3": "microsoft/layoutlmv3-base",
+    "llama": "decapoda-research/llama-65b-hf",
     "longt5": "hf-internal-testing/tiny-random-longt5",  # Not using google/long-t5-local-base because it takes too much time for testing.
     # "longformer": "allenai/longformer-base-4096",
     "m2m-100": "hf-internal-testing/tiny-random-m2m_100",  # Not using facebook/m2m100_418M because it takes too much time for testing.
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index 0c00c7e664..dfea29f390 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -1969,6 +1969,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
         "gpt_neo",
         "gpt_neox",
         "gptj",
+        "llama",
     ]
 
     FULL_GRID = {
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 812f4454b6..1fefbdb8d9 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -49,6 +49,7 @@
     "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel",
     "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model",
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
+    "llama": "hf-internal-testing/tiny-random-llama",
    "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
     "marian": "sshleifer/tiny-marian-en-de",  # hf-internal-testing ones are broken
     "mbart": "hf-internal-testing/tiny-random-mbart",
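Taken together, patches 1/9 through 4/9 are enough to route a Llama checkpoint through the ONNX exporter: LlamaOnnxConfig describes the export and the TasksManager entry exposes the causal-lm tasks. Below is a minimal sketch of that path, assuming the main_export helper in optimum.exporters.onnx (the entry point behind optimum-cli export onnx); the checkpoint id is the tiny test model used later in this series and the output directory is only illustrative:

    # Sketch only: exercises the LlamaOnnxConfig and the "llama" task mapping
    # added above. Checkpoint and output path are illustrative choices.
    from optimum.exporters.onnx import main_export

    main_export(
        model_name_or_path="trl-internal-testing/tiny-random-LlamaForCausalLM",
        output="llama_onnx",           # illustrative output directory
        task="causal-lm-with-past",    # one of the tasks registered for "llama"
    )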
# "longformer": "allenai/longformer-base-4096", "m2m-100": "hf-internal-testing/tiny-random-m2m_100", # Not using facebook/m2m100_418M because it takes too much time for testing. diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 0c00c7e664..dfea29f390 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -1969,6 +1969,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gpt_neo", "gpt_neox", "gptj", + "llama", ] FULL_GRID = { diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 812f4454b6..1fefbdb8d9 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -49,6 +49,7 @@ "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", "longt5": "hf-internal-testing/tiny-random-LongT5Model", + "llama": "hf-internal-testing/tiny-random-llama", "m2m_100": "hf-internal-testing/tiny-random-m2m_100", "marian": "sshleifer/tiny-marian-en-de", # hf-internal-testing ones are broken "mbart": "hf-internal-testing/tiny-random-mbart", From 95a881057bf763903144f1f7ac8e026ef80297a7 Mon Sep 17 00:00:00 2001 From: Chernenko Ruslan Date: Sat, 25 Mar 2023 01:36:37 +0300 Subject: [PATCH 5/9] Add entry for a Llama --- docs/source/exporters/onnx/overview.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 3d62663e4d..b85d880b9a 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -53,6 +53,7 @@ Supported architectures: - LayoutLM-v3 - Levit - LongT5 +- Llama - M2-M100 - Marian - MBart From c669d558c76e241cef1abfa817736e817a218701 Mon Sep 17 00:00:00 2001 From: Chernenko Ruslan Date: Sat, 25 Mar 2023 15:25:21 +0300 Subject: [PATCH 6/9] Add llama into supported normalized configs --- optimum/utils/normalized_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 0c15befc75..24c51d3335 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -202,6 +202,7 @@ class NormalizedConfigManager: "gpt2": GPT2LikeNormalizedTextConfig, "gpt_neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"), "gpt_neox": NormalizedTextConfig, + "llama": NormalizedTextConfig, "gptj": GPT2LikeNormalizedTextConfig, "imagegpt": GPT2LikeNormalizedTextConfig, "longt5": T5LikeNormalizedTextConfig, From a39fbed46117c629c6383a58ea14183a7c76ed36 Mon Sep 17 00:00:00 2001 From: Chernenko Ruslan Date: Sat, 25 Mar 2023 21:41:27 +0300 Subject: [PATCH 7/9] Add optimization support for llama --- optimum/onnxruntime/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py index c62d372534..fb918032a6 100644 --- a/optimum/onnxruntime/utils.py +++ b/optimum/onnxruntime/utils.py @@ -114,6 +114,7 @@ class ORTConfigManager: "gptj": "gpt2", # longt5 with O4 results in segmentation fault "longt5": "bert", + "llama": "gpt2", "marian": "bart", "mbart": "bart", "mt5": "bart", From 43e98b766679a63ac8f52e066979589ce8adcde4 Mon Sep 17 00:00:00 2001 From: Chernenko Ruslan Date: Tue, 4 Apr 2023 14:30:32 +0300 Subject: [PATCH 8/9] Change tiny-llama source to trl-internal-testing --- tests/exporters/exporters_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
From 43e98b766679a63ac8f52e066979589ce8adcde4 Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Tue, 4 Apr 2023 14:30:32 +0300
Subject: [PATCH 8/9] Change tiny-llama source to trl-internal-testing

---
 tests/exporters/exporters_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 44eb7a89d9..bf34a79fb9 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -64,6 +64,7 @@
     "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel",
     "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model",
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
+    "llama": "trl-internal-testing/tiny-random-LlamaForCausalLM",
     # "longformer": "allenai/longformer-base-4096",
     "m2m-100": "hf-internal-testing/tiny-random-m2m_100",
     "marian": "sshleifer/tiny-marian-en-de",  # hf-internal-testing ones are broken
@@ -75,8 +76,7 @@
     "mpnet": "hf-internal-testing/tiny-random-MPNetModel",
     "mt5": "lewtun/tiny-random-mt5",
     "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel",
-    "opt": "hf-internal-testing/tiny-random-llama",
-    "llama": "hf-internal-testing/tiny-random-OPTModel",
+    "opt": "hf-internal-testing/tiny-random-OPTModel",
     # "owlvit": "google/owlvit-base-patch32",
     "pegasus": "hf-internal-testing/tiny-random-PegasusModel",
     "perceiver": {

From e4898e725a224a826ece34569ca27d6e203d3a9b Mon Sep 17 00:00:00 2001
From: Chernenko Ruslan
Date: Tue, 4 Apr 2023 14:46:11 +0300
Subject: [PATCH 9/9] Change tiny-llama source to trl-internal-testing

---
 tests/onnxruntime/utils_onnxruntime_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 1fefbdb8d9..22e417ba22 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -49,7 +49,7 @@
     "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel",
     "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model",
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
-    "llama": "hf-internal-testing/tiny-random-llama",
+    "llama": "trl-internal-testing/tiny-random-LlamaForCausalLM",
     "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
     "marian": "sshleifer/tiny-marian-en-de",  # hf-internal-testing ones are broken
     "mbart": "hf-internal-testing/tiny-random-mbart",
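With the tiny checkpoint switched to trl-internal-testing/tiny-random-LlamaForCausalLM, the ORTModelForCausalLM integration test can compare the exported model against the PyTorch reference. The sketch below shows the same parity check outside the test harness; the prompt, tolerance, and from_transformers=True flag (export=True in later optimum releases) are illustrative assumptions, not the test suite's exact settings:

    # Sketch only: compare ONNX Runtime logits against the PyTorch reference,
    # mirroring what ORTModelForCausalLMIntegrationTest does for "llama".
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from optimum.onnxruntime import ORTModelForCausalLM

    model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    inputs = tokenizer("This is a sample output", return_tensors="pt")

    pt_model = AutoModelForCausalLM.from_pretrained(model_id)
    ort_model = ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True)

    with torch.no_grad():
        pt_logits = pt_model(**inputs).logits
    ort_logits = ort_model(**inputs).logits

    # Illustrative tolerance; the exported graph should match the PyTorch reference closely.
    print(torch.allclose(pt_logits, ort_logits, atol=1e-4))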