From fa49187d3b790fd3cdc13b6d9262f037edb4edd3 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Mon, 8 Apr 2024 13:26:06 +0400
Subject: [PATCH] Add openvino export for InternLM2 and Orion architectures
 (#628)

* support more models in export

* add orion

* update tests
---
 optimum/exporters/openvino/__main__.py      | 15 ++++++++++++++-
 optimum/exporters/openvino/convert.py       |  2 +-
 optimum/exporters/openvino/model_configs.py | 20 +++++++++++++++++++-
 optimum/exporters/openvino/model_patcher.py |  2 +-
 tests/openvino/test_modeling.py             |  4 +++-
 tests/openvino/utils_tests.py               |  3 +++
 6 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 5d6e31ebac..dbea798f75 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -202,7 +202,6 @@ def main_export(
         quantization_config = getattr(config, "quantization_config", None)
         do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq"
         model_type = config.model_type.replace("_", "-")
-
         if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
             custom_architecture = True
         elif task not in TasksManager.get_supported_tasks_for_model_type(
@@ -220,6 +219,20 @@ def main_export(
             )
         if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
             loading_kwargs["attn_implementation"] = "eager"
+        # there are some difference between remote and in library representation of past key values for some models,
+        # for avoiding confusion we disable remote code for them
+        if (
+            trust_remote_code
+            and model_type in {"falcon", "mpt", "phi"}
+            and ("with-past" in task or original_task == "auto")
+            and not custom_export_configs
+        ):
+            logger.warning(
+                f"Model type `{model_type}` export for task `{task}` is not supported for loading with `trust_remote_code=True`"
+                "using default export configuration, `trust_remote_code` will be disabled. "
+                "Please provide custom export config if you want load model with remote code."
+            )
+            trust_remote_code = False

     # Patch the modules to export of GPTQ models w/o GPU
     if do_gptq_patching:
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 98dd22d824..ccc046ce55 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -345,7 +345,7 @@ def ts_patched_forward(*args, **kwargs):
                 input_dict = dict(zip(keys, tuple_input))
                 kwargs[input_name] = input_dict
             outputs = patched_forward(*args, **kwargs)
-            return tuple(outputs.values())
+            return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])

         patcher.patched_forward = ts_patched_forward

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index a274b3671d..6f22cf2142 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -74,7 +74,7 @@ def init_model_configs():

 @register_in_tasks_manager("baichuan", *["text-generation", "text-generation-with-past"], library_name="transformers")
-class BaichaunOpenVINOConfig(TextDecoderOnnxConfig):
+class BaichaunOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 13
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
         num_layers="num_hidden_layers", num_attention_heads="num_attention_heads", hidden_size="hidden_size"
@@ -400,3 +400,21 @@ class Starcoder2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
+@register_in_tasks_manager("internlm2", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class InternLM2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
+@register_in_tasks_manager("orion", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class OrionOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 2cedf64b0a..bafd467dd4 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -513,5 +513,5 @@ def __init__(
     ):
         super().__init__(config, model, model_kwargs)
         # model has first inference buffers initialization
-        if self._model.lm_head.first_flag:
+        if hasattr(self._model.lm_head, "first_flag"):
             self._model(torch.ones((1, 10), dtype=torch.int64), torch.ones((1, 10), dtype=torch.int64))
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index f54305113f..32fc255a1f 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -524,10 +524,12 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "stablelm",
         "starcoder2",
         "phi",
+        "internlm2",
+        "orion",
     )
     GENERATION_LENGTH = 100
     IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")
-    REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen")
+    REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion")

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index c95444274e..e7f62f1f61 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -50,6 +50,7 @@
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
     "hubert": "hf-internal-testing/tiny-random-HubertModel",
     "ibert": "hf-internal-testing/tiny-random-ibert",
+    "internlm2": "katuni4ka/tiny-random-internlm2",
     "levit": "hf-internal-testing/tiny-random-LevitModel",
     "longt5": "hf-internal-testing/tiny-random-longt5",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",
@@ -69,6 +70,8 @@
     "mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
     "mt5": "stas/mt5-tiny-random",
     "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel",
+    "olmo": "katuni4ka/tiny-random-olmo",
+    "orion": "katuni4ka/tiny-random-orion",
     "pegasus": "hf-internal-testing/tiny-random-pegasus",
     "pix2struct": "fxmarty/pix2struct-tiny-random",
     "phi": "echarlaix/tiny-random-PhiForCausalLM",