From 32a51afb7f8255f36546012b3b6bf060110e59bb Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:55:08 +0100 Subject: [PATCH] Move & rename `onnx_export` (#1685) * move & rename onnx_export * fix test * Update optimum/exporters/onnx/convert.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --------- Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- .../onnx/package_reference/export.mdx | 2 + optimum/commands/export/onnx.py | 2 +- optimum/exporters/onnx/__init__.py | 16 +- optimum/exporters/onnx/__main__.py | 389 +----------------- optimum/exporters/onnx/convert.py | 345 +++++++++++++++- optimum/exporters/onnx/utils.py | 101 ++++- optimum/exporters/tasks.py | 2 + .../exporters/onnx/test_exporters_onnx_cli.py | 2 +- tests/exporters/onnx/test_onnx_export.py | 5 +- 9 files changed, 474 insertions(+), 390 deletions(-) diff --git a/docs/source/exporters/onnx/package_reference/export.mdx b/docs/source/exporters/onnx/package_reference/export.mdx index c9f7f7aed20..c79c174a187 100644 --- a/docs/source/exporters/onnx/package_reference/export.mdx +++ b/docs/source/exporters/onnx/package_reference/export.mdx @@ -18,6 +18,8 @@ You can export models to ONNX from two frameworks in 🤗 Optimum: PyTorch and T [[autodoc]] exporters.onnx.main_export +[[autodoc]] exporters.onnx.onnx_export_from_model + [[autodoc]] exporters.onnx.convert.export [[autodoc]] exporters.onnx.convert.export_pytorch diff --git a/optimum/commands/export/onnx.py b/optimum/commands/export/onnx.py index 35a9ff0b7ec..b2772c85e75 100644 --- a/optimum/commands/export/onnx.py +++ b/optimum/commands/export/onnx.py @@ -250,7 +250,7 @@ def parse_args(parser: "ArgumentParser"): return parse_args_onnx(parser) def run(self): - from ...exporters.onnx.__main__ import main_export + from ...exporters.onnx import main_export # Get the shapes to be used to generate dummy inputs input_shapes = {} diff --git a/optimum/exporters/onnx/__init__.py b/optimum/exporters/onnx/__init__.py index fb70dd5974d..609096e37ef 100644 --- a/optimum/exporters/onnx/__init__.py +++ b/optimum/exporters/onnx/__init__.py @@ -21,7 +21,13 @@ _import_structure = { "base": ["OnnxConfig", "OnnxConfigWithLoss", "OnnxConfigWithPast", "OnnxSeq2SeqConfigWithPast"], "config": ["TextDecoderOnnxConfig", "TextEncoderOnnxConfig", "TextSeq2SeqOnnxConfig"], - "convert": ["export", "export_models", "validate_model_outputs", "validate_models_outputs"], + "convert": [ + "export", + "export_models", + "validate_model_outputs", + "validate_models_outputs", + "onnx_export_from_model", + ], "utils": [ "get_decoder_models_for_export", "get_encoder_decoder_models_for_export", @@ -34,7 +40,13 @@ if TYPE_CHECKING: from .base import OnnxConfig, OnnxConfigWithLoss, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast # noqa from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig # noqa - from .convert import export, export_models, validate_model_outputs, validate_models_outputs # noqa + from .convert import ( + export, + export_models, + validate_model_outputs, + validate_models_outputs, + onnx_export_from_model, + ) # noqa from .utils import ( get_decoder_models_for_export, get_encoder_decoder_models_for_export, diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 747c5f3e73f..52053d5834f 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -15,151 +15,35 @@ """Entry point to the 
optimum.exporters.onnx command line.""" import argparse -import os from pathlib import Path from packaging import version from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoConfig, AutoTokenizer -from transformers.modeling_utils import get_parameter_dtype from transformers.utils import is_torch_available from ...commands.export.onnx import parse_args_onnx from ...configuration_utils import _transformers_version -from ...utils import DEFAULT_DUMMY_SHAPES, ONNX_WEIGHTS_NAME, logging -from ...utils.modeling_utils import MODEL_TO_PATCH_FOR_PAST -from ...utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors -from ..error_utils import AtolError, OutputMatchError, ShapeError +from ...utils import DEFAULT_DUMMY_SHAPES, logging +from ...utils.save_utils import maybe_load_preprocessors from ..tasks import TasksManager -from .constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED, UNPICKABLE_ARCHS -from .convert import export_models, validate_models_outputs -from .utils import ( - MODEL_TYPES_REQUIRING_POSITION_IDS, - _get_submodels_for_export_decoder, - _get_submodels_for_export_encoder_decoder, - _get_submodels_for_export_stable_diffusion, - get_decoder_models_for_export, - get_encoder_decoder_models_for_export, - get_sam_models_for_export, - get_speecht5_models_for_export, - get_stable_diffusion_models_for_export, -) +from .constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED +from .convert import onnx_export_from_model if is_torch_available(): import torch -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union if TYPE_CHECKING: - from transformers import PreTrainedModel, TFPreTrainedModel - from .base import OnnxConfig logger = logging.get_logger() logger.setLevel(logging.INFO) -def _get_submodels_and_onnx_configs( - model: Union["PreTrainedModel", "TFPreTrainedModel"], - task: str, - monolith: bool, - custom_onnx_configs: Dict, - custom_architecture: bool, - _variant: str, - library_name: str, - int_dtype: str = "int64", - float_dtype: str = "fp32", - fn_get_submodels: Optional[Callable] = None, - preprocessors: Optional[List[Any]] = None, - legacy: bool = False, - model_kwargs: Optional[Dict] = None, -): - if not custom_architecture: - if library_name == "diffusers": - onnx_config = None - models_and_onnx_configs = get_stable_diffusion_models_for_export( - model, int_dtype=int_dtype, float_dtype=float_dtype - ) - else: - onnx_config_constructor = TasksManager.get_exporter_config_constructor( - model=model, exporter="onnx", task=task, library_name=library_name - ) - onnx_config = onnx_config_constructor( - model.config, - int_dtype=int_dtype, - float_dtype=float_dtype, - preprocessors=preprocessors, - legacy=legacy, - ) - - onnx_config.variant = _variant - all_variants = "\n".join( - [f" - {name}: {description}" for name, description in onnx_config.VARIANTS.items()] - ) - logger.info(f"Using the export variant {onnx_config.variant}. Available variants are:\n{all_variants}") - - # TODO: this succession of if/else strongly suggests a refactor is needed. 
- if ( - model.config.is_encoder_decoder - and task.startswith(TasksManager._ENCODER_DECODER_TASKS) - and not monolith - ): - models_and_onnx_configs = get_encoder_decoder_models_for_export(model, onnx_config) - elif task.startswith("text-generation") and not monolith: - models_and_onnx_configs = get_decoder_models_for_export(model, onnx_config, legacy=legacy) - elif model.config.model_type == "sam": - models_and_onnx_configs = get_sam_models_for_export(model, onnx_config) - elif model.config.model_type == "speecht5": - models_and_onnx_configs = get_speecht5_models_for_export(model, onnx_config, model_kwargs) - else: - models_and_onnx_configs = {"model": (model, onnx_config)} - - # When specifying custom ONNX configs for supported transformers architectures, we do - # not force to specify a custom ONNX config for each submodel. - for key, custom_onnx_config in custom_onnx_configs.items(): - models_and_onnx_configs[key] = (models_and_onnx_configs[key][0], custom_onnx_config) - else: - onnx_config = None - submodels_for_export = None - models_and_onnx_configs = {} - - if fn_get_submodels is not None: - submodels_for_export = fn_get_submodels(model) - else: - if library_name == "diffusers": - submodels_for_export = _get_submodels_for_export_stable_diffusion(model) - elif ( - model.config.is_encoder_decoder - and task.startswith(TasksManager._ENCODER_DECODER_TASKS) - and not monolith - ): - submodels_for_export = _get_submodels_for_export_encoder_decoder( - model, use_past=task.endswith("-with-past") - ) - elif task.startswith("text-generation") and not monolith: - submodels_for_export = _get_submodels_for_export_decoder(model, use_past=task.endswith("-with-past")) - else: - submodels_for_export = {"model": model} - - if submodels_for_export.keys() != custom_onnx_configs.keys(): - logger.error(f"ONNX custom configs for: {', '.join(custom_onnx_configs.keys())}") - logger.error(f"Submodels to export: {', '.join(submodels_for_export.keys())}") - raise ValueError( - "Trying to export a custom model, but could not find as many custom ONNX configs as the number of submodels to export. Please specifiy the fn_get_submodels argument, that should return a dictionary of submodules with as many items as the provided custom_onnx_configs dictionary." - ) - - for key, custom_onnx_config in custom_onnx_configs.items(): - models_and_onnx_configs[key] = (submodels_for_export[key], custom_onnx_config) - - # Default to the first ONNX config for stable-diffusion and custom architecture case. - if onnx_config is None: - onnx_config = next(iter(models_and_onnx_configs.values()))[1] - - return onnx_config, models_and_onnx_configs - - def main_export( model_name_or_path: str, output: Union[str, Path], @@ -195,13 +79,13 @@ def main_export( **kwargs_shapes, ): """ - Full-suite ONNX export. + Full-suite ONNX export function, exporting **from a model ID on Hugging Face Hub or a local model repository**. Args: > Required parameters model_name_or_path (`str`): - Model ID on huggingface.co or path on disk to the model repository to export. + Model ID on huggingface.co or path on disk to the model repository to export. Example: `model_name_or_path="BAAI/bge-m3"` or `mode_name_or_path="/path/to/model_folder`. output (`Union[str, Path]`): Path indicating the directory where to store the generated ONNX model. 
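The docstring above covers the two required parameters of `main_export`, which this patch now re-exports from `optimum.exporters.onnx` (see the import change in `optimum/commands/export/onnx.py`). A minimal sketch of a direct call is shown below; the model ID is taken from the docstring example, while the output directory and task are illustrative assumptions, not values from this patch.

```python
# Minimal sketch: exporting a model from a Hub ID with main_export.
# The output directory and task below are examples, not part of this patch.
from optimum.exporters.onnx import main_export

main_export(
    model_name_or_path="BAAI/bge-m3",  # Hub ID or local path, as documented above
    output="bge_m3_onnx/",             # directory where the ONNX files are written
    task="feature-extraction",         # optional; the task is auto-inferred when omitted
)
```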
@@ -465,7 +349,7 @@ def main_export( model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code ) - onnx_export( + onnx_export_from_model( model=model, output=output, opset=opset, @@ -489,263 +373,6 @@ def main_export( ) -def onnx_export( - model: Union["PreTrainedModel", "TFPreTrainedModel"], - output: Union[str, Path], - opset: Optional[int] = None, - optimize: Optional[str] = None, - monolith: bool = False, - no_post_process: bool = False, - atol: Optional[float] = None, - do_validation: bool = True, - model_kwargs: Optional[Dict[str, Any]] = None, - custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, - fn_get_submodels: Optional[Callable] = None, - _variant: str = "default", - legacy: bool = False, - preprocessors: List = None, - device: str = "cpu", - no_dynamic_axes: bool = False, - task: Optional[str] = None, - use_subprocess: bool = False, - do_constant_folding: bool = True, - **kwargs_shapes, -): - library_name = TasksManager._infer_library_from_model(model) - framework = "pt" if is_torch_available() and isinstance(model, torch.nn.Module) else "tf" - - dtype = get_parameter_dtype(model) if framework == "pt" else model.dtype - - if "bfloat16" in str(dtype): - float_dtype = "bf16" - elif "float16" in str(dtype): - float_dtype = "fp16" - else: - float_dtype = "fp32" - - if "stable-diffusion" in task: - model_type = "stable-diffusion" - elif hasattr(model.config, "export_model_type"): - model_type = model.config.export_model_type.replace("_", "-") - else: - model_type = model.config.model_type.replace("_", "-") - - custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE - task = TasksManager.map_from_synonym(task) - - # TODO: support onnx_config.py in the model repo - if custom_architecture and custom_onnx_configs is None: - raise ValueError( - f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the model type {model_type} to be supported natively in the ONNX export." - ) - - if task is None: - # TODO : _infer_task_from_model_or_model_class should also infer task from timm model - if library_name == "timm": - task = "image-classification" - else: - task = TasksManager._infer_task_from_model_or_model_class(model) - - if ( - library_name != "diffusers" - and task + "-with-past" - in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx", library_name=library_name) - and not monolith - and model.config.use_cache - ): - task += "-with-past" - - if task.startswith("text-generation") and model.config.is_encoder_decoder: - raise ValueError( - f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report" - f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," - f" referring to `optimum.exporters.tasks.TaskManager`'s `_TRANSFORMERS_TASKS_TO_MODEL_LOADERS`." 
- ) - - if legacy and model_type in MODEL_TYPES_REQUIRING_POSITION_IDS and task.startswith("text-generation"): - logger.warning( - f"legacy=True was specified in the ONNX export, although the model {model_type} requires position_ids for batched inference. Passing `legacy=True` is strongly discouraged, and this option will be removed in a future release. Reference: https://github.com/huggingface/optimum/pull/1381" - ) - - if library_name != "diffusers" and model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: - raise ValueError( - f"{model_type} is not supported yet. Only {list(TasksManager._SUPPORTED_CLI_MODEL_TYPE.keys())} are supported. " - f"If you want to support {model_type} please propose a PR or open up an issue." - ) - - output = Path(output) - if not output.exists(): - output.mkdir(parents=True) - - # For MODEL_TO_PATCH_FOR_PAST architectures, when exporting the model with an input of sequence length of 1, a tracer that does not handle - # controlflows will trace incorrectly the mask generation, resulting in incorrect attention masks for other sequence lengthss. - # Reference: https://github.com/huggingface/transformers/blob/af3de8d87c717c4bb090f037d0d89413c195a42f/src/transformers/modeling_attn_mask_utils.py#L94 - input_shapes = {} - for input_name in DEFAULT_DUMMY_SHAPES.keys(): - input_shapes[input_name] = ( - kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] - ) - - # TODO: this may be moved rather to the OnnxConfig to avoid bloating this script. - if ( - model_type in MODEL_TO_PATCH_FOR_PAST - and input_name == "sequence_length" - and kwargs_shapes.get(input_name) == 1 - ): - raise ValueError( - f"Exporting with a sequence length of 1 a {model_type} model is not supported and can yield unexpected results." - ) - - onnx_config, models_and_onnx_configs = _get_submodels_and_onnx_configs( - model=model, - task=task, - monolith=monolith, - custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, - custom_architecture=custom_architecture, - float_dtype=float_dtype, - fn_get_submodels=fn_get_submodels, - preprocessors=preprocessors, - _variant=_variant, - legacy=legacy, - library_name=library_name, - model_kwargs=model_kwargs, - ) - - if library_name != "diffusers": - # Ensure the requested opset is sufficient - if opset is None: - opset = onnx_config.DEFAULT_ONNX_OPSET - elif opset < onnx_config.DEFAULT_ONNX_OPSET: - logger.warning( - f"Opset {opset} is lower than the recommended minmum opset ({onnx_config.DEFAULT_ONNX_OPSET}) to export {model_type}. " - f"The ONNX export may fail or the exported model may be suboptimal." - ) - if atol is None: - atol = onnx_config.ATOL_FOR_VALIDATION - if isinstance(atol, dict): - atol = atol[task.replace("-with-past", "")] - - # Saving the model config and preprocessor as this is needed sometimes. 
- model.config.save_pretrained(output) - generation_config = getattr(model, "generation_config", None) - if generation_config is not None: - generation_config.save_pretrained(output) - - model_name_or_path = model.config._name_or_path - maybe_save_preprocessors(model_name_or_path, output) - - onnx_files_subpaths = [key + ".onnx" for key in models_and_onnx_configs.keys()] - else: - # save the subcomponent configuration - for model_name in models_and_onnx_configs: - subcomponent = models_and_onnx_configs[model_name][0] - if hasattr(subcomponent, "save_config"): - subcomponent.save_config(output / model_name) - elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): - subcomponent.config.save_pretrained(output / model_name) - - onnx_files_subpaths = [os.path.join(name_dir, ONNX_WEIGHTS_NAME) for name_dir in models_and_onnx_configs] - - # Saving the additional components needed to perform inference. - model.scheduler.save_pretrained(output.joinpath("scheduler")) - - feature_extractor = getattr(model, "feature_extractor", None) - if feature_extractor is not None: - feature_extractor.save_pretrained(output.joinpath("feature_extractor")) - - tokenizer = getattr(model, "tokenizer", None) - if tokenizer is not None: - tokenizer.save_pretrained(output.joinpath("tokenizer")) - - tokenizer_2 = getattr(model, "tokenizer_2", None) - if tokenizer_2 is not None: - tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) - - model.save_config(output) - - if float_dtype == "bf16": - logger.warning( - f"Exporting the model {model.__class__.__name__} in bfloat16 float dtype. After the export, ONNX Runtime InferenceSession with CPU/CUDA execution provider likely does not implement all operators for the bfloat16 data type, and the loading is likely to fail." - ) - - _, onnx_outputs = export_models( - models_and_onnx_configs=models_and_onnx_configs, - opset=opset, - output_dir=output, - output_names=onnx_files_subpaths, - input_shapes=input_shapes, - device=device, - dtype=float_dtype, - no_dynamic_axes=no_dynamic_axes, - do_constant_folding=do_constant_folding, - model_kwargs=model_kwargs, - ) - - if optimize is not None: - from ...onnxruntime import AutoOptimizationConfig, ORTOptimizer - - optimizer = ORTOptimizer.from_pretrained(output, file_names=onnx_files_subpaths) - - optimization_config = AutoOptimizationConfig.with_optimization_level(optimization_level=optimize) - - optimization_config.disable_shape_inference = True - optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="") - - # Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any - # TODO: treating stable diffusion separately is quite ugly - if not no_post_process and library_name != "diffusers": - try: - logger.info("Post-processing the exported models...") - models_and_onnx_configs, onnx_files_subpaths = onnx_config.post_process_exported_models( - output, models_and_onnx_configs, onnx_files_subpaths - ) - except Exception as e: - raise Exception( - f"The post-processing of the ONNX export failed. The export can still be performed by passing the option --no-post-process. Detailed error: {e}" - ) - - if library_name == "diffusers": - # TODO: fix Can't pickle local object 'get_stable_diffusion_models_for_export..' 
- use_subprocess = False - elif model_type in UNPICKABLE_ARCHS: - # Pickling is bugged for nn.utils.weight_norm: https://github.com/pytorch/pytorch/issues/102983 - # TODO: fix "Cowardly refusing to serialize non-leaf tensor" error for wav2vec2-conformer - use_subprocess = False - - if device == "cpu": - # Using multiprocessing for validation is useful only on CUDA EP that leaks memory. - use_subprocess = False - - if do_validation is True: - try: - validate_models_outputs( - models_and_onnx_configs=models_and_onnx_configs, - onnx_named_outputs=onnx_outputs, - atol=atol, - output_dir=output, - onnx_files_subpaths=onnx_files_subpaths, - input_shapes=input_shapes, - device=device, - use_subprocess=use_subprocess, - model_kwargs=model_kwargs, - ) - logger.info(f"The ONNX export succeeded and the exported model was saved at: {output.as_posix()}") - except ShapeError as e: - raise e - except AtolError as e: - logger.warning( - f"The ONNX export succeeded with the warning: {e}.\n The exported model was saved at: {output.as_posix()}" - ) - except OutputMatchError as e: - logger.warning( - f"The ONNX export succeeded with the warning: {e}.\n The exported model was saved at: {output.as_posix()}" - ) - except Exception as e: - raise Exception( - f"An error occured during validation, but the model was saved nonetheless at {output.as_posix()}. Detailed error: {e}." - ) - - def main(): parser = argparse.ArgumentParser("Hugging Face Optimum ONNX exporter") diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index e89568e30be..4693fe38aa0 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -22,24 +22,36 @@ from inspect import signature from itertools import chain from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import numpy as np import onnx +from transformers.modeling_utils import get_parameter_dtype from transformers.utils import is_tf_available, is_torch_available from ...onnx.utils import _get_onnx_external_data_tensors, check_model_uses_external_data from ...utils import ( + DEFAULT_DUMMY_SHAPES, + ONNX_WEIGHTS_NAME, TORCH_MINIMUM_VERSION, is_diffusers_available, is_torch_onnx_support_available, logging, require_numpy_strictly_lower, ) +from ...utils.modeling_utils import MODEL_TO_PATCH_FOR_PAST +from ...utils.save_utils import maybe_save_preprocessors from ..error_utils import AtolError, MinimumVersionError, OutputMatchError, ShapeError +from ..tasks import TasksManager from .base import OnnxConfig +from .constants import UNPICKABLE_ARCHS from .model_configs import SpeechT5OnnxConfig -from .utils import PickableInferenceSession, recursive_to_device +from .utils import ( + MODEL_TYPES_REQUIRING_POSITION_IDS, + PickableInferenceSession, + _get_submodels_and_onnx_configs, + recursive_to_device, +) if is_torch_available(): @@ -884,3 +896,332 @@ def export( if not disable_dynamic_axes_fix: config.fix_dynamic_axes(output, device=device, input_shapes=input_shapes, dtype=dtype) return export_output + + +def onnx_export_from_model( + model: Union["PreTrainedModel", "TFPreTrainedModel"], + output: Union[str, Path], + opset: Optional[int] = None, + optimize: Optional[str] = None, + monolith: bool = False, + no_post_process: bool = False, + atol: Optional[float] = None, + do_validation: bool = True, + model_kwargs: Optional[Dict[str, Any]] = None, + custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, + 
fn_get_submodels: Optional[Callable] = None, + _variant: str = "default", + legacy: bool = False, + preprocessors: List = None, + device: str = "cpu", + no_dynamic_axes: bool = False, + task: Optional[str] = None, + use_subprocess: bool = False, + do_constant_folding: bool = True, + **kwargs_shapes, +): + """ + Full-suite ONNX export function, exporting **from a pre-loaded PyTorch or Tensorflow model**. This function is especially useful in case one needs to do modifications on the model, as overriding a forward call, before exporting to ONNX. + + Args: + > Required parameters + + model (`Union["PreTrainedModel", "TFPreTrainedModel"]`): + PyTorch or TensorFlow model to export to ONNX. + output (`Union[str, Path]`): + Path indicating the directory where to store the generated ONNX model. + + > Optional parameters + + task (`Optional[str]`, defaults to `None`): + The task to export the model for. If not specified, the task will be auto-inferred based on the model. + opset (`Optional[int]`, defaults to `None`): + If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture + will be used. + device (`str`, defaults to `"cpu"`): + The device to use to do the export. Defaults to "cpu". + optimize (`Optional[str]`, defaults to `None`): + Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to + ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. + Available options: `"O1", "O2", "O3", "O4"`. Reference: [`~optimum.onnxruntime.AutoOptimizationConfig`] + monolith (`bool`, defaults to `False`): + Forces to export the model as a single ONNX file. + no_post_process (`bool`, defaults to `False`): + Allows to disable any post-processing done by default on the exported ONNX models. + atol (`Optional[float]`, defaults to `None`): + If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used. + model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): + Experimental usage: keyword arguments to pass to the model during + the export. This argument should be used along the `custom_onnx_configs` argument + in case, for example, the model inputs/outputs are changed (for example, if + `model_kwargs={"output_attentions": True}` is passed). + custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): + Experimental usage: override the default ONNX config used for the given model. This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). + fn_get_submodels (`Optional[Callable]`, defaults to `None`): + Experimental usage: Override the default submodels that are used at the export. This is + especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. + use_subprocess (`bool`, defaults to `False`): + Do the ONNX exported model validation in subprocesses. This is especially useful when + exporting on CUDA device, where ORT does not release memory at inference session + destruction. When set to `True`, the `main_export` call should be guarded in + `if __name__ == "__main__":` block. 
+ _variant (`str`, defaults to `default`): + Specify the variant of the ONNX export to use. + legacy (`bool`, defaults to `False`): + Disable the use of position_ids for text-generation models that require it for batched generation. Also enable to export decoder only models in three files (without + with past and the merged model). This argument is introduced for backward compatibility and will be removed in a future release of Optimum. + no_dynamic_axes (bool, defaults to `False`): + If True, disables the use of dynamic axes during ONNX export. + do_constant_folding (bool, defaults to `True`): + PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible. + **kwargs_shapes (`Dict`): + Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. + + Example usage: + ```python + >>> from transformers import AutoModelForCausalLM + + >>> model = AutoModelForCausalLM.from_pretrained("gpt2") + >>> # At this point, we could override some submodules, forward methods, weights, etc. from the model. + + >>> onnx_export_from_model(model, output="gpt2_onnx/") + ``` + """ + library_name = TasksManager._infer_library_from_model(model) + + # TODO: call standardize_model_attributes here once its model_name_or_path argument is optional. + + if hasattr(model.config, "export_model_type"): + model_type = model.config.export_model_type.replace("_", "-") + else: + model_type = model.config.model_type.replace("_", "-") + + custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE + + if task is not None: + task = TasksManager.map_from_synonym(task) + else: + try: + task = TasksManager._infer_task_from_model_or_model_class(model=model) + except (ValueError, KeyError) as e: + raise RuntimeError( + f"The model task could not be automatically inferred in `onnx_export_from_model`. Please provide the argument `task` with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + + if ( + not custom_architecture + and library_name != "diffusers" + and task + "-with-past" + in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx", library_name=library_name) + and not monolith + ): + # -with-past is the default. + task = task + "-with-past" + + logger.info(f"Automatic task detection to: {task}.") + + framework = "pt" if is_torch_available() and isinstance(model, torch.nn.Module) else "tf" + + dtype = get_parameter_dtype(model) if framework == "pt" else model.dtype + + if "bfloat16" in str(dtype): + float_dtype = "bf16" + elif "float16" in str(dtype): + float_dtype = "fp16" + else: + float_dtype = "fp32" + + # TODO: support onnx_config.py in the model repo + if custom_architecture and custom_onnx_configs is None: + raise ValueError( + f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the model type {model_type} to be supported natively in the ONNX export." + ) + + if task.startswith("text-generation") and model.config.is_encoder_decoder: + raise ValueError( + f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. 
If the task was auto-inferred, please fill a bug report" + f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," + f" referring to `optimum.exporters.tasks.TaskManager`'s `_TRANSFORMERS_TASKS_TO_MODEL_LOADERS`." + ) + + if legacy and model_type in MODEL_TYPES_REQUIRING_POSITION_IDS and task.startswith("text-generation"): + logger.warning( + f"legacy=True was specified in the ONNX export, although the model {model_type} requires position_ids for batched inference. Passing `legacy=True` is strongly discouraged, and this option will be removed in a future release. Reference: https://github.com/huggingface/optimum/pull/1381" + ) + + if library_name != "diffusers" and model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: + raise ValueError( + f"{model_type} is not supported yet. Only {list(TasksManager._SUPPORTED_CLI_MODEL_TYPE.keys())} are supported. " + f"If you want to support {model_type} please propose a PR or open up an issue." + ) + + output = Path(output) + if not output.exists(): + output.mkdir(parents=True) + + # For MODEL_TO_PATCH_FOR_PAST architectures, when exporting the model with an input of sequence length of 1, a tracer that does not handle + # controlflows will trace incorrectly the mask generation, resulting in incorrect attention masks for other sequence lengthss. + # Reference: https://github.com/huggingface/transformers/blob/af3de8d87c717c4bb090f037d0d89413c195a42f/src/transformers/modeling_attn_mask_utils.py#L94 + input_shapes = {} + for input_name in DEFAULT_DUMMY_SHAPES.keys(): + input_shapes[input_name] = ( + kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] + ) + + # TODO: this may be moved rather to the OnnxConfig to avoid bloating this script. + if ( + model_type in MODEL_TO_PATCH_FOR_PAST + and input_name == "sequence_length" + and kwargs_shapes.get(input_name) == 1 + ): + raise ValueError( + f"Exporting with a sequence length of 1 a {model_type} model is not supported and can yield unexpected results." + ) + + onnx_config, models_and_onnx_configs = _get_submodels_and_onnx_configs( + model=model, + task=task, + monolith=monolith, + custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, + custom_architecture=custom_architecture, + float_dtype=float_dtype, + fn_get_submodels=fn_get_submodels, + preprocessors=preprocessors, + _variant=_variant, + legacy=legacy, + library_name=library_name, + model_kwargs=model_kwargs, + ) + + if library_name != "diffusers": + # Ensure the requested opset is sufficient + if opset is None: + opset = onnx_config.DEFAULT_ONNX_OPSET + elif opset < onnx_config.DEFAULT_ONNX_OPSET: + logger.warning( + f"Opset {opset} is lower than the recommended minmum opset ({onnx_config.DEFAULT_ONNX_OPSET}) to export {model_type}. " + f"The ONNX export may fail or the exported model may be suboptimal." + ) + if atol is None: + atol = onnx_config.ATOL_FOR_VALIDATION + if isinstance(atol, dict): + atol = atol[task.replace("-with-past", "")] + + # Saving the model config and preprocessor as this is needed sometimes. 
+ model.config.save_pretrained(output) + generation_config = getattr(model, "generation_config", None) + if generation_config is not None: + generation_config.save_pretrained(output) + + model_name_or_path = model.config._name_or_path + maybe_save_preprocessors(model_name_or_path, output) + + onnx_files_subpaths = [key + ".onnx" for key in models_and_onnx_configs.keys()] + else: + # save the subcomponent configuration + for model_name in models_and_onnx_configs: + subcomponent = models_and_onnx_configs[model_name][0] + if hasattr(subcomponent, "save_config"): + subcomponent.save_config(output / model_name) + elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): + subcomponent.config.save_pretrained(output / model_name) + + onnx_files_subpaths = [os.path.join(name_dir, ONNX_WEIGHTS_NAME) for name_dir in models_and_onnx_configs] + + # Saving the additional components needed to perform inference. + model.scheduler.save_pretrained(output.joinpath("scheduler")) + + feature_extractor = getattr(model, "feature_extractor", None) + if feature_extractor is not None: + feature_extractor.save_pretrained(output.joinpath("feature_extractor")) + + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: + tokenizer.save_pretrained(output.joinpath("tokenizer")) + + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + + model.save_config(output) + + if float_dtype == "bf16": + logger.warning( + f"Exporting the model {model.__class__.__name__} in bfloat16 float dtype. After the export, ONNX Runtime InferenceSession with CPU/CUDA execution provider likely does not implement all operators for the bfloat16 data type, and the loading is likely to fail." + ) + + _, onnx_outputs = export_models( + models_and_onnx_configs=models_and_onnx_configs, + opset=opset, + output_dir=output, + output_names=onnx_files_subpaths, + input_shapes=input_shapes, + device=device, + dtype=float_dtype, + no_dynamic_axes=no_dynamic_axes, + do_constant_folding=do_constant_folding, + model_kwargs=model_kwargs, + ) + + if optimize is not None: + from ...onnxruntime import AutoOptimizationConfig, ORTOptimizer + + optimizer = ORTOptimizer.from_pretrained(output, file_names=onnx_files_subpaths) + + optimization_config = AutoOptimizationConfig.with_optimization_level(optimization_level=optimize) + + optimization_config.disable_shape_inference = True + optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="") + + # Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any + # TODO: treating stable diffusion separately is quite ugly + if not no_post_process and library_name != "diffusers": + try: + logger.info("Post-processing the exported models...") + models_and_onnx_configs, onnx_files_subpaths = onnx_config.post_process_exported_models( + output, models_and_onnx_configs, onnx_files_subpaths + ) + except Exception as e: + raise Exception( + f"The post-processing of the ONNX export failed. The export can still be performed by passing the option --no-post-process. Detailed error: {e}" + ) + + if library_name == "diffusers": + # TODO: fix Can't pickle local object 'get_stable_diffusion_models_for_export..' 
+ use_subprocess = False + elif model_type in UNPICKABLE_ARCHS: + # Pickling is bugged for nn.utils.weight_norm: https://github.com/pytorch/pytorch/issues/102983 + # TODO: fix "Cowardly refusing to serialize non-leaf tensor" error for wav2vec2-conformer + use_subprocess = False + + if device == "cpu": + # Using multiprocessing for validation is useful only on CUDA EP that leaks memory. + use_subprocess = False + + if do_validation is True: + try: + validate_models_outputs( + models_and_onnx_configs=models_and_onnx_configs, + onnx_named_outputs=onnx_outputs, + atol=atol, + output_dir=output, + onnx_files_subpaths=onnx_files_subpaths, + input_shapes=input_shapes, + device=device, + use_subprocess=use_subprocess, + model_kwargs=model_kwargs, + ) + logger.info(f"The ONNX export succeeded and the exported model was saved at: {output.as_posix()}") + except ShapeError as e: + raise e + except AtolError as e: + logger.warning( + f"The ONNX export succeeded with the warning: {e}.\n The exported model was saved at: {output.as_posix()}" + ) + except OutputMatchError as e: + logger.warning( + f"The ONNX export succeeded with the warning: {e}.\n The exported model was saved at: {output.as_posix()}" + ) + except Exception as e: + raise Exception( + f"An error occured during validation, but the model was saved nonetheless at {output.as_posix()}. Detailed error: {e}." + ) diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index e5d6de25244..4e2260e0a30 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -15,7 +15,7 @@ """Utility functions.""" import copy -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import torch from packaging import version @@ -560,3 +560,102 @@ def __setstate__(self, values): self.model_path = values["model_path"] self.sess = ort.InferenceSession(self.model_path, sess_options=self.sess_options, providers=self.providers) + + +def _get_submodels_and_onnx_configs( + model: Union["PreTrainedModel", "TFPreTrainedModel"], + task: str, + monolith: bool, + custom_onnx_configs: Dict, + custom_architecture: bool, + _variant: str, + library_name: str, + int_dtype: str = "int64", + float_dtype: str = "fp32", + fn_get_submodels: Optional[Callable] = None, + preprocessors: Optional[List[Any]] = None, + legacy: bool = False, + model_kwargs: Optional[Dict] = None, +): + if not custom_architecture: + if library_name == "diffusers": + onnx_config = None + models_and_onnx_configs = get_stable_diffusion_models_for_export( + model, int_dtype=int_dtype, float_dtype=float_dtype + ) + else: + onnx_config_constructor = TasksManager.get_exporter_config_constructor( + model=model, exporter="onnx", task=task, library_name=library_name + ) + onnx_config = onnx_config_constructor( + model.config, + int_dtype=int_dtype, + float_dtype=float_dtype, + preprocessors=preprocessors, + legacy=legacy, + ) + + onnx_config.variant = _variant + all_variants = "\n".join( + [f" - {name}: {description}" for name, description in onnx_config.VARIANTS.items()] + ) + logger.info(f"Using the export variant {onnx_config.variant}. Available variants are:\n{all_variants}") + + # TODO: this succession of if/else strongly suggests a refactor is needed. 
+ if ( + model.config.is_encoder_decoder + and task.startswith(TasksManager._ENCODER_DECODER_TASKS) + and not monolith + ): + models_and_onnx_configs = get_encoder_decoder_models_for_export(model, onnx_config) + elif task.startswith("text-generation") and not monolith: + models_and_onnx_configs = get_decoder_models_for_export(model, onnx_config, legacy=legacy) + elif model.config.model_type == "sam": + models_and_onnx_configs = get_sam_models_for_export(model, onnx_config) + elif model.config.model_type == "speecht5": + models_and_onnx_configs = get_speecht5_models_for_export(model, onnx_config, model_kwargs) + else: + models_and_onnx_configs = {"model": (model, onnx_config)} + + # When specifying custom ONNX configs for supported transformers architectures, we do + # not force to specify a custom ONNX config for each submodel. + for key, custom_onnx_config in custom_onnx_configs.items(): + models_and_onnx_configs[key] = (models_and_onnx_configs[key][0], custom_onnx_config) + else: + onnx_config = None + submodels_for_export = None + models_and_onnx_configs = {} + + if fn_get_submodels is not None: + submodels_for_export = fn_get_submodels(model) + else: + if library_name == "diffusers": + submodels_for_export = _get_submodels_for_export_stable_diffusion(model) + elif ( + model.config.is_encoder_decoder + and task.startswith(TasksManager._ENCODER_DECODER_TASKS) + and not monolith + ): + submodels_for_export = _get_submodels_for_export_encoder_decoder( + model, use_past=task.endswith("-with-past") + ) + elif task.startswith("text-generation") and not monolith: + submodels_for_export = _get_submodels_for_export_decoder(model, use_past=task.endswith("-with-past")) + else: + submodels_for_export = {"model": model} + + if submodels_for_export.keys() != custom_onnx_configs.keys(): + logger.error(f"ONNX custom configs for: {', '.join(custom_onnx_configs.keys())}") + logger.error(f"Submodels to export: {', '.join(submodels_for_export.keys())}") + raise ValueError( + "Trying to export a custom model, but could not find as many custom ONNX configs as the number of submodels to export. Please specifiy the fn_get_submodels argument, that should return a dictionary of submodules with as many items as the provided custom_onnx_configs dictionary." + ) + + for key, custom_onnx_config in custom_onnx_configs.items(): + models_and_onnx_configs[key] = (submodels_for_export[key], custom_onnx_config) + + # Default to the first ONNX config for stable-diffusion and custom architecture case. + if onnx_config is None: + onnx_config = next(iter(models_and_onnx_configs.values()))[1] + + return onnx_config, models_and_onnx_configs diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index fb4efe6e508..402148c2b01 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1721,6 +1721,8 @@ def standardize_model_attributes( library_name (`Optional[str]`, *optional*):: The library name of the model. Can be any of "transformers", "timm", "diffusers", "sentence_transformers". """ + # TODO: make model_name_or_path an optional argument here. 
+ library_name = TasksManager.infer_library_from_model( model_name_or_path, subfolder, revision, cache_dir, library_name ) diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 05a946bbd94..667206b0068 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -26,7 +26,7 @@ from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow from optimum.exporters.error_utils import MinimumVersionError -from optimum.exporters.onnx.__main__ import main_export +from optimum.exporters.onnx import main_export from optimum.onnxruntime import ( ONNX_DECODER_MERGED_NAME, ONNX_DECODER_NAME, diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index 518f6ce59f7..5fa25196073 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -34,9 +34,10 @@ get_decoder_models_for_export, get_encoder_decoder_models_for_export, get_stable_diffusion_models_for_export, + main_export, + onnx_export_from_model, validate_models_outputs, ) -from optimum.exporters.onnx.__main__ import main_export, onnx_export from optimum.exporters.onnx.base import ConfigBehavior from optimum.exporters.onnx.config import TextDecoderOnnxConfig from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED @@ -632,7 +633,7 @@ def _onnx_export( preprocessors = None with TemporaryDirectory() as tmpdirname: - onnx_export( + onnx_export_from_model( model=model, output=Path(tmpdirname), monolith=monolith,
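To illustrate the renamed entry point end to end, the sketch below mirrors the docstring example added in `optimum/exporters/onnx/convert.py`, using the public import path introduced in `optimum/exporters/onnx/__init__.py`. The output directory name is an arbitrary example.

```python
# Sketch of the new onnx_export_from_model entry point, exporting from an
# already-loaded model object (the output directory is an example).
from transformers import AutoModelForCausalLM

from optimum.exporters.onnx import onnx_export_from_model

model = AutoModelForCausalLM.from_pretrained("gpt2")
# At this point, submodules, forward methods, or weights could be modified before export.

onnx_export_from_model(model, output="gpt2_onnx/")
```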