From fcb169022962e8a33cd408eae566ab318696f5a7 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 16 Jul 2024 09:53:42 +0200 Subject: [PATCH 01/24] created auto task mappings --- optimum/onnxruntime/modeling_diffusion.py | 43 +++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index f4e54752115..3e5aed3fb01 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -18,6 +18,7 @@ import shutil import warnings from abc import abstractmethod +from collections import OrderedDict from pathlib import Path from tempfile import TemporaryDirectory from typing import Any, Dict, Optional, Union @@ -26,6 +27,7 @@ import torch from diffusers import ( DDIMScheduler, + DiffusionPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, @@ -69,8 +71,8 @@ logger = logging.getLogger(__name__) -class ORTStableDiffusionPipelineBase(ORTModel): - auto_model_class = StableDiffusionPipeline +class ORTDiffusionPipeline(ORTModel): + auto_model_class = DiffusionPipeline main_input_name = "input_ids" base_model_prefix = "onnx_model" config_name = "model_index.json" @@ -350,9 +352,9 @@ def _from_pretrained( text_encoder_path=new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name, unet_path=new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, vae_encoder_path=new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, - text_encoder_2_path=new_model_save_dir - / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER - / text_encoder_2_file_name, + text_encoder_2_path=( + new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name + ), provider=provider, session_options=session_options, provider_options=provider_options, @@ -561,7 +563,7 @@ def forward(self, sample: np.ndarray): @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionPipeline(ORTStableDiffusionPipelineBase, StableDiffusionPipelineMixin): +class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ @@ -570,7 +572,7 @@ class ORTStableDiffusionPipeline(ORTStableDiffusionPipelineBase, StableDiffusion @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDiffusionImg2ImgPipelineMixin): +class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg2ImgPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ @@ -579,7 +581,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDiffusionInpaintPipelineMixin): +class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInpaintPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ @@ -588,7 +590,7 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyPipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ @@ -596,7 +598,7 @@ class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentCo __call__ = LatentConsistencyPipelineMixin.__call__ -class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): +class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): auto_model_class = StableDiffusionXLImg2ImgPipeline def __init__( @@ -661,3 +663,24 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab """ __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + + +AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), + ] +) + +AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionImg2ImgPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLImg2ImgPipeline), + ] +) + +AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionInpaintPipeline), + ] +) From 1cbb5448845036104648c6c20267a041a4568250 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 18 Jul 2024 16:50:32 +0200 Subject: [PATCH 02/24] added correct auto classes --- optimum/modeling_base.py | 9 ++++++--- optimum/onnxruntime/modeling_diffusion.py | 24 +++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 5bab0622de4..3da2d9d0d21 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -85,7 +85,6 @@ class PreTrainedModel(ABC): # noqa: F811 class OptimizedModel(PreTrainedModel): config_class = AutoConfig - load_tf_weights = None base_model_prefix = "optimized_model" config_name = CONFIG_NAME @@ -378,10 +377,14 @@ def from_pretrained( ) model_id, revision = model_id.split("@") - library_name = TasksManager.infer_library_from_model(model_id, subfolder, revision, cache_dir, token=token) + library_name = TasksManager.infer_library_from_model( + model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + ) if library_name == "timm": - config = PretrainedConfig.from_pretrained(model_id, subfolder, revision) + config = PretrainedConfig.from_pretrained( + model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + ) if config is None: if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME: diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 3e5aed3fb01..59732e63eae 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -28,10 +28,14 @@ from diffusers import ( DDIMScheduler, DiffusionPipeline, + LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLPipeline, ) from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available @@ -73,11 +77,13 @@ class ORTDiffusionPipeline(ORTModel): auto_model_class = DiffusionPipeline - main_input_name = "input_ids" + main_input_name = "prompt" base_model_prefix = "onnx_model" config_name = "model_index.json" sub_component_config_name = "config.json" + # TODO: instead of having a bloated init, we should probably have an init per pipeline, + # so that we can easily add new pipelines without having to modify the base class def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -401,7 +407,7 @@ def _from_transformers( provider_options: Optional[Dict[str, Any]] = None, use_io_binding: Optional[bool] = None, task: Optional[str] = None, - ) -> "ORTStableDiffusionPipeline": + ) -> "ORTDiffusionPipeline": if use_auth_token is not None: warnings.warn( "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", @@ -568,7 +574,7 @@ class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMi ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ - __call__ = StableDiffusionPipelineMixin.__call__ + auto_model_class = StableDiffusionPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -577,7 +583,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ - __call__ = StableDiffusionImg2ImgPipelineMixin.__call__ + auto_model_class = StableDiffusionImg2ImgPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -586,7 +592,7 @@ class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInp ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ - __call__ = StableDiffusionInpaintPipelineMixin.__call__ + auto_model_class = StableDiffusionInpaintPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -595,12 +601,10 @@ class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyP ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ - __call__ = LatentConsistencyPipelineMixin.__call__ + auto_model_class = LatentConsistencyModelPipeline class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): - auto_model_class = StableDiffusionXLImg2ImgPipeline - def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -653,7 +657,7 @@ class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffu ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). """ - __call__ = StableDiffusionXLPipelineMixin.__call__ + auto_model_class = StableDiffusionXLPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -662,7 +666,7 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). """ - __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + auto_model_class = StableDiffusionXLImg2ImgPipeline AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( From cdba70ea788938f2c632132606f64a95e476b761 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 16 Jul 2024 09:53:42 +0200 Subject: [PATCH 03/24] created auto task mappings --- optimum/onnxruntime/modeling_diffusion.py | 43 +++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index f4e54752115..3e5aed3fb01 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -18,6 +18,7 @@ import shutil import warnings from abc import abstractmethod +from collections import OrderedDict from pathlib import Path from tempfile import TemporaryDirectory from typing import Any, Dict, Optional, Union @@ -26,6 +27,7 @@ import torch from diffusers import ( DDIMScheduler, + DiffusionPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, @@ -69,8 +71,8 @@ logger = logging.getLogger(__name__) -class ORTStableDiffusionPipelineBase(ORTModel): - auto_model_class = StableDiffusionPipeline +class ORTDiffusionPipeline(ORTModel): + auto_model_class = DiffusionPipeline main_input_name = "input_ids" base_model_prefix = "onnx_model" config_name = "model_index.json" @@ -350,9 +352,9 @@ def _from_pretrained( text_encoder_path=new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name, unet_path=new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, vae_encoder_path=new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, - text_encoder_2_path=new_model_save_dir - / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER - / text_encoder_2_file_name, + text_encoder_2_path=( + new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name + ), provider=provider, session_options=session_options, provider_options=provider_options, @@ -561,7 +563,7 @@ def forward(self, sample: np.ndarray): @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionPipeline(ORTStableDiffusionPipelineBase, StableDiffusionPipelineMixin): +class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ @@ -570,7 +572,7 @@ class ORTStableDiffusionPipeline(ORTStableDiffusionPipelineBase, StableDiffusion @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDiffusionImg2ImgPipelineMixin): +class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg2ImgPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ @@ -579,7 +581,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDiffusionInpaintPipelineMixin): +class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInpaintPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ @@ -588,7 +590,7 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyPipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ @@ -596,7 +598,7 @@ class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentCo __call__ = LatentConsistencyPipelineMixin.__call__ -class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): +class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): auto_model_class = StableDiffusionXLImg2ImgPipeline def __init__( @@ -661,3 +663,24 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab """ __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + + +AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), + ] +) + +AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionImg2ImgPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLImg2ImgPipeline), + ] +) + +AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionInpaintPipeline), + ] +) From 5bebbd52040c9d057bbbf36c6c9d78fec2f785a0 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 18 Jul 2024 16:50:32 +0200 Subject: [PATCH 04/24] added correct auto classes --- optimum/modeling_base.py | 9 ++++++--- optimum/onnxruntime/modeling_diffusion.py | 24 +++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 5bab0622de4..3da2d9d0d21 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -85,7 +85,6 @@ class PreTrainedModel(ABC): # noqa: F811 class OptimizedModel(PreTrainedModel): config_class = AutoConfig - load_tf_weights = None base_model_prefix = "optimized_model" config_name = CONFIG_NAME @@ -378,10 +377,14 @@ def from_pretrained( ) model_id, revision = model_id.split("@") - library_name = TasksManager.infer_library_from_model(model_id, subfolder, revision, cache_dir, token=token) + library_name = TasksManager.infer_library_from_model( + model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + ) if library_name == "timm": - config = PretrainedConfig.from_pretrained(model_id, subfolder, revision) + config = PretrainedConfig.from_pretrained( + model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + ) if config is None: if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME: diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 3e5aed3fb01..59732e63eae 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -28,10 +28,14 @@ from diffusers import ( DDIMScheduler, DiffusionPipeline, + LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLPipeline, ) from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available @@ -73,11 +77,13 @@ class ORTDiffusionPipeline(ORTModel): auto_model_class = DiffusionPipeline - main_input_name = "input_ids" + main_input_name = "prompt" base_model_prefix = "onnx_model" config_name = "model_index.json" sub_component_config_name = "config.json" + # TODO: instead of having a bloated init, we should probably have an init per pipeline, + # so that we can easily add new pipelines without having to modify the base class def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -401,7 +407,7 @@ def _from_transformers( provider_options: Optional[Dict[str, Any]] = None, use_io_binding: Optional[bool] = None, task: Optional[str] = None, - ) -> "ORTStableDiffusionPipeline": + ) -> "ORTDiffusionPipeline": if use_auth_token is not None: warnings.warn( "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", @@ -568,7 +574,7 @@ class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMi ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ - __call__ = StableDiffusionPipelineMixin.__call__ + auto_model_class = StableDiffusionPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -577,7 +583,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ - __call__ = StableDiffusionImg2ImgPipelineMixin.__call__ + auto_model_class = StableDiffusionImg2ImgPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -586,7 +592,7 @@ class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInp ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ - __call__ = StableDiffusionInpaintPipelineMixin.__call__ + auto_model_class = StableDiffusionInpaintPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -595,12 +601,10 @@ class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyP ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ - __call__ = LatentConsistencyPipelineMixin.__call__ + auto_model_class = LatentConsistencyModelPipeline class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): - auto_model_class = StableDiffusionXLImg2ImgPipeline - def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -653,7 +657,7 @@ class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffu ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). """ - __call__ = StableDiffusionXLPipelineMixin.__call__ + auto_model_class = StableDiffusionXLPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) @@ -662,7 +666,7 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). """ - __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + auto_model_class = StableDiffusionXLImg2ImgPipeline AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( From 40b2ac0ab619725aed28c3def0df3987857be6b5 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Fri, 19 Jul 2024 09:22:03 +0200 Subject: [PATCH 05/24] added ort/auto diffusion classes --- optimum/onnxruntime/modeling_diffusion.py | 104 +++++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 59732e63eae..a5fcdc0ae56 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -26,6 +26,10 @@ import numpy as np import torch from diffusers import ( + AutoPipelineForImage2Image, + AutoPipelineForInpainting, + AutoPipelineForText2Image, + ConfigMixin, DDIMScheduler, DiffusionPipeline, LatentConsistencyModelPipeline, @@ -37,10 +41,16 @@ StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, ) +from diffusers.pipelines.auto_pipeline import ( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + AUTO_INPAINT_PIPELINES_MAPPING, + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, +) from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE +from huggingface_hub.utils import validate_hf_hub_args from transformers import CLIPFeatureExtractor, CLIPTokenizer from transformers.file_utils import add_end_docstrings @@ -576,6 +586,8 @@ class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMi auto_model_class = StableDiffusionPipeline + __call__ = StableDiffusionPipelineMixin.__call__ + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg2ImgPipelineMixin): @@ -585,6 +597,8 @@ class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg auto_model_class = StableDiffusionImg2ImgPipeline + __call__ = StableDiffusionImg2ImgPipelineMixin.__call__ + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInpaintPipelineMixin): @@ -594,6 +608,8 @@ class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInp auto_model_class = StableDiffusionInpaintPipeline + __call__ = StableDiffusionInpaintPipelineMixin.__call__ + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyPipelineMixin): @@ -603,6 +619,8 @@ class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyP auto_model_class = LatentConsistencyModelPipeline + __call__ = LatentConsistencyPipelineMixin.__call__ + class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): def __init__( @@ -659,6 +677,8 @@ class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffu auto_model_class = StableDiffusionXLPipeline + __call__ = StableDiffusionXLPipelineMixin.__call__ + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, StableDiffusionXLImg2ImgPipelineMixin): @@ -668,23 +688,101 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab auto_model_class = StableDiffusionXLImg2ImgPipeline + __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + -AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( +ORT_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( [ + ("lcm", ORTLatentConsistencyModelPipeline), ("stable-diffusion", ORTStableDiffusionPipeline), ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), ] ) -AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( +ORT_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( [ ("stable-diffusion", ORTStableDiffusionImg2ImgPipeline), ("stable-diffusion-xl", ORTStableDiffusionXLImg2ImgPipeline), ] ) -AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict( +ORT_INPAINT_PIPELINES_MAPPING = OrderedDict( [ ("stable-diffusion", ORTStableDiffusionInpaintPipeline), ] ) + + +def _get_task_class(ort_mapping, pipeline_class_name, throw_error_if_not_exist: bool = True): + for model_type, ort_pipeline_class in ort_mapping.items(): + if pipeline_class_name == ort_pipeline_class.auto_model_class.__name__: + return ort_pipeline_class + + if throw_error_if_not_exist: + raise ValueError(f"ORTPipeline can't find a pipeline linked to {pipeline_class_name}") + + +class ORTPipelineBase(ConfigMixin): + config_name = "model_index.json" + + ort_pipeline_mapping = None + auto_pipeline_mapping = None + + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " + f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." + ) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs): + load_config_kwargs = { + "force_download": kwargs.get("force_download", False), + "resume_download": kwargs.get("resume_download", None), + "local_files_only": kwargs.get("local_files_only", False), + "cache_dir": kwargs.get("cache_dir", None), + "revision": kwargs.get("revision", None), + "proxies": kwargs.get("proxies", None), + "token": kwargs.get("token", None), + } + + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + + original_class_name = config["_class_name"] + + pipeline_cls = _get_task_class( + cls.ort_pipeline_mapping, + cls.auto_pipeline_mapping, + original_class_name, + ) + + return pipeline_cls.from_pretrained(pretrained_model_or_path, **kwargs) + + @classmethod + def from_pipe(cls, **kwargs): + raise NotImplementedError( + f"from_pipe is not yet implemented for {cls.__name__}. Please use from_pretrained instead." + ) + + +class ORTPipelineForText2Image(ORTPipelineBase): + auto_model_class = AutoPipelineForText2Image + + ort_pipeline_mapping = ORT_TEXT2IMAGE_PIPELINES_MAPPING + auto_pipeline_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING + + +class ORTPipelineForImage2Image(ORTPipelineBase): + auto_model_class = AutoPipelineForImage2Image + + ort_pipeline_mapping = ORT_IMAGE2IMAGE_PIPELINES_MAPPING + auto_pipeline_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING + + +class ORTPipelineForInpainting(ORTPipelineBase): + auto_model_class = AutoPipelineForInpainting + + ort_pipeline_mapping = ORT_INPAINT_PIPELINES_MAPPING + auto_pipeline_mapping = AUTO_INPAINT_PIPELINES_MAPPING From 29bfe57c01ff7c74503e094f01430168c3763b53 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 31 Jul 2024 15:07:53 +0200 Subject: [PATCH 06/24] fix ORTPipeline detection --- optimum/onnxruntime/__init__.py | 6 ++++ optimum/onnxruntime/modeling_diffusion.py | 42 +++++++++++++++-------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index f1d4f63a9ff..35cbf14587e 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -88,6 +88,9 @@ "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", "ORTLatentConsistencyModelPipeline", + "ORTPipelineForText2Image", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", ] @@ -147,6 +150,9 @@ else: from .modeling_diffusion import ( ORTLatentConsistencyModelPipeline, + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index a5fcdc0ae56..982dd123343 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -712,14 +712,32 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ] ) - -def _get_task_class(ort_mapping, pipeline_class_name, throw_error_if_not_exist: bool = True): - for model_type, ort_pipeline_class in ort_mapping.items(): - if pipeline_class_name == ort_pipeline_class.auto_model_class.__name__: - return ort_pipeline_class +SUPPORTED_TASKS_MAPPINGS = [ + ORT_TEXT2IMAGE_PIPELINES_MAPPING, + ORT_IMAGE2IMAGE_PIPELINES_MAPPING, + ORT_INPAINT_PIPELINES_MAPPING, +] + + +def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True): + def get_model(pipeline_class_name): + for task_mapping in SUPPORTED_TASKS_MAPPINGS: + for model_name, pipeline in task_mapping.items(): + if ( + pipeline.__name__ == pipeline_class_name + or pipeline.auto_model_class.__name__ == pipeline_class_name + ): + return model_name + + model_name = get_model(pipeline_class_name) + + if model_name is not None: + task_class = mapping.get(model_name, None) + if task_class is not None: + return task_class if throw_error_if_not_exist: - raise ValueError(f"ORTPipeline can't find a pipeline linked to {pipeline_class_name}") + raise ValueError(f"AutoPipeline can't find a pipeline linked to {pipeline_class_name} for {model_name}") class ORTPipelineBase(ConfigMixin): @@ -749,16 +767,12 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): } config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + config = config[0] if isinstance(config, tuple) else config + class_name = config["_class_name"] - original_class_name = config["_class_name"] - - pipeline_cls = _get_task_class( - cls.ort_pipeline_mapping, - cls.auto_pipeline_mapping, - original_class_name, - ) + ort_pipeline_cls = _get_task_class(cls.ort_pipeline_mapping, class_name) - return pipeline_cls.from_pretrained(pretrained_model_or_path, **kwargs) + return ort_pipeline_cls.from_pretrained(pretrained_model_or_path, **kwargs) @classmethod def from_pipe(cls, **kwargs): From f6df38ccca773e3de0cc55e567b0593fca4ece12 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 31 Jul 2024 15:08:13 +0200 Subject: [PATCH 07/24] start test refactoring --- optimum/utils/testing_utils.py | 11 + tests/onnxruntime/test_diffusion.py | 730 ++++++++++++++++++ .../test_stable_diffusion_pipeline.py | 562 -------------- tests/onnxruntime/utils_onnxruntime_tests.py | 15 +- 4 files changed, 752 insertions(+), 566 deletions(-) create mode 100644 tests/onnxruntime/test_diffusion.py delete mode 100644 tests/onnxruntime/test_stable_diffusion_pipeline.py diff --git a/optimum/utils/testing_utils.py b/optimum/utils/testing_utils.py index 76fe9a05b13..6579e230dc8 100644 --- a/optimum/utils/testing_utils.py +++ b/optimum/utils/testing_utils.py @@ -84,6 +84,17 @@ def require_ort_rocm(test_case): ) +def require_ort_cuda(test_case): + """Decorator marking a test that requires CUDAExecutionProvider for ONNX Runtime.""" + import onnxruntime as ort + + providers = ort.get_available_providers() + + return unittest.skipUnless("CUDAExecutionProvider" == providers[0], "test requires CUDAExecutionProvider")( + test_case + ) + + def require_hf_token(test_case): """ Decorator marking a test that requires huggingface hub token. diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py new file mode 100644 index 00000000000..2d5ab7a7f8b --- /dev/null +++ b/tests/onnxruntime/test_diffusion.py @@ -0,0 +1,730 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import random +import unittest +from typing import Dict + +import numpy as np +import PIL +import pytest +import torch +from diffusers import ( + AutoPipelineForImage2Image, + AutoPipelineForInpainting, + AutoPipelineForText2Image, + StableDiffusionPipeline, + StableDiffusionXLPipeline, +) +from diffusers.utils import load_image +from diffusers.utils.testing_utils import floats_tensor +from packaging.version import Version, parse +from parameterized import parameterized +from transformers.testing_utils import require_torch_gpu +from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin + +from optimum.onnxruntime import ( + ORTLatentConsistencyModelPipeline, + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, + ORTStableDiffusionImg2ImgPipeline, + ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionPipeline, + ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLPipeline, +) +from optimum.onnxruntime.modeling_diffusion import ( + ORTModelTextEncoder, + ORTModelUnet, + ORTModelVaeDecoder, + ORTModelVaeEncoder, +) +from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor +from optimum.utils.import_utils import _diffusers_version +from optimum.utils.testing_utils import grid_parameters, require_diffusers, require_ort_rocm + + +if parse(_diffusers_version) > Version("0.21.4"): + from diffusers import LatentConsistencyModelPipeline + + +def _generate_inputs(batch_size=1): + inputs = { + "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + +def _create_image(height=128, width=128, batch_size=1, channel=3, input_type="pil"): + if input_type == "pil": + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ).resize((width, height)) + elif input_type == "np": + image = np.random.rand(height, width, channel) + elif input_type == "pt": + image = torch.rand((channel, height, width)) + + return [image] * batch_size + + +def to_np(image): + if isinstance(image[0], PIL.Image.Image): + return np.stack([np.array(i) for i in image], axis=0) + elif isinstance(image, torch.Tensor): + return image.cpu().numpy().transpose(0, 2, 3, 1) + return image + + +class ORTPipelineForText2ImageTest(ORTModelTestMixin): + ARCHITECTURE_TO_ORTMODEL_CLASS = { + "stable-diffusion": ORTStableDiffusionPipeline, + "stable-diffusion-xl": ORTStableDiffusionXLPipeline, + "lcm": ORTLatentConsistencyModelPipeline, + } + + AUTOMODEL_CLASS = AutoPipelineForText2Image + ORTMODEL_CLASS = ORTPipelineForText2Image + + TASK = "text-to-image" + + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + self.assertIsInstance(pipeline, self.ARCHITECTURE_TO_ORTMODEL_CLASS[model_arch]) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + self.assertEqual(pipeline.vae_scale_factor, 2) + self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) + self.assertEqual(pipeline.unet.config["in_channels"], 4) + + batch_size, height = 1, 32 + for width in [64, 32]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + for num_images in [1, 3]: + outputs = pipeline(**inputs, num_images_per_prompt=num_images).images + self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} + ) + ) + @require_torch_gpu + @pytest.mark.cuda_ep_test + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} + ) + ) + @require_torch_gpu + @require_ort_rocm + @pytest.mark.rocm_ep_test + @require_diffusers + def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_callback(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: + callback_fn.has_been_called = True + callback_fn.number_of_steps += 1 + + callback_fn.has_been_called = False + callback_fn.number_of_steps = 0 + + inputs = self.generate_inputs(height=64, width=64) + pipeline(**inputs, callback=callback_fn, callback_steps=1) + self.assertTrue(callback_fn.has_been_called) + self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + height, width, batch_size = 128, 64, 1 + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for output_type in ["np", "pil", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + else: + self.assertEqual( + outputs.shape, + (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + ) + + @parameterized.expand(["stable-diffusion", "stable-diffusion-xl"]) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + inputs = _generate_inputs() + height, width = 64, 64 + np.random.seed(0) + ort_outputs_1 = pipeline(**inputs, height=height, width=width) + np.random.seed(0) + ort_outputs_2 = pipeline(**inputs, height=height, width=width) + ort_outputs_3 = pipeline(**inputs, height=height, width=width) + self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + + @parameterized.expand(["stable-diffusion"]) + def test_negative_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + negative_prompt = ["This is a negative prompt"] + np.random.seed(0) + image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] + prompt = inputs.pop("prompt") + embeds = [] + for p in [prompt, negative_prompt]: + text_inputs = pipeline.tokenizer( + p, + padding="max_length", + max_length=pipeline.tokenizer.model_max_length, + truncation=True, + return_tensors="np", + ) + text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) + embeds.append(pipeline.text_encoder(text_inputs)[0]) + + inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds + np.random.seed(0) + image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] + self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) + + def generate_inputs(self, height=128, width=128, batch_size=1): + inputs = _generate_inputs(batch_size=batch_size) + inputs["height"] = height + inputs["width"] = width + return inputs + + +class ORTPipelineForImage2ImageTest(ORTModelTestMixin): + ARCHITECTURE_TO_ORTMODEL_CLASS = { + "stable-diffusion": ORTStableDiffusionImg2ImgPipeline, + "stable-diffusion-xl": ORTStableDiffusionXLImg2ImgPipeline, + } + AUTOMODEL_CLASS = AutoPipelineForImage2Image + ORTMODEL_CLASS = ORTPipelineForImage2Image + + TASK = "image-to-image" + + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + self.assertEqual(pipeline.vae_scale_factor, 2) + self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) + self.assertEqual(pipeline.unet.config["in_channels"], 4) + + batch_size, height = 1, 32 + for width in [64, 32]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + for num_images in [1, 3]: + outputs = pipeline(**inputs, num_images_per_prompt=num_images).images + self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} + ) + ) + @require_torch_gpu + @pytest.mark.cuda_ep_test + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} + ) + ) + @require_torch_gpu + @require_ort_rocm + @pytest.mark.rocm_ep_test + @require_diffusers + def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_callback(self, model_arch: str): + def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: + callback_fn.has_been_called = True + callback_fn.number_of_steps += 1 + + pipe = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) + callback_fn.has_been_called = False + callback_fn.number_of_steps = 0 + inputs = self.generate_inputs(height=64, width=64) + pipe(**inputs, callback=callback_fn, callback_steps=1) + self.assertTrue(callback_fn.has_been_called) + self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + height, width, batch_size = 128, 64, 1 + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for input_type in ["np", "pil", "pt"]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + + for output_type in ["np", "pil", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + else: + self.assertEqual( + outputs.shape, + (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + ) + + def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): + inputs = _generate_inputs(batch_size=batch_size) + inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) + inputs["strength"] = 0.75 + return inputs + + # @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + # @require_diffusers + # def test_shape(self, model_arch: str): + # model_args = {"test_name": model_arch, "model_arch": model_arch} + # self._setup(model_args) + # height, width, batch_size = 128, 64, 1 + # pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + # if self.TASK == "image-to-image": + # input_types = ["np", "pil", "pt"] + # elif self.TASK == "text-to-image": + # input_types = ["np"] + # else: + # input_types = ["pil"] + + # for input_type in input_types: + # if self.TASK == "image-to-image": + # inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + # else: + # inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + # for output_type in ["np", "pil", "latent"]: + # inputs["output_type"] = output_type + # outputs = pipeline(**inputs).images + # if output_type == "pil": + # self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + # elif output_type == "np": + # self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + # else: + # self.assertEqual( + # outputs.shape, + # (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + # ) + + +# class ORTStableDiffusionImg2ImgPipelineTest(ORTStableDiffusionPipelineBase): +# SUPPORTED_ARCHITECTURES = ["stable-diffusion"] +# ORTMODEL_CLASS = ORTStableDiffusionImg2ImgPipeline +# TASK = "image-to-image" + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_compare_diffusers_pipeline(self, model_arch: str): +# model_args = {"test_name": model_arch, "model_arch": model_arch} +# self._setup(model_args) +# height, width = 128, 128 + +# inputs = self.generate_inputs(height=height, width=width) +# inputs["prompt"] = "A painting of a squirrel eating a burger" +# inputs["image"] = floats_tensor((1, 3, height, width), rng=random.Random(SEED)) + +# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) +# ort_output = ort_pipeline(**inputs, generator=np.random.RandomState(SEED)).images + +# diffusers_onnx_pipeline = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) +# diffusers_onnx_output = diffusers_onnx_pipeline(**inputs, generator=np.random.RandomState(SEED)).images + +# self.assertTrue(np.allclose(ort_output, diffusers_onnx_output, atol=1e-1)) + + +# class ORTStableDiffusionPipelineTest(unittest.TestCase): +# SUPPORTED_ARCHITECTURES = [ +# "stable-diffusion", +# ] +# ORTMODEL_CLASS = ORTStableDiffusionPipeline +# TASK = "text-to-image" + + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_image_reproducibility(self, model_arch: str): +# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) +# inputs = _generate_inputs() +# height, width = 64, 32 +# np.random.seed(0) +# ort_outputs_1 = pipeline(**inputs, height=height, width=width) +# np.random.seed(0) +# ort_outputs_2 = pipeline(**inputs, height=height, width=width) +# ort_outputs_3 = pipeline(**inputs, height=height, width=width) +# # Compare model outputs +# self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) +# self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# def test_negative_prompt(self, model_arch: str): +# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) +# inputs = _generate_inputs() +# inputs["height"], inputs["width"] = 64, 32 +# negative_prompt = ["This is a negative prompt"] +# np.random.seed(0) +# image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] +# prompt = inputs.pop("prompt") +# embeds = [] +# for p in [prompt, negative_prompt]: +# text_inputs = pipeline.tokenizer( +# p, +# padding="max_length", +# max_length=pipeline.tokenizer.model_max_length, +# truncation=True, +# return_tensors="np", +# ) +# text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) +# embeds.append(pipeline.text_encoder(text_inputs)[0]) + +# inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds +# np.random.seed(0) +# image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] +# self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) + + +# class ORTStableDiffusionXLPipelineTest(ORTModelTestMixin): +# SUPPORTED_ARCHITECTURES = [ +# "stable-diffusion-xl", +# ] +# ORTMODEL_CLASS = ORTStableDiffusionXLPipeline +# TASK = "text-to-image" + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_compare_to_diffusers(self, model_arch: str): +# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) +# self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) +# self.assertIsInstance(ort_pipeline.text_encoder_2, ORTModelTextEncoder) +# self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) +# self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) +# self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) +# self.assertIsInstance(ort_pipeline.config, Dict) + +# pipeline = StableDiffusionXLPipeline.from_pretrained(MODEL_NAMES[model_arch]) +# batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 +# latents = ort_pipeline.prepare_latents( +# batch_size * num_images_per_prompt, +# ort_pipeline.unet.config["in_channels"], +# height, +# width, +# dtype=np.float32, +# generator=np.random.RandomState(0), +# ) + +# kwargs = { +# "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, +# "num_inference_steps": 1, +# "num_images_per_prompt": num_images_per_prompt, +# "height": height, +# "width": width, +# "guidance_rescale": 0.1, +# } + +# for output_type in ["latent", "np"]: +# ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images +# self.assertIsInstance(ort_outputs, np.ndarray) +# with torch.no_grad(): +# outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images + +# # Compare model outputs +# self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) +# # Compare model devices +# self.assertEqual(pipeline.device, ort_pipeline.device) + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_image_reproducibility(self, model_arch: str): +# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) +# inputs = _generate_inputs() +# height, width = 64, 32 +# np.random.seed(0) +# ort_outputs_1 = pipeline(**inputs, height=height, width=width) +# np.random.seed(0) +# ort_outputs_2 = pipeline(**inputs, height=height, width=width) +# ort_outputs_3 = pipeline(**inputs, height=height, width=width) +# self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) +# self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + + +# class ORTStableDiffusionInpaintPipelineTest(ORTStableDiffusionPipelineBase): +# SUPPORTED_ARCHITECTURES = [ +# "stable-diffusion", +# ] +# ORTMODEL_CLASS = ORTStableDiffusionInpaintPipeline +# TASK = "inpainting" + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_compare_diffusers_pipeline(self, model_arch: str): +# model_args = {"test_name": model_arch, "model_arch": model_arch} +# self._setup(model_args) +# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) +# diffusers_pipeline = self.ORTMODEL_CLASS.auto_model_class.from_pretrained(MODEL_NAMES[model_arch]) +# height, width = 64, 64 +# latents_shape = ( +# 1, +# ort_pipeline.vae_decoder.config["latent_channels"], +# height // ort_pipeline.vae_scale_factor, +# width // ort_pipeline.vae_scale_factor, +# ) +# inputs = self.generate_inputs(height=height, width=width) + +# np_latents = np.random.rand(*latents_shape).astype(np.float32) +# torch_latents = torch.from_numpy(np_latents) + +# ort_outputs = ort_pipeline(**inputs, latents=np_latents).images +# self.assertEqual(ort_outputs.shape, (1, height, width, 3)) + +# diffusers_outputs = diffusers_pipeline(**inputs, latents=torch_latents).images +# self.assertEqual(diffusers_outputs.shape, (1, height, width, 3)) + +# self.assertTrue(np.allclose(ort_outputs, diffusers_outputs, atol=1e-4)) + +# def generate_inputs(self, height=128, width=128, batch_size=1): +# inputs = super(ORTStableDiffusionInpaintPipelineTest, self).generate_inputs(height, width) +# inputs["image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] +# inputs["mask_image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] +# return inputs + + +# class ORTStableDiffusionXLImg2ImgPipelineTest(ORTModelTestMixin): +# SUPPORTED_ARCHITECTURES = [ +# "stable-diffusion-xl", +# ] +# ORTMODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline +# TASK = "image-to-image" + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# def test_inference(self, model_arch: str): +# model_args = {"test_name": model_arch, "model_arch": model_arch} +# self._setup(model_args) +# pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + +# height, width = 128, 128 +# inputs = self.generate_inputs(height=height, width=width) +# inputs["image"] = load_image( +# "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" +# "/in_paint/overture-creations-5sI6fQgYIuo.png" +# ).resize((width, height)) +# output = pipeline(**inputs, generator=np.random.RandomState(0)).images[0, -3:, -3:, -1] +# expected_slice = np.array([0.6515, 0.5405, 0.4858, 0.5632, 0.5174, 0.5681, 0.4948, 0.4253, 0.5080]) + +# self.assertTrue(np.allclose(output.flatten(), expected_slice, atol=1e-1)) + +# def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): +# inputs = _generate_inputs(batch_size=batch_size) +# inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) +# inputs["strength"] = 0.75 +# return inputs + + +# class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): +# SUPPORTED_ARCHITECTURES = [ +# "latent-consistency", +# ] +# ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline +# TASK = "text-to-image" + +# @parameterized.expand(SUPPORTED_ARCHITECTURES) +# @require_diffusers +# @unittest.skipIf( +# parse(_diffusers_version) <= Version("0.21.4"), +# "not supported with this diffusers version, needs diffusers>=v0.22.0", +# ) +# def test_compare_to_diffusers(self, model_arch: str): +# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) +# self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) +# self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) +# self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) +# self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) +# self.assertIsInstance(ort_pipeline.config, Dict) + +# pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) +# batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 +# latents = ort_pipeline.prepare_latents( +# batch_size * num_images_per_prompt, +# ort_pipeline.unet.config["in_channels"], +# height, +# width, +# dtype=np.float32, +# generator=np.random.RandomState(0), +# ) + +# kwargs = { +# "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, +# "num_inference_steps": 1, +# "num_images_per_prompt": num_images_per_prompt, +# "height": height, +# "width": width, +# "guidance_scale": 8.5, +# } + +# for output_type in ["latent", "np"]: +# ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images +# self.assertIsInstance(ort_outputs, np.ndarray) +# with torch.no_grad(): +# outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images + +# # Compare model outputs +# self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) +# # Compare model devices +# self.assertEqual(pipeline.device, ort_pipeline.device) + + +class ImageProcessorTest(unittest.TestCase): + def test_vae_image_processor_pt(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) + input_pt = torch.stack(_create_image(height=8, width=8, batch_size=1, input_type="pt")) + input_np = to_np(input_pt) + + for output_type in ["np", "pil"]: + out = image_processor.postprocess(image_processor.preprocess(input_pt), output_type=output_type) + out_np = to_np(out) + in_np = (input_np * 255).round() if output_type == "pil" else input_np + self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) + + def test_vae_image_processor_np(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) + input_np = np.stack(_create_image(height=8, width=8, input_type="np")) + for output_type in ["np", "pil"]: + out = image_processor.postprocess(image_processor.preprocess(input_np), output_type=output_type) + out_np = to_np(out) + in_np = (input_np * 255).round() if output_type == "pil" else input_np + self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) + + def test_vae_image_processor_pil(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) + input_pil = _create_image(height=8, width=8, batch_size=1, input_type="pil") + + for output_type in ["np", "pil"]: + out = image_processor.postprocess(image_processor.preprocess(input_pil), output_type=output_type) + for i, o in zip(input_pil, out): + in_np = np.array(i) + out_np = to_np(out) if output_type == "pil" else (to_np(out) * 255).round() + self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py deleted file mode 100644 index 44cd22ffecc..00000000000 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ /dev/null @@ -1,562 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import random -import unittest -from typing import Dict - -import numpy as np -import PIL -import pytest -import torch -from diffusers import ( - OnnxStableDiffusionImg2ImgPipeline, - StableDiffusionPipeline, - StableDiffusionXLPipeline, -) -from diffusers.utils import load_image -from diffusers.utils.testing_utils import floats_tensor -from packaging.version import Version, parse -from parameterized import parameterized -from transformers.testing_utils import require_torch_gpu -from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin - -from optimum.onnxruntime import ( - ORTLatentConsistencyModelPipeline, - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, -) -from optimum.onnxruntime.modeling_diffusion import ( - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, -) -from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor -from optimum.utils.import_utils import _diffusers_version -from optimum.utils.testing_utils import grid_parameters, require_diffusers, require_ort_rocm - - -if parse(_diffusers_version) > Version("0.21.4"): - from diffusers import LatentConsistencyModelPipeline - - -def _generate_inputs(batch_size=1): - inputs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 3, - "guidance_scale": 7.5, - "output_type": "np", - } - return inputs - - -def _create_image(height=128, width=128, batch_size=1, channel=3, input_type="pil"): - if input_type == "pil": - image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/overture-creations-5sI6fQgYIuo.png" - ).resize((width, height)) - elif input_type == "np": - image = np.random.rand(height, width, channel) - elif input_type == "pt": - image = torch.rand((channel, height, width)) - - return [image] * batch_size - - -def to_np(image): - if isinstance(image[0], PIL.Image.Image): - return np.stack([np.array(i) for i in image], axis=0) - elif isinstance(image, torch.Tensor): - return image.cpu().numpy().transpose(0, 2, 3, 1) - return image - - -class ORTStableDiffusionPipelineBase(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionPipeline - TASK = "text-to-image" - - @require_diffusers - def test_load_vanilla_model_which_is_not_supported(self): - with self.assertRaises(Exception) as context: - _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) - - self.assertIn( - f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) - ) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_num_images_per_prompt(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - self.assertEqual(pipeline.vae_scale_factor, 2) - self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) - self.assertEqual(pipeline.unet.config["in_channels"], 4) - - batch_size, height = 1, 32 - for width in [64, 32]: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - for num_images in [1, 3]: - outputs = pipeline(**inputs, num_images_per_prompt=num_images).images - self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) - - @parameterized.expand( - grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["CUDAExecutionProvider"]}) - ) - @require_torch_gpu - @pytest.mark.cuda_ep_test - @require_diffusers - def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - height, width, batch_size = 32, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - outputs = pipeline(**inputs).images - # Verify model devices - self.assertEqual(pipeline.device.type.lower(), "cuda") - # Verify model outptus - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - - @parameterized.expand( - grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["ROCMExecutionProvider"]}) - ) - @require_torch_gpu - @require_ort_rocm - @pytest.mark.rocm_ep_test - @require_diffusers - def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - height, width, batch_size = 32, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - outputs = pipeline(**inputs).images - # Verify model devices - self.assertEqual(pipeline.device.type.lower(), "cuda") - # Verify model outptus - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_callback(self, model_arch: str): - def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: - callback_fn.has_been_called = True - callback_fn.number_of_steps += 1 - - pipe = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - callback_fn.has_been_called = False - callback_fn.number_of_steps = 0 - inputs = self.generate_inputs(height=64, width=64) - pipe(**inputs, callback=callback_fn, callback_steps=1) - self.assertTrue(callback_fn.has_been_called) - self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_shape(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - height, width, batch_size = 128, 64, 1 - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - - if self.TASK == "image-to-image": - input_types = ["np", "pil", "pt"] - elif self.TASK == "text-to-image": - input_types = ["np"] - else: - input_types = ["pil"] - - for input_type in input_types: - if self.TASK == "image-to-image": - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) - else: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - for output_type in ["np", "pil", "latent"]: - inputs["output_type"] = output_type - outputs = pipeline(**inputs).images - if output_type == "pil": - self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) - elif output_type == "np": - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - else: - self.assertEqual( - outputs.shape, - (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), - ) - - def generate_inputs(self, height=128, width=128, batch_size=1): - inputs = _generate_inputs(batch_size=batch_size) - inputs["height"] = height - inputs["width"] = width - return inputs - - -class ORTStableDiffusionImg2ImgPipelineTest(ORTStableDiffusionPipelineBase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionImg2ImgPipeline - TASK = "image-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_diffusers_pipeline(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - height, width = 128, 128 - - inputs = self.generate_inputs(height=height, width=width) - inputs["prompt"] = "A painting of a squirrel eating a burger" - inputs["image"] = floats_tensor((1, 3, height, width), rng=random.Random(SEED)) - - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - ort_output = ort_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - - diffusers_onnx_pipeline = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) - diffusers_onnx_output = diffusers_onnx_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - - self.assertTrue(np.allclose(ort_output, diffusers_onnx_output, atol=1e-1)) - - def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): - inputs = _generate_inputs(batch_size=batch_size) - inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) - inputs["strength"] = 0.75 - return inputs - - -class ORTStableDiffusionPipelineTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = StableDiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) - pipeline.safety_checker = None - batch_size, num_images_per_prompt, height, width = 1, 2, 64, 32 - - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": "sailing ship in storm by Leonardo da Vinci", - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_rescale": 0.1, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - self.assertIsInstance(ort_outputs, np.ndarray) - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_image_reproducibility(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - height, width = 64, 32 - np.random.seed(0) - ort_outputs_1 = pipeline(**inputs, height=height, width=width) - np.random.seed(0) - ort_outputs_2 = pipeline(**inputs, height=height, width=width) - ort_outputs_3 = pipeline(**inputs, height=height, width=width) - # Compare model outputs - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - def test_negative_prompt(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - inputs["height"], inputs["width"] = 64, 32 - negative_prompt = ["This is a negative prompt"] - np.random.seed(0) - image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] - prompt = inputs.pop("prompt") - embeds = [] - for p in [prompt, negative_prompt]: - text_inputs = pipeline.tokenizer( - p, - padding="max_length", - max_length=pipeline.tokenizer.model_max_length, - truncation=True, - return_tensors="np", - ) - text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) - embeds.append(pipeline.text_encoder(text_inputs)[0]) - - inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds - np.random.seed(0) - image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] - self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) - - -class ORTStableDiffusionXLPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion-xl", - ] - ORTMODEL_CLASS = ORTStableDiffusionXLPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.text_encoder_2, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = StableDiffusionXLPipeline.from_pretrained(MODEL_NAMES[model_arch]) - batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_rescale": 0.1, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - self.assertIsInstance(ort_outputs, np.ndarray) - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_image_reproducibility(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - height, width = 64, 32 - np.random.seed(0) - ort_outputs_1 = pipeline(**inputs, height=height, width=width) - np.random.seed(0) - ort_outputs_2 = pipeline(**inputs, height=height, width=width) - ort_outputs_3 = pipeline(**inputs, height=height, width=width) - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - -class ORTStableDiffusionInpaintPipelineTest(ORTStableDiffusionPipelineBase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionInpaintPipeline - TASK = "inpainting" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_diffusers_pipeline(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - diffusers_pipeline = self.ORTMODEL_CLASS.auto_model_class.from_pretrained(MODEL_NAMES[model_arch]) - height, width = 64, 64 - latents_shape = ( - 1, - ort_pipeline.vae_decoder.config["latent_channels"], - height // ort_pipeline.vae_scale_factor, - width // ort_pipeline.vae_scale_factor, - ) - inputs = self.generate_inputs(height=height, width=width) - - np_latents = np.random.rand(*latents_shape).astype(np.float32) - torch_latents = torch.from_numpy(np_latents) - - ort_outputs = ort_pipeline(**inputs, latents=np_latents).images - self.assertEqual(ort_outputs.shape, (1, height, width, 3)) - - diffusers_outputs = diffusers_pipeline(**inputs, latents=torch_latents).images - self.assertEqual(diffusers_outputs.shape, (1, height, width, 3)) - - self.assertTrue(np.allclose(ort_outputs, diffusers_outputs, atol=1e-4)) - - def generate_inputs(self, height=128, width=128, batch_size=1): - inputs = super(ORTStableDiffusionInpaintPipelineTest, self).generate_inputs(height, width) - inputs["image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] - inputs["mask_image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] - return inputs - - -class ORTStableDiffusionXLImg2ImgPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion-xl", - ] - ORTMODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline - TASK = "image-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_inference(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - - height, width = 128, 128 - inputs = self.generate_inputs(height=height, width=width) - inputs["image"] = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/overture-creations-5sI6fQgYIuo.png" - ).resize((width, height)) - output = pipeline(**inputs, generator=np.random.RandomState(0)).images[0, -3:, -3:, -1] - expected_slice = np.array([0.6515, 0.5405, 0.4858, 0.5632, 0.5174, 0.5681, 0.4948, 0.4253, 0.5080]) - - self.assertTrue(np.allclose(output.flatten(), expected_slice, atol=1e-1)) - - def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): - inputs = _generate_inputs(batch_size=batch_size) - inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) - inputs["strength"] = 0.75 - return inputs - - -class ImageProcessorTest(unittest.TestCase): - def test_vae_image_processor_pt(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pt = torch.stack(_create_image(height=8, width=8, batch_size=1, input_type="pt")) - input_np = to_np(input_pt) - - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_pt), output_type=output_type) - out_np = to_np(out) - in_np = (input_np * 255).round() if output_type == "pil" else input_np - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - def test_vae_image_processor_np(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_np = np.stack(_create_image(height=8, width=8, input_type="np")) - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_np), output_type=output_type) - out_np = to_np(out) - in_np = (input_np * 255).round() if output_type == "pil" else input_np - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - def test_vae_image_processor_pil(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pil = _create_image(height=8, width=8, batch_size=1, input_type="pil") - - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_pil), output_type=output_type) - for i, o in zip(input_pil, out): - in_np = np.array(i) - out_np = to_np(out) if output_type == "pil" else (to_np(out) * 255).round() - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - -class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "latent-consistency", - ] - ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - @unittest.skipIf( - parse(_diffusers_version) <= Version("0.21.4"), - "not supported with this diffusers version, needs diffusers>=v0.22.0", - ) - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) - batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_scale": 8.5, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - self.assertIsInstance(ort_outputs, np.ndarray) - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index bb6935461d7..e77b9b7c20b 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -108,7 +108,7 @@ "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", "levit": "hf-internal-testing/tiny-random-LevitModel", - "latent-consistency": "echarlaix/tiny-random-latent-consistency", + "lcm": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", "longt5": "hf-internal-testing/tiny-random-LongT5Model", @@ -213,9 +213,16 @@ def _setup(self, model_args: Dict): continue set_seed(SEED) - onnx_model = self.ORTMODEL_CLASS.from_pretrained( - model_id, **model_args, use_io_binding=False, export=True - ) + if hasattr(self, "ORTMODEL_CLASS"): + onnx_model = self.ORTMODEL_CLASS.from_pretrained( + model_id, **model_args, use_io_binding=False, export=True + ) + elif hasattr(self, "ORTPIPELINE_CLASS"): + onnx_model = self.ORTPIPELINE_CLASS.from_pretrained( + model_id, **model_args, use_io_binding=False, export=True + ) + else: + raise ValueError("ORTMODEL_CLASS or ORTPIPELINE_CLASS must be defined") model_dir = tempfile.mkdtemp( prefix=f"{model_arch_and_params}_{self.TASK}_{model_id.replace('/', '_')}" From 3123ea5fa6c201c86cb023c6301aa00afede3e15 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 27 Aug 2024 17:46:50 +0200 Subject: [PATCH 08/24] dynamic dtype --- optimum/onnxruntime/modeling_diffusion.py | 98 ++++++++++++++--------- 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 982dd123343..7445f1c6eff 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -53,6 +53,7 @@ from huggingface_hub.utils import validate_hf_hub_args from transformers import CLIPFeatureExtractor, CLIPTokenizer from transformers.file_utils import add_end_docstrings +from transformers.modeling_outputs import ModelOutput import onnxruntime as ort @@ -72,9 +73,9 @@ DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ) +from .io_binding import TypeHelper from .modeling_ort import ONNX_MODEL_END_DOCSTRING, ORTModel from .utils import ( - _ORT_TO_NP_TYPE, ONNX_WEIGHTS_NAME, get_provider_for_device, parse_device, @@ -501,14 +502,23 @@ class _ORTDiffusionModelPart: CONFIG_NAME = "config.json" + _prepare_onnx_inputs = ORTModel._prepare_onnx_inputs + _prepare_onnx_outputs = ORTModel._prepare_onnx_outputs + def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): self.session = session self.parent_model = parent_model - self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} config_path = Path(session._model_path).parent / self.CONFIG_NAME self.config = self.parent_model._dict_from_json_file(config_path) if config_path.is_file() else {} - self.input_dtype = {inputs.name: _ORT_TO_NP_TYPE[inputs.type] for inputs in self.session.get_inputs()} + self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} + self.input_dtypes = {input_key.name: input_key.type for input_key in session.get_inputs()} + self.output_dtypes = {output_key.name: output_key.type for output_key in session.get_outputs()} + + @property + def input_dtype(self): + # for backward compatibility + return {key: TypeHelper.ort_type_to_numpy_type(value) for key, value in self.input_dtypes.items()} @property def device(self): @@ -523,12 +533,16 @@ def __call__(self, *args, **kwargs): class ORTModelTextEncoder(_ORTDiffusionModelPart): - def forward(self, input_ids: np.ndarray): - onnx_inputs = { - "input_ids": input_ids, - } - outputs = self.session.run(None, onnx_inputs) - return outputs + def forward(self, input_ids: Union[np.ndarray, torch.Tensor]): + use_torch = isinstance(input_ids, torch.Tensor) + + model_inputs = {"input_ids": input_ids} + + onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) + + return ModelOutput(**model_outputs) class ORTModelUnet(_ORTDiffusionModelPart): @@ -537,45 +551,55 @@ def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): def forward( self, - sample: np.ndarray, - timestep: np.ndarray, - encoder_hidden_states: np.ndarray, - text_embeds: Optional[np.ndarray] = None, - time_ids: Optional[np.ndarray] = None, - timestep_cond: Optional[np.ndarray] = None, + sample: Union[np.ndarray, torch.Tensor], + timestep: Union[np.ndarray, torch.Tensor], + encoder_hidden_states: Union[np.ndarray, torch.Tensor], + text_embeds: Optional[Union[np.ndarray, torch.Tensor]] = None, + time_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, + timestep_cond: Optional[Union[np.ndarray, torch.Tensor]] = None, ): - onnx_inputs = { + use_torch = isinstance(sample, torch.Tensor) + + model_inputs = { "sample": sample, "timestep": timestep, "encoder_hidden_states": encoder_hidden_states, + "text_embeds": text_embeds, + "time_ids": time_ids, + "timestep_cond": timestep_cond, } - if text_embeds is not None: - onnx_inputs["text_embeds"] = text_embeds - if time_ids is not None: - onnx_inputs["time_ids"] = time_ids - if timestep_cond is not None: - onnx_inputs["timestep_cond"] = timestep_cond - outputs = self.session.run(None, onnx_inputs) - return outputs + onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) + + return ModelOutput(**model_outputs) class ORTModelVaeDecoder(_ORTDiffusionModelPart): - def forward(self, latent_sample: np.ndarray): - onnx_inputs = { - "latent_sample": latent_sample, - } - outputs = self.session.run(None, onnx_inputs) - return outputs + def forward(self, latent_sample: Union[np.ndarray, torch.Tensor]): + use_torch = isinstance(latent_sample, torch.Tensor) + + model_inputs = {"latent_sample": latent_sample} + + onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) + + return ModelOutput(**model_outputs) class ORTModelVaeEncoder(_ORTDiffusionModelPart): - def forward(self, sample: np.ndarray): - onnx_inputs = { - "sample": sample, - } - outputs = self.session.run(None, onnx_inputs) - return outputs + def forward(self, sample: Union[np.ndarray, torch.Tensor]): + use_torch = isinstance(sample, torch.Tensor) + + model_inputs = {"sample": sample} + + onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) + + return ModelOutput(**model_outputs) @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) From 7803ef311e6efedcebf2220d8290d8216652d022 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 27 Aug 2024 17:50:36 +0200 Subject: [PATCH 09/24] support torch random numbers generator --- .../diffusers/pipeline_latent_consistency.py | 6 +- .../diffusers/pipeline_stable_diffusion.py | 16 ++++-- .../pipeline_stable_diffusion_img2img.py | 56 +++++++++++++++---- .../pipeline_stable_diffusion_inpaint.py | 22 +++++--- .../diffusers/pipeline_stable_diffusion_xl.py | 21 +++++-- .../pipeline_stable_diffusion_xl_img2img.py | 28 +++++++--- optimum/pipelines/diffusers/pipeline_utils.py | 8 +-- 7 files changed, 115 insertions(+), 42 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 41c85b5b6ac..630d463de73 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -36,7 +36,7 @@ def __call__( original_inference_steps: int = None, guidance_scale: float = 8.5, num_images_per_prompt: int = 1, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, output_type: str = "pil", @@ -66,7 +66,7 @@ def __call__( usually at the expense of lower image quality. num_images_per_prompt (`int`, defaults to 1): The number of images to generate per prompt. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[Union[np.random.RandomState, torch.Generator]]`, defaults to `None`): A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image @@ -121,7 +121,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() prompt_embeds = self._encode_prompt( prompt, diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion.py index 98bff0de44d..6cc47fab1b9 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion.py @@ -189,7 +189,15 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype ) if latents is None: - latents = generator.randn(*shape).astype(dtype) + if isinstance(generator, np.random.RandomState): + latents = generator.randn(*shape).astype(dtype) + elif isinstance(generator, torch.Generator): + latents = torch.randn(*shape, generator=generator).numpy().astype(dtype) + else: + raise ValueError( + f"Expected `generator` to be of type `np.random.RandomState` or `torch.Generator`, but got" + f" {type(generator)}." + ) elif latents.shape != shape: raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") @@ -209,7 +217,7 @@ def __call__( negative_prompt: Optional[Union[str, List[str]]] = None, num_images_per_prompt: int = 1, eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, negative_prompt_embeds: Optional[np.ndarray] = None, @@ -248,7 +256,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[Union[np.random.RandomState, torch.Generator]]`, defaults to `None`):: A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image @@ -303,7 +311,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py index 81a6ffa1e04..f7f0586ac90 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py @@ -16,7 +16,7 @@ from typing import Callable, List, Optional, Union import numpy as np -import PIL +import PIL.Image import torch from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import deprecate @@ -72,6 +72,43 @@ def check_inputs( f" {negative_prompt_embeds.shape}." ) + # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents + def prepare_latents(self, image, timesteps, batch_size, num_images_per_prompt, dtype, generator=None): + batch_size = batch_size * num_images_per_prompt + + if image.shape[1] == 4: + init_latents = image + else: + init_latents = self.vae_encoder(sample=image)[0] * self.vae_decoder.config.get("scaling_factor", 0.18215) + + if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: + # expand init_latents for batch_size + additional_image_per_prompt = batch_size // init_latents.shape[0] + init_latents = np.concatenate([init_latents] * additional_image_per_prompt, axis=0) + elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: + raise ValueError( + f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." + ) + else: + init_latents = np.concatenate([init_latents], axis=0) + + # add noise to latents using the timesteps + if isinstance(generator, np.random.RandomState): + noise = generator.randn(*init_latents.shape).astype(dtype) + elif isinstance(generator, torch.Generator): + noise = torch.randn(*init_latents.shape, generator=generator).numpy().astype(dtype) + else: + raise ValueError( + f"Expected `generator` to be of type `np.random.RandomState` or `torch.Generator`, but got" + f" {type(generator)}." + ) + + init_latents = self.scheduler.add_noise( + torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timesteps) + ).numpy() + + return init_latents + # Adapted from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionImg2ImgPipeline.__call__ def __call__( self, @@ -83,7 +120,7 @@ def __call__( negative_prompt: Optional[Union[str, List[str]]] = None, num_images_per_prompt: int = 1, eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, prompt_embeds: Optional[np.ndarray] = None, negative_prompt_embeds: Optional[np.ndarray] = None, output_type: str = "pil", @@ -125,7 +162,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[Union[np.random.RandomState, torch.Generator]]`, defaults to `None`): A np.random.RandomState to make generation deterministic. prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not @@ -168,7 +205,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() # set timesteps self.scheduler.set_timesteps(num_inference_steps) @@ -225,12 +262,8 @@ def __call__( timesteps = self.scheduler.timesteps.numpy()[-init_timestep] timesteps = np.array([timesteps] * batch_size * num_images_per_prompt) - # add noise to latents using the timesteps - noise = generator.randn(*init_latents.shape).astype(latents_dtype) - init_latents = self.scheduler.add_noise( - torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timesteps) - ) - init_latents = init_latents.numpy() + # 5. Prepare latent variables + latents = self.prepare_latents(image, timesteps, batch_size, num_images_per_prompt, latents_dtype, generator) # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. @@ -276,7 +309,8 @@ def __call__( # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): if callback is not None and i % callback_steps == 0: - callback(i, t, latents) + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, latents) if output_type == "latent": image = latents diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py index 19de793ccd0..cb3c7db96e9 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py @@ -16,7 +16,7 @@ from typing import Callable, List, Optional, Union import numpy as np -import PIL +import PIL.Image import torch from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import PIL_INTERPOLATION @@ -108,7 +108,7 @@ def __call__( negative_prompt: Optional[Union[str, List[str]]] = None, num_images_per_prompt: int = 1, eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, negative_prompt_embeds: Optional[np.ndarray] = None, @@ -200,7 +200,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() # set timesteps self.scheduler.set_timesteps(num_inference_steps) @@ -229,11 +229,19 @@ def __call__( width // self.vae_scale_factor, ) latents_dtype = prompt_embeds.dtype + if latents is None: - latents = generator.randn(*latents_shape).astype(latents_dtype) - else: - if latents.shape != latents_shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") + if isinstance(generator, np.random.RandomState): + latents = generator.randn(*latents_shape).astype(latents_dtype) + elif isinstance(generator, torch.Generator): + latents = torch.randn(*latents_shape, generator=generator).numpy().astype(latents_dtype) + else: + raise ValueError( + f"Expected `generator` to be of type `np.random.RandomState` or `torch.Generator`, but got" + f" {type(generator)}." + ) + elif latents.shape != latents_shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") # prepare mask and masked_image mask, masked_image = prepare_mask_and_masked_image( diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py index 2a5e7bf78b0..3c210862acf 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py @@ -235,7 +235,15 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype ) if latents is None: - latents = generator.randn(*shape).astype(dtype) + if isinstance(generator, np.random.RandomState): + latents = generator.randn(*shape).astype(dtype) + elif isinstance(generator, torch.Generator): + latents = torch.randn(*shape, generator=generator).numpy().astype(dtype) + else: + raise ValueError( + f"Expected `generator` to be of type `np.random.RandomState` or `torch.Generator`, but got" + f" {type(generator)}." + ) elif latents.shape != shape: raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") @@ -270,7 +278,7 @@ def __call__( negative_prompt: Optional[Union[str, List[str]]] = None, num_images_per_prompt: int = 1, eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, negative_prompt_embeds: Optional[np.ndarray] = None, @@ -315,7 +323,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[Union[np.random.RandomState, torch.Generator]]`, defaults to `None`):: A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image @@ -383,7 +391,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` @@ -410,6 +418,7 @@ def __call__( # 4. Prepare timesteps self.scheduler.set_timesteps(num_inference_steps) timesteps = self.scheduler.timesteps + print("timesteps", timesteps) # 5. Prepare latent variables latents = self.prepare_latents( @@ -440,6 +449,7 @@ def __call__( timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) # 8. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance @@ -475,7 +485,8 @@ def __call__( # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): if callback is not None and i % callback_steps == 0: - callback(i, t, latents) + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, latents) if output_type == "latent": image = latents diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py index a07903a735e..19988599b64 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py @@ -17,7 +17,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np -import PIL +import PIL.Image import torch from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput @@ -222,7 +222,7 @@ def get_timesteps(self, num_inference_steps, strength): return timesteps, num_inference_steps - t_start # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents - def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, generator=None): + def prepare_latents(self, image, timesteps, batch_size, num_images_per_prompt, dtype, generator=None): batch_size = batch_size * num_images_per_prompt if image.shape[1] == 4: @@ -242,11 +242,22 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt init_latents = np.concatenate([init_latents], axis=0) # add noise to latents using the timesteps - noise = generator.randn(*init_latents.shape).astype(dtype) + if isinstance(generator, np.random.RandomState): + noise = generator.randn(*init_latents.shape).astype(dtype) + elif isinstance(generator, torch.Generator): + noise = torch.randn(*init_latents.shape, generator=generator).numpy().astype(dtype) + else: + raise ValueError( + f"Expected `generator` to be of type `np.random.RandomState` or `torch.Generator`, but got" + f" {type(generator)}." + ) + init_latents = self.scheduler.add_noise( - torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timestep) + torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timesteps) ) - return init_latents.numpy() + init_latents = init_latents.numpy() + + return init_latents def _get_add_time_ids( self, original_size, crops_coords_top_left, target_size, aesthetic_score, negative_aesthetic_score, dtype @@ -274,7 +285,7 @@ def __call__( negative_prompt: Optional[Union[str, List[str]]] = None, num_images_per_prompt: int = 1, eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, + generator: Optional[Union[np.random.RandomState, torch.Generator]] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, negative_prompt_embeds: Optional[np.ndarray] = None, @@ -375,7 +386,7 @@ def __call__( batch_size = prompt_embeds.shape[0] if generator is None: - generator = np.random + generator = np.random.RandomState() # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` @@ -482,7 +493,8 @@ def __call__( # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): if callback is not None and i % callback_steps == 0: - callback(i, t, latents) + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, latents) if output_type == "latent": image = latents diff --git a/optimum/pipelines/diffusers/pipeline_utils.py b/optimum/pipelines/diffusers/pipeline_utils.py index 869b91ffe59..e9d5986b61c 100644 --- a/optimum/pipelines/diffusers/pipeline_utils.py +++ b/optimum/pipelines/diffusers/pipeline_utils.py @@ -17,7 +17,7 @@ from typing import List, Optional, Union import numpy as np -import PIL +import PIL.Image import torch from diffusers import ConfigMixin from diffusers.image_processor import VaeImageProcessor as DiffusersVaeImageProcessor @@ -206,7 +206,7 @@ def postprocess( def get_height_width( self, - image: [PIL.Image.Image, np.ndarray], + image: Union[PIL.Image.Image, np.ndarray], height: Optional[int] = None, width: Optional[int] = None, ): @@ -264,10 +264,10 @@ def reshape(images: np.ndarray) -> np.ndarray: # TODO : remove after diffusers v0.21.0 release def resize( self, - image: [PIL.Image.Image, np.ndarray, torch.Tensor], + image: Union[PIL.Image.Image, np.ndarray, torch.Tensor], height: Optional[int] = None, width: Optional[int] = None, - ) -> [PIL.Image.Image, np.ndarray, torch.Tensor]: + ) -> Union[PIL.Image.Image, np.ndarray, torch.Tensor]: """ Resize image. """ From aa41f422cec94979f7ec8e330a6076640d331edf Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 27 Aug 2024 17:50:57 +0200 Subject: [PATCH 10/24] compact diffusion testing suite --- tests/onnxruntime/test_diffusion.py | 818 ++++++++++--------- tests/onnxruntime/utils_onnxruntime_tests.py | 13 +- 2 files changed, 434 insertions(+), 397 deletions(-) diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index 2d5ab7a7f8b..1840725299e 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -12,9 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import random import unittest -from typing import Dict import numpy as np import PIL @@ -24,12 +22,8 @@ AutoPipelineForImage2Image, AutoPipelineForInpainting, AutoPipelineForText2Image, - StableDiffusionPipeline, - StableDiffusionXLPipeline, ) from diffusers.utils import load_image -from diffusers.utils.testing_utils import floats_tensor -from packaging.version import Version, parse from parameterized import parameterized from transformers.testing_utils import require_torch_gpu from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin @@ -45,22 +39,20 @@ ORTStableDiffusionXLImg2ImgPipeline, ORTStableDiffusionXLPipeline, ) -from optimum.onnxruntime.modeling_diffusion import ( - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, -) from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor -from optimum.utils.import_utils import _diffusers_version from optimum.utils.testing_utils import grid_parameters, require_diffusers, require_ort_rocm -if parse(_diffusers_version) > Version("0.21.4"): - from diffusers import LatentConsistencyModelPipeline +def get_generator(generator_framework, seed): + if generator_framework == "np": + return np.random.RandomState(seed) + elif generator_framework == "pt": + return torch.Generator().manual_seed(seed) + else: + raise ValueError(f"Unknown generator_framework: {generator_framework}") -def _generate_inputs(batch_size=1): +def _generate_prompts(batch_size=1): inputs = { "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, "num_inference_steps": 3, @@ -70,7 +62,7 @@ def _generate_inputs(batch_size=1): return inputs -def _create_image(height=128, width=128, batch_size=1, channel=3, input_type="pil"): +def _generate_images(height=128, width=128, batch_size=1, channel=3, input_type="pil"): if input_type == "pil": image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" @@ -94,16 +86,24 @@ def to_np(image): class ORTPipelineForText2ImageTest(ORTModelTestMixin): ARCHITECTURE_TO_ORTMODEL_CLASS = { + "lcm": ORTLatentConsistencyModelPipeline, "stable-diffusion": ORTStableDiffusionPipeline, "stable-diffusion-xl": ORTStableDiffusionXLPipeline, - "lcm": ORTLatentConsistencyModelPipeline, } - AUTOMODEL_CLASS = AutoPipelineForText2Image ORTMODEL_CLASS = ORTPipelineForText2Image + AUTOMODEL_CLASS = AutoPipelineForText2Image TASK = "text-to-image" + def generate_inputs(self, height=128, width=128, batch_size=1): + inputs = _generate_prompts(batch_size=batch_size) + + inputs["height"] = height + inputs["width"] = width + + return inputs + @require_diffusers def test_load_vanilla_model_which_is_not_supported(self): with self.assertRaises(Exception) as context: @@ -131,12 +131,41 @@ def test_num_images_per_prompt(self, model_arch: str): self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) self.assertEqual(pipeline.unet.config["in_channels"], 4) - batch_size, height = 1, 32 - for width in [64, 32]: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - for num_images in [1, 3]: - outputs = pipeline(**inputs, num_images_per_prompt=num_images).images - self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for num_images in [1, 3]: + outputs = pipeline(**inputs, num_images_per_prompt=num_images).images + self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 128, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + if model_arch == "lcm": + # LCM doesn't support deterministic outputs beyond the first inference step + # TODO: Investigate why this is the case + inputs["num_inference_steps"] = 1 + + for output_type in ["latent", "np"]: + inputs["output_type"] = output_type + + ort_output = ort_pipeline(**inputs, generator=torch.Generator().manual_seed(SEED)).images + diffusers_output = diffusers_pipeline(**inputs, generator=torch.Generator().manual_seed(SEED)).images + + self.assertTrue( + np.allclose(ort_output, diffusers_output, atol=1e-4), + np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4), + ) + self.assertEqual(ort_pipeline.device, diffusers_pipeline.device) @parameterized.expand( grid_parameters( @@ -172,7 +201,7 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st model_args = {"test_name": test_name, "model_arch": model_arch} self._setup(model_args) pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - height, width, batch_size = 32, 64, 1 + height, width, batch_size = 64, 32, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) outputs = pipeline(**inputs).images # Verify model devices @@ -186,19 +215,32 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: - callback_fn.has_been_called = True - callback_fn.number_of_steps += 1 + height, width, batch_size = 64, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: + self.has_been_called = True + self.number_of_steps += 1 - callback_fn.has_been_called = False - callback_fn.number_of_steps = 0 + ort_callback = Callback() + auto_callback = Callback() - inputs = self.generate_inputs(height=64, width=64) - pipeline(**inputs, callback=callback_fn, callback_steps=1) - self.assertTrue(callback_fn.has_been_called) - self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + # callback_steps=1 to trigger callback every step + ort_pipe(**inputs, callback=ort_callback, callback_steps=1) + auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + + self.assertTrue(ort_callback.has_been_called) + self.assertTrue(auto_callback.has_been_called) + self.assertEqual(auto_callback.number_of_steps, ort_callback.number_of_steps) @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers @@ -222,55 +264,74 @@ def test_shape(self, model_arch: str): (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), ) - @parameterized.expand(["stable-diffusion", "stable-diffusion-xl"]) + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers def test_image_reproducibility(self, model_arch: str): + if model_arch in ["lcm"]: + pytest.skip("Latent Consistency Model (LCM) doesn't support deterministic outputs") + model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - inputs = _generate_inputs() - height, width = 64, 64 - np.random.seed(0) - ort_outputs_1 = pipeline(**inputs, height=height, width=width) - np.random.seed(0) - ort_outputs_2 = pipeline(**inputs, height=height, width=width) - ort_outputs_3 = pipeline(**inputs, height=height, width=width) - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - @parameterized.expand(["stable-diffusion"]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) def test_negative_prompt(self, model_arch: str): + if model_arch in ["lcm"]: + pytest.skip("LCM (Latent Consistency Model) does not support negative prompts") + model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + height, width, batch_size = 64, 64, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + negative_prompt = ["This is a negative prompt"] - np.random.seed(0) - image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + image_slice_1 = pipeline( + **inputs, negative_prompt=negative_prompt, generator=np.random.RandomState(SEED) + ).images[0, -3:, -3:, -1] prompt = inputs.pop("prompt") - embeds = [] - for p in [prompt, negative_prompt]: - text_inputs = pipeline.tokenizer( - p, - padding="max_length", + + if model_arch == "stable-diffusion-xl": + ( + inputs["prompt_embeds"], + inputs["negative_prompt_embeds"], + inputs["pooled_prompt_embeds"], + inputs["negative_pooled_prompt_embeds"], + ) = pipeline._encode_prompt(prompt, 1, False, negative_prompt) + else: + text_ids = pipeline.tokenizer( + prompt, max_length=pipeline.tokenizer.model_max_length, + padding="max_length", + return_tensors="np", truncation=True, + ).input_ids + negative_text_ids = pipeline.tokenizer( + negative_prompt, + max_length=pipeline.tokenizer.model_max_length, + padding="max_length", return_tensors="np", - ) - text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) - embeds.append(pipeline.text_encoder(text_inputs)[0]) + truncation=True, + ).input_ids + inputs["prompt_embeds"] = pipeline.text_encoder(text_ids)[0] + inputs["negative_prompt_embeds"] = pipeline.text_encoder(negative_text_ids)[0] - inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds - np.random.seed(0) - image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] - self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) + image_slice_2 = pipeline(**inputs, generator=np.random.RandomState(SEED)).images[0, -3:, -3:, -1] - def generate_inputs(self, height=128, width=128, batch_size=1): - inputs = _generate_inputs(batch_size=batch_size) - inputs["height"] = height - inputs["width"] = width - return inputs + self.assertTrue(np.allclose(image_slice_1, image_slice_2, rtol=1e-1)) class ORTPipelineForImage2ImageTest(ORTModelTestMixin): @@ -283,6 +344,19 @@ class ORTPipelineForImage2ImageTest(ORTModelTestMixin): TASK = "image-to-image" + def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="np"): + inputs = _generate_prompts(batch_size=batch_size) + + inputs["image"] = _generate_images( + height=height, width=width, batch_size=batch_size, channel=channel, input_type=input_type + ) + + inputs["strength"] = 0.75 + inputs["height"] = height + inputs["width"] = width + + return inputs + @require_diffusers def test_load_vanilla_model_which_is_not_supported(self): with self.assertRaises(Exception) as context: @@ -297,6 +371,7 @@ def test_load_vanilla_model_which_is_not_supported(self): def test_num_images_per_prompt(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) self.assertEqual(pipeline.vae_scale_factor, 2) self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) @@ -320,9 +395,11 @@ def test_num_images_per_prompt(self, model_arch: str): def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): model_args = {"test_name": test_name, "model_arch": model_arch} self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) outputs = pipeline(**inputs).images # Verify model devices self.assertEqual(pipeline.device.type.lower(), "cuda") @@ -342,9 +419,11 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): model_args = {"test_name": test_name, "model_arch": model_arch} self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + height, width, batch_size = 32, 64, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) outputs = pipeline(**inputs).images # Verify model devices self.assertEqual(pipeline.device.type.lower(), "cuda") @@ -355,26 +434,47 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers def test_callback(self, model_arch: str): - def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: - callback_fn.has_been_called = True - callback_fn.number_of_steps += 1 - - pipe = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - callback_fn.has_been_called = False - callback_fn.number_of_steps = 0 - inputs = self.generate_inputs(height=64, width=64) - pipe(**inputs, callback=callback_fn, callback_steps=1) - self.assertTrue(callback_fn.has_been_called) - self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) + if model_arch in ["stable-diffusion"]: + pytest.skip( + "Stable Diffusion For Img2Img doesn't behave as expected with callbacks (doesn't call it every step with callback_steps=1)" + ) + + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["num_inference_steps"] = 3 + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: + self.has_been_called = True + self.number_of_steps += 1 + + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_callback = Callback() + auto_callback = Callback() + # callback_steps=1 to trigger callback every step + ort_pipe(**inputs, callback=ort_callback, callback_steps=1) + auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + + self.assertTrue(ort_callback.has_been_called) + self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers def test_shape(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) - height, width, batch_size = 128, 64, 1 + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + height, width, batch_size = 32, 64, 1 for input_type in ["np", "pil", "pt"]: inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) @@ -392,315 +492,259 @@ def test_shape(self, model_arch: str): (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), ) - def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): - inputs = _generate_inputs(batch_size=batch_size) - inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) - inputs["strength"] = 0.75 + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + pytest.skip("Img2Img models do not support support output reproducibility for some reason") + + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 128, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + ort_output = ort_pipeline(**inputs, generator=torch.Generator().manual_seed(SEED)).images + + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + diffusers_output = diffusers_pipeline(**inputs, generator=torch.Generator().manual_seed(SEED)).images + + self.assertTrue(np.allclose(ort_output, diffusers_output, rtol=1e-2)) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + pytest.skip("Img2Img models do not support support output reproducibility for some reason") + + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + + +class ORTPipelineForInpaintingTest(ORTModelTestMixin): + ARCHITECTURE_TO_ORTMODEL_CLASS = { + "stable-diffusion": ORTStableDiffusionInpaintPipeline, + } + + AUTOMODEL_CLASS = AutoPipelineForInpainting + ORTMODEL_CLASS = ORTPipelineForInpainting + + TASK = "inpainting" + + def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"): + assert batch_size == 1, "Inpainting models only support batch_size=1" + assert input_type == "pil", "Inpainting models only support input_type='pil'" + + inputs = _generate_prompts(batch_size=batch_size) + + inputs["image"] = _generate_images( + height=height, width=width, batch_size=1, channel=channel, input_type="pil" + )[0] + inputs["mask_image"] = _generate_images( + height=height, width=width, batch_size=1, channel=channel, input_type="pil" + )[0] + + inputs["height"] = height + inputs["width"] = width + return inputs - # @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) - # @require_diffusers - # def test_shape(self, model_arch: str): - # model_args = {"test_name": model_arch, "model_arch": model_arch} - # self._setup(model_args) - # height, width, batch_size = 128, 64, 1 - # pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - - # if self.TASK == "image-to-image": - # input_types = ["np", "pil", "pt"] - # elif self.TASK == "text-to-image": - # input_types = ["np"] - # else: - # input_types = ["pil"] - - # for input_type in input_types: - # if self.TASK == "image-to-image": - # inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) - # else: - # inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - # for output_type in ["np", "pil", "latent"]: - # inputs["output_type"] = output_type - # outputs = pipeline(**inputs).images - # if output_type == "pil": - # self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) - # elif output_type == "np": - # self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - # else: - # self.assertEqual( - # outputs.shape, - # (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), - # ) - - -# class ORTStableDiffusionImg2ImgPipelineTest(ORTStableDiffusionPipelineBase): -# SUPPORTED_ARCHITECTURES = ["stable-diffusion"] -# ORTMODEL_CLASS = ORTStableDiffusionImg2ImgPipeline -# TASK = "image-to-image" - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_compare_diffusers_pipeline(self, model_arch: str): -# model_args = {"test_name": model_arch, "model_arch": model_arch} -# self._setup(model_args) -# height, width = 128, 128 - -# inputs = self.generate_inputs(height=height, width=width) -# inputs["prompt"] = "A painting of a squirrel eating a burger" -# inputs["image"] = floats_tensor((1, 3, height, width), rng=random.Random(SEED)) - -# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) -# ort_output = ort_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - -# diffusers_onnx_pipeline = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) -# diffusers_onnx_output = diffusers_onnx_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - -# self.assertTrue(np.allclose(ort_output, diffusers_onnx_output, atol=1e-1)) - - -# class ORTStableDiffusionPipelineTest(unittest.TestCase): -# SUPPORTED_ARCHITECTURES = [ -# "stable-diffusion", -# ] -# ORTMODEL_CLASS = ORTStableDiffusionPipeline -# TASK = "text-to-image" - - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_image_reproducibility(self, model_arch: str): -# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) -# inputs = _generate_inputs() -# height, width = 64, 32 -# np.random.seed(0) -# ort_outputs_1 = pipeline(**inputs, height=height, width=width) -# np.random.seed(0) -# ort_outputs_2 = pipeline(**inputs, height=height, width=width) -# ort_outputs_3 = pipeline(**inputs, height=height, width=width) -# # Compare model outputs -# self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) -# self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# def test_negative_prompt(self, model_arch: str): -# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) -# inputs = _generate_inputs() -# inputs["height"], inputs["width"] = 64, 32 -# negative_prompt = ["This is a negative prompt"] -# np.random.seed(0) -# image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] -# prompt = inputs.pop("prompt") -# embeds = [] -# for p in [prompt, negative_prompt]: -# text_inputs = pipeline.tokenizer( -# p, -# padding="max_length", -# max_length=pipeline.tokenizer.model_max_length, -# truncation=True, -# return_tensors="np", -# ) -# text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) -# embeds.append(pipeline.text_encoder(text_inputs)[0]) - -# inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds -# np.random.seed(0) -# image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] -# self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) - - -# class ORTStableDiffusionXLPipelineTest(ORTModelTestMixin): -# SUPPORTED_ARCHITECTURES = [ -# "stable-diffusion-xl", -# ] -# ORTMODEL_CLASS = ORTStableDiffusionXLPipeline -# TASK = "text-to-image" - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_compare_to_diffusers(self, model_arch: str): -# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) -# self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) -# self.assertIsInstance(ort_pipeline.text_encoder_2, ORTModelTextEncoder) -# self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) -# self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) -# self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) -# self.assertIsInstance(ort_pipeline.config, Dict) - -# pipeline = StableDiffusionXLPipeline.from_pretrained(MODEL_NAMES[model_arch]) -# batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 -# latents = ort_pipeline.prepare_latents( -# batch_size * num_images_per_prompt, -# ort_pipeline.unet.config["in_channels"], -# height, -# width, -# dtype=np.float32, -# generator=np.random.RandomState(0), -# ) - -# kwargs = { -# "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, -# "num_inference_steps": 1, -# "num_images_per_prompt": num_images_per_prompt, -# "height": height, -# "width": width, -# "guidance_rescale": 0.1, -# } - -# for output_type in ["latent", "np"]: -# ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images -# self.assertIsInstance(ort_outputs, np.ndarray) -# with torch.no_grad(): -# outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - -# # Compare model outputs -# self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) -# # Compare model devices -# self.assertEqual(pipeline.device, ort_pipeline.device) - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_image_reproducibility(self, model_arch: str): -# pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) -# inputs = _generate_inputs() -# height, width = 64, 32 -# np.random.seed(0) -# ort_outputs_1 = pipeline(**inputs, height=height, width=width) -# np.random.seed(0) -# ort_outputs_2 = pipeline(**inputs, height=height, width=width) -# ort_outputs_3 = pipeline(**inputs, height=height, width=width) -# self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) -# self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - -# class ORTStableDiffusionInpaintPipelineTest(ORTStableDiffusionPipelineBase): -# SUPPORTED_ARCHITECTURES = [ -# "stable-diffusion", -# ] -# ORTMODEL_CLASS = ORTStableDiffusionInpaintPipeline -# TASK = "inpainting" - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_compare_diffusers_pipeline(self, model_arch: str): -# model_args = {"test_name": model_arch, "model_arch": model_arch} -# self._setup(model_args) -# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) -# diffusers_pipeline = self.ORTMODEL_CLASS.auto_model_class.from_pretrained(MODEL_NAMES[model_arch]) -# height, width = 64, 64 -# latents_shape = ( -# 1, -# ort_pipeline.vae_decoder.config["latent_channels"], -# height // ort_pipeline.vae_scale_factor, -# width // ort_pipeline.vae_scale_factor, -# ) -# inputs = self.generate_inputs(height=height, width=width) - -# np_latents = np.random.rand(*latents_shape).astype(np.float32) -# torch_latents = torch.from_numpy(np_latents) - -# ort_outputs = ort_pipeline(**inputs, latents=np_latents).images -# self.assertEqual(ort_outputs.shape, (1, height, width, 3)) - -# diffusers_outputs = diffusers_pipeline(**inputs, latents=torch_latents).images -# self.assertEqual(diffusers_outputs.shape, (1, height, width, 3)) - -# self.assertTrue(np.allclose(ort_outputs, diffusers_outputs, atol=1e-4)) - -# def generate_inputs(self, height=128, width=128, batch_size=1): -# inputs = super(ORTStableDiffusionInpaintPipelineTest, self).generate_inputs(height, width) -# inputs["image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] -# inputs["mask_image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] -# return inputs - - -# class ORTStableDiffusionXLImg2ImgPipelineTest(ORTModelTestMixin): -# SUPPORTED_ARCHITECTURES = [ -# "stable-diffusion-xl", -# ] -# ORTMODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline -# TASK = "image-to-image" - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# def test_inference(self, model_arch: str): -# model_args = {"test_name": model_arch, "model_arch": model_arch} -# self._setup(model_args) -# pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - -# height, width = 128, 128 -# inputs = self.generate_inputs(height=height, width=width) -# inputs["image"] = load_image( -# "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" -# "/in_paint/overture-creations-5sI6fQgYIuo.png" -# ).resize((width, height)) -# output = pipeline(**inputs, generator=np.random.RandomState(0)).images[0, -3:, -3:, -1] -# expected_slice = np.array([0.6515, 0.5405, 0.4858, 0.5632, 0.5174, 0.5681, 0.4948, 0.4253, 0.5080]) - -# self.assertTrue(np.allclose(output.flatten(), expected_slice, atol=1e-1)) - -# def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): -# inputs = _generate_inputs(batch_size=batch_size) -# inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) -# inputs["strength"] = 0.75 -# return inputs - - -# class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): -# SUPPORTED_ARCHITECTURES = [ -# "latent-consistency", -# ] -# ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline -# TASK = "text-to-image" - -# @parameterized.expand(SUPPORTED_ARCHITECTURES) -# @require_diffusers -# @unittest.skipIf( -# parse(_diffusers_version) <= Version("0.21.4"), -# "not supported with this diffusers version, needs diffusers>=v0.22.0", -# ) -# def test_compare_to_diffusers(self, model_arch: str): -# ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) -# self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) -# self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) -# self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) -# self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) -# self.assertIsInstance(ort_pipeline.config, Dict) - -# pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) -# batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 -# latents = ort_pipeline.prepare_latents( -# batch_size * num_images_per_prompt, -# ort_pipeline.unet.config["in_channels"], -# height, -# width, -# dtype=np.float32, -# generator=np.random.RandomState(0), -# ) - -# kwargs = { -# "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, -# "num_inference_steps": 1, -# "num_images_per_prompt": num_images_per_prompt, -# "height": height, -# "width": width, -# "guidance_scale": 8.5, -# } - -# for output_type in ["latent", "np"]: -# ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images -# self.assertIsInstance(ort_outputs, np.ndarray) -# with torch.no_grad(): -# outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - -# # Compare model outputs -# self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) -# # Compare model devices -# self.assertEqual(pipeline.device, ort_pipeline.device) + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + self.assertEqual(pipeline.vae_scale_factor, 2) + self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) + self.assertEqual(pipeline.unet.config["in_channels"], 4) + + batch_size, height = 1, 32 + for width in [64, 32]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + for num_images in [1, 3]: + outputs = pipeline(**inputs, num_images_per_prompt=num_images).images + self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} + ) + ) + @require_torch_gpu + @pytest.mark.cuda_ep_test + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand( + grid_parameters( + {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} + ) + ) + @require_torch_gpu + @require_ort_rocm + @pytest.mark.rocm_ep_test + @require_diffusers + def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + outputs = pipeline(**inputs).images + # Verify model devices + self.assertEqual(pipeline.device.type.lower(), "cuda") + # Verify model outptus + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_callback(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["num_inference_steps"] = 3 + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: + self.has_been_called = True + self.number_of_steps += 1 + + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_callback = Callback() + auto_callback = Callback() + # callback_steps=1 to trigger callback every step + ort_pipe(**inputs, callback=ort_callback, callback_steps=1) + auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + + self.assertTrue(ort_callback.has_been_called) + self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + height, width, batch_size = 32, 64, 1 + + for input_type in ["pil"]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + + for output_type in ["np", "pil", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + else: + self.assertEqual( + outputs.shape, + (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + ) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + latents_shape = ( + batch_size, + ort_pipeline.vae_decoder.config["latent_channels"], + height // ort_pipeline.vae_scale_factor, + width // ort_pipeline.vae_scale_factor, + ) + + np_latents = np.random.rand(*latents_shape).astype(np.float32) + torch_latents = torch.from_numpy(np_latents) + + ort_output = ort_pipeline(**inputs, latents=np_latents).images + diffusers_output = diffusers_pipeline(**inputs, latents=torch_latents).images + + self.assertTrue( + np.allclose(ort_output, diffusers_output, atol=1e-4), + np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4), + ) + + @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) class ImageProcessorTest(unittest.TestCase): def test_vae_image_processor_pt(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pt = torch.stack(_create_image(height=8, width=8, batch_size=1, input_type="pt")) + input_pt = torch.stack(_generate_images(height=8, width=8, batch_size=1, input_type="pt")) input_np = to_np(input_pt) for output_type in ["np", "pil"]: @@ -711,7 +755,7 @@ def test_vae_image_processor_pt(self): def test_vae_image_processor_np(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_np = np.stack(_create_image(height=8, width=8, input_type="np")) + input_np = np.stack(_generate_images(height=8, width=8, input_type="np")) for output_type in ["np", "pil"]: out = image_processor.postprocess(image_processor.preprocess(input_np), output_type=output_type) out_np = to_np(out) @@ -720,7 +764,7 @@ def test_vae_image_processor_np(self): def test_vae_image_processor_pil(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pil = _create_image(height=8, width=8, batch_size=1, input_type="pil") + input_pil = _generate_images(height=8, width=8, batch_size=1, input_type="pil") for output_type in ["np", "pil"]: out = image_processor.postprocess(image_processor.preprocess(input_pil), output_type=output_type) diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index e77b9b7c20b..aa06476498e 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -213,16 +213,9 @@ def _setup(self, model_args: Dict): continue set_seed(SEED) - if hasattr(self, "ORTMODEL_CLASS"): - onnx_model = self.ORTMODEL_CLASS.from_pretrained( - model_id, **model_args, use_io_binding=False, export=True - ) - elif hasattr(self, "ORTPIPELINE_CLASS"): - onnx_model = self.ORTPIPELINE_CLASS.from_pretrained( - model_id, **model_args, use_io_binding=False, export=True - ) - else: - raise ValueError("ORTMODEL_CLASS or ORTPIPELINE_CLASS must be defined") + onnx_model = self.ORTMODEL_CLASS.from_pretrained( + model_id, **model_args, use_io_binding=False, export=True + ) model_dir = tempfile.mkdtemp( prefix=f"{model_arch_and_params}_{self.TASK}_{model_id.replace('/', '_')}" From 4837828102b2cbd876af9c9aef6f44a8d0651d5b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 27 Aug 2024 19:02:00 +0200 Subject: [PATCH 11/24] fix --- tests/onnxruntime/test_diffusion.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index 1840725299e..a8b82dd7c4f 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -352,8 +352,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_ ) inputs["strength"] = 0.75 - inputs["height"] = height - inputs["width"] = width return inputs @@ -694,6 +692,11 @@ def test_shape(self, model_arch: str): @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers def test_compare_to_diffusers_pipeline(self, model_arch: str): + if model_arch in ["stable-diffusion"]: + pytest.skip( + "Stable Diffusion For Inpainting fails, it was used to be compared to StableDiffusionPipeline for some reason which is the text-to-image variant" + ) + model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) From 80532b3bad2e6b82b2f057672ec339cc18ab35ac Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sat, 7 Sep 2024 17:06:56 +0200 Subject: [PATCH 12/24] test --- optimum/onnxruntime/base.py | 12 +- optimum/onnxruntime/modeling_diffusion.py | 214 +++++++++++----------- 2 files changed, 107 insertions(+), 119 deletions(-) diff --git a/optimum/onnxruntime/base.py b/optimum/onnxruntime/base.py index d9877670ba8..5206edfc081 100644 --- a/optimum/onnxruntime/base.py +++ b/optimum/onnxruntime/base.py @@ -22,7 +22,6 @@ from onnxruntime import InferenceSession -from ..utils import NormalizedConfigManager from ..utils.logging import warn_once from .io_binding import TypeHelper from .modeling_ort import ORTModel @@ -41,17 +40,10 @@ class ORTModelPart: _prepare_onnx_inputs = ORTModel._prepare_onnx_inputs _prepare_onnx_outputs = ORTModel._prepare_onnx_outputs - def __init__( - self, - session: InferenceSession, - parent_model: "ORTModel", - ): + def __init__(self, session: InferenceSession, parent_model: "ORTModel"): self.session = session self.parent_model = parent_model - self.normalized_config = NormalizedConfigManager.get_normalized_config_class( - self.parent_model.config.model_type - )(self.parent_model.config) - self.main_input_name = self.parent_model.main_input_name + self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} self.input_dtypes = {input_key.name: input_key.type for input_key in session.get_inputs()} diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 7e998b4a897..606919ea7f4 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -17,7 +17,6 @@ import os import shutil import warnings -from abc import abstractmethod from collections import OrderedDict from pathlib import Path from tempfile import TemporaryDirectory @@ -41,11 +40,6 @@ StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, ) -from diffusers.pipelines.auto_pipeline import ( - AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, - AUTO_INPAINT_PIPELINES_MAPPING, - AUTO_TEXT2IMAGE_PIPELINES_MAPPING, -) from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -73,6 +67,7 @@ DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ) +from .base import ORTModelPart from .io_binding import TypeHelper from .modeling_ort import ONNX_MODEL_END_DOCSTRING, ORTModel from .utils import ( @@ -86,25 +81,25 @@ logger = logging.getLogger(__name__) -class ORTDiffusionPipeline(ORTModel): - auto_model_class = DiffusionPipeline - main_input_name = "prompt" - base_model_prefix = "onnx_model" +class ORTPipeline(ORTModel): + auto_model_class = None + model_type = "onnx_pipeline" + config_name = "model_index.json" sub_component_config_name = "config.json" - # TODO: instead of having a bloated init, we should probably have an init per pipeline, - # so that we can easily add new pipelines without having to modify the base class + main_input_name = "prompt" + def __init__( self, vae_decoder_session: ort.InferenceSession, - text_encoder_session: ort.InferenceSession, unet_session: ort.InferenceSession, - config: Dict[str, Any], tokenizer: CLIPTokenizer, + config: Dict[str, Any], scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], feature_extractor: Optional[CLIPFeatureExtractor] = None, vae_encoder_session: Optional[ort.InferenceSession] = None, + text_encoder_session: Optional[ort.InferenceSession] = None, text_encoder_2_session: Optional[ort.InferenceSession] = None, tokenizer_2: Optional[CLIPTokenizer] = None, use_io_binding: Optional[bool] = None, @@ -113,23 +108,28 @@ def __init__( """ Args: vae_decoder_session (`ort.InferenceSession`): - The ONNX Runtime inference session associated to the VAE decoder. - text_encoder_session (`ort.InferenceSession`): - The ONNX Runtime inference session associated to the text encoder. + The ONNX Runtime inference session associated to the VAE decoder unet_session (`ort.InferenceSession`): The ONNX Runtime inference session associated to the U-NET. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer) + for the text encoder. config (`Dict[str, Any]`): A config dictionary from which the model components will be instantiated. Make sure to only load configuration files of compatible classes. - tokenizer (`CLIPTokenizer`): - Tokenizer of class - [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). scheduler (`Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]`): A scheduler to be used in combination with the U-NET component to denoise the encoded image latents. feature_extractor (`Optional[CLIPFeatureExtractor]`, defaults to `None`): A model extracting features from generated images to be used as inputs for the `safety_checker` vae_encoder_session (`Optional[ort.InferenceSession]`, defaults to `None`): The ONNX Runtime inference session associated to the VAE encoder. + text_encoder_session (`Optional[ort.InferenceSession]`, defaults to `None`): + The ONNX Runtime inference session associated to the text encoder. + tokenizer_2 (`Optional[CLIPTokenizer]`, defaults to `None`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer) + for the second text encoder. use_io_binding (`Optional[bool]`, defaults to `None`): Whether to use IOBinding during inference to avoid memory copy between the host and devices. Defaults to `True` if the device is CUDA, otherwise defaults to `False`. @@ -137,7 +137,7 @@ def __init__( The directory under which the model exported to ONNX was saved. """ self.shared_attributes_init( - vae_decoder_session, + model=vae_decoder_session, use_io_binding=use_io_binding, model_save_dir=model_save_dir, ) @@ -418,7 +418,7 @@ def _from_transformers( provider_options: Optional[Dict[str, Any]] = None, use_io_binding: Optional[bool] = None, task: Optional[str] = None, - ) -> "ORTDiffusionPipeline": + ) -> "ORTPipeline": if use_auth_token is not None: warnings.warn( "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", @@ -499,46 +499,27 @@ def _save_config(self, save_directory): self.save_config(save_directory) -# TODO : Use ORTModelPart once IOBinding support is added -class _ORTDiffusionModelPart: - """ - For multi-file ONNX models, represents a part of the model. - It has its own `onnxruntime.InferenceSession`, and can perform a forward pass. - """ - +class ORTPipelinePart(ORTModelPart): CONFIG_NAME = "config.json" - _prepare_onnx_inputs = ORTModel._prepare_onnx_inputs - _prepare_onnx_outputs = ORTModel._prepare_onnx_outputs - - def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): - self.session = session - self.parent_model = parent_model + def __init__(self, session: ort.InferenceSession, parent_model: ORTPipeline): config_path = Path(session._model_path).parent / self.CONFIG_NAME - self.config = self.parent_model._dict_from_json_file(config_path) if config_path.is_file() else {} - self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} - self.input_dtypes = {input_key.name: input_key.type for input_key in session.get_inputs()} - self.output_dtypes = {output_key.name: output_key.type for output_key in session.get_outputs()} - @property - def input_dtype(self): - # for backward compatibility - return {key: TypeHelper.ort_type_to_numpy_type(value) for key, value in self.input_dtypes.items()} - - @property - def device(self): - return self.parent_model.device + if config_path.is_file(): + # TODO: use FrozenDict + self.config = parent_model._dict_from_json_file(config_path) + else: + self.config = {} - @abstractmethod - def forward(self, *args, **kwargs): - pass + super().__init__(session, parent_model) - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) + @property + def input_dtype(self): + # for backward compatibility and diffusion mixins (will be standardized in the future) + return {name: TypeHelper.ort_type_to_numpy_type(ort_type) for name, ort_type in self.input_dtypes.items()} -class ORTModelTextEncoder(_ORTDiffusionModelPart): +class ORTModelTextEncoder(ORTPipelinePart): def forward(self, input_ids: Union[np.ndarray, torch.Tensor]): use_torch = isinstance(input_ids, torch.Tensor) @@ -551,10 +532,7 @@ def forward(self, input_ids: Union[np.ndarray, torch.Tensor]): return ModelOutput(**model_outputs) -class ORTModelUnet(_ORTDiffusionModelPart): - def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): - super().__init__(session, parent_model) - +class ORTModelUnet(ORTPipelinePart): def forward( self, sample: Union[np.ndarray, torch.Tensor], @@ -582,7 +560,7 @@ def forward( return ModelOutput(**model_outputs) -class ORTModelVaeDecoder(_ORTDiffusionModelPart): +class ORTModelVaeDecoder(ORTPipelinePart): def forward(self, latent_sample: Union[np.ndarray, torch.Tensor]): use_torch = isinstance(latent_sample, torch.Tensor) @@ -595,7 +573,7 @@ def forward(self, latent_sample: Union[np.ndarray, torch.Tensor]): return ModelOutput(**model_outputs) -class ORTModelVaeEncoder(_ORTDiffusionModelPart): +class ORTModelVaeEncoder(ORTPipelinePart): def forward(self, sample: Union[np.ndarray, torch.Tensor]): use_torch = isinstance(sample, torch.Tensor) @@ -609,7 +587,7 @@ def forward(self, sample: Union[np.ndarray, torch.Tensor]): @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMixin): +class ORTStableDiffusionPipeline(ORTPipeline, StableDiffusionPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ @@ -620,7 +598,7 @@ class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipelineMi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg2ImgPipelineMixin): +class ORTStableDiffusionImg2ImgPipeline(ORTPipeline, StableDiffusionImg2ImgPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ @@ -631,7 +609,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInpaintPipelineMixin): +class ORTStableDiffusionInpaintPipeline(ORTPipeline, StableDiffusionInpaintPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ @@ -642,7 +620,7 @@ class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInp @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyPipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTPipeline, LatentConsistencyPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ @@ -652,7 +630,7 @@ class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyP __call__ = LatentConsistencyPipelineMixin.__call__ -class ORTStableDiffusionXLPipelineBase(ORTDiffusionPipeline): +class ORTStableDiffusionXLPipelineBase(ORTPipeline): def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -721,6 +699,48 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ +SUPPORTED_ORT_PIPELINES = [ + ORTStableDiffusionPipeline, + ORTStableDiffusionImg2ImgPipeline, + ORTStableDiffusionInpaintPipeline, + ORTLatentConsistencyModelPipeline, + ORTStableDiffusionXLPipeline, + ORTStableDiffusionXLImg2ImgPipeline, +] + + +def _get_pipeline_class(class_name: str, throw_error_if_not_exist: bool = True): + for ort_pipeline_class in SUPPORTED_ORT_PIPELINES: + if ort_pipeline_class.auto_model_class.__name__ == class_name: + return ort_pipeline_class + + if throw_error_if_not_exist: + raise ValueError(f"ORTDiffusionPipeline can't find a pipeline linked to {class_name}") + + +class ORTDiffusionPipeline(ConfigMixin): + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs): + load_config_kwargs = { + "force_download": kwargs.get("force_download", False), + "resume_download": kwargs.get("resume_download", None), + "local_files_only": kwargs.get("local_files_only", False), + "cache_dir": kwargs.get("cache_dir", None), + "revision": kwargs.get("revision", None), + "proxies": kwargs.get("proxies", None), + "token": kwargs.get("token", None), + } + + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + config = config[0] if isinstance(config, tuple) else config + class_name = config["_class_name"] + + ort_pipeline_class = _get_pipeline_class(class_name) + + return ort_pipeline_class.from_pretrained(pretrained_model_or_path, **kwargs) + + ORT_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( [ ("lcm", ORTLatentConsistencyModelPipeline), @@ -742,49 +762,38 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ] ) -SUPPORTED_TASKS_MAPPINGS = [ +SUPPORTED_ORT_PIPELINES_MAPPINGS = [ ORT_TEXT2IMAGE_PIPELINES_MAPPING, ORT_IMAGE2IMAGE_PIPELINES_MAPPING, ORT_INPAINT_PIPELINES_MAPPING, ] -def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True): - def get_model(pipeline_class_name): - for task_mapping in SUPPORTED_TASKS_MAPPINGS: - for model_name, pipeline in task_mapping.items(): +def _get_task_class(mapping, pipeline_class_name): + def _get_model_name(pipeline_class_name): + for ort_pipelines_mapping in SUPPORTED_ORT_PIPELINES_MAPPINGS: + for model_name, ort_pipeline in ort_pipelines_mapping.items(): if ( - pipeline.__name__ == pipeline_class_name - or pipeline.auto_model_class.__name__ == pipeline_class_name + ort_pipeline.__name__ == pipeline_class_name + or ort_pipeline.auto_model_class.__name__ == pipeline_class_name ): return model_name - model_name = get_model(pipeline_class_name) + model_name = _get_model_name(pipeline_class_name) if model_name is not None: task_class = mapping.get(model_name, None) if task_class is not None: return task_class - if throw_error_if_not_exist: - raise ValueError(f"AutoPipeline can't find a pipeline linked to {pipeline_class_name} for {model_name}") + raise ValueError(f"ORTPipelineForTask can't find a pipeline linked to {pipeline_class_name} for {model_name}") -class ORTPipelineBase(ConfigMixin): - config_name = "model_index.json" - - ort_pipeline_mapping = None - auto_pipeline_mapping = None - - def __init__(self, *args, **kwargs): - raise EnvironmentError( - f"{self.__class__.__name__} is designed to be instantiated " - f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " - f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." - ) +class ORTPipelineForTask(ConfigMixin): + auto_model_class = None + ort_pipelines_mapping = None @classmethod - @validate_hf_hub_args def from_pretrained(cls, pretrained_model_or_path, **kwargs): load_config_kwargs = { "force_download": kwargs.get("force_download", False), @@ -795,38 +804,25 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): "proxies": kwargs.get("proxies", None), "token": kwargs.get("token", None), } - config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) config = config[0] if isinstance(config, tuple) else config class_name = config["_class_name"] - ort_pipeline_cls = _get_task_class(cls.ort_pipeline_mapping, class_name) + ort_pipeline_class = _get_task_class(cls.ort_pipelines_mapping, class_name) - return ort_pipeline_cls.from_pretrained(pretrained_model_or_path, **kwargs) - - @classmethod - def from_pipe(cls, **kwargs): - raise NotImplementedError( - f"from_pipe is not yet implemented for {cls.__name__}. Please use from_pretrained instead." - ) + return ort_pipeline_class.from_pretrained(pretrained_model_or_path, **kwargs) -class ORTPipelineForText2Image(ORTPipelineBase): +class ORTPipelineForText2Image(ORTPipelineForTask): auto_model_class = AutoPipelineForText2Image - - ort_pipeline_mapping = ORT_TEXT2IMAGE_PIPELINES_MAPPING - auto_pipeline_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING + ort_pipelines_mapping = ORT_TEXT2IMAGE_PIPELINES_MAPPING -class ORTPipelineForImage2Image(ORTPipelineBase): +class ORTPipelineForImage2Image(ORTPipelineForTask): auto_model_class = AutoPipelineForImage2Image + ort_pipelines_mapping = ORT_IMAGE2IMAGE_PIPELINES_MAPPING - ort_pipeline_mapping = ORT_IMAGE2IMAGE_PIPELINES_MAPPING - auto_pipeline_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING - -class ORTPipelineForInpainting(ORTPipelineBase): +class ORTPipelineForInpainting(ORTPipelineForTask): auto_model_class = AutoPipelineForInpainting - - ort_pipeline_mapping = ORT_INPAINT_PIPELINES_MAPPING - auto_pipeline_mapping = AUTO_INPAINT_PIPELINES_MAPPING + ort_pipelines_mapping = ORT_INPAINT_PIPELINES_MAPPING From f99a058f7ea75578770808e116256348bada63ac Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sat, 7 Sep 2024 17:29:14 +0200 Subject: [PATCH 13/24] test --- optimum/onnxruntime/base.py | 1 + optimum/onnxruntime/modeling_diffusion.py | 14 +++++++++----- optimum/onnxruntime/modeling_seq2seq.py | 10 ---------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/optimum/onnxruntime/base.py b/optimum/onnxruntime/base.py index 5206edfc081..ccfd646ea0d 100644 --- a/optimum/onnxruntime/base.py +++ b/optimum/onnxruntime/base.py @@ -43,6 +43,7 @@ class ORTModelPart: def __init__(self, session: InferenceSession, parent_model: "ORTModel"): self.session = session self.parent_model = parent_model + self.main_input_name = self.parent_model.main_input_name self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 606919ea7f4..0d3fa2bcc54 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -30,7 +30,6 @@ AutoPipelineForText2Image, ConfigMixin, DDIMScheduler, - DiffusionPipeline, LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, @@ -88,8 +87,6 @@ class ORTPipeline(ORTModel): config_name = "model_index.json" sub_component_config_name = "config.json" - main_input_name = "prompt" - def __init__( self, vae_decoder_session: ort.InferenceSession, @@ -592,6 +589,7 @@ class ORTStableDiffusionPipeline(ORTPipeline, StableDiffusionPipelineMixin): ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ + main_input_name = "prompt" auto_model_class = StableDiffusionPipeline __call__ = StableDiffusionPipelineMixin.__call__ @@ -603,6 +601,7 @@ class ORTStableDiffusionImg2ImgPipeline(ORTPipeline, StableDiffusionImg2ImgPipel ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ + main_input_name = "prompt" auto_model_class = StableDiffusionImg2ImgPipeline __call__ = StableDiffusionImg2ImgPipelineMixin.__call__ @@ -614,6 +613,7 @@ class ORTStableDiffusionInpaintPipeline(ORTPipeline, StableDiffusionInpaintPipel ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ + main_input_name = "prompt" auto_model_class = StableDiffusionInpaintPipeline __call__ = StableDiffusionInpaintPipelineMixin.__call__ @@ -625,6 +625,7 @@ class ORTLatentConsistencyModelPipeline(ORTPipeline, LatentConsistencyPipelineMi ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ + main_input_name = "prompt" auto_model_class = LatentConsistencyModelPipeline __call__ = LatentConsistencyPipelineMixin.__call__ @@ -683,6 +684,7 @@ class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffu ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). """ + main_input_name = "prompt" auto_model_class = StableDiffusionXLPipeline __call__ = StableDiffusionXLPipelineMixin.__call__ @@ -694,6 +696,7 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). """ + main_input_name = "prompt" auto_model_class = StableDiffusionXLImg2ImgPipeline __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ @@ -719,6 +722,8 @@ def _get_pipeline_class(class_name: str, throw_error_if_not_exist: bool = True): class ORTDiffusionPipeline(ConfigMixin): + config_name = "model_index.json" + @classmethod @validate_hf_hub_args def from_pretrained(cls, pretrained_model_or_path, **kwargs): @@ -790,8 +795,7 @@ def _get_model_name(pipeline_class_name): class ORTPipelineForTask(ConfigMixin): - auto_model_class = None - ort_pipelines_mapping = None + config_name = "model_index.json" @classmethod def from_pretrained(cls, pretrained_model_or_path, **kwargs): diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index 4ce3e4707ed..fc185500d80 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -72,16 +72,6 @@ from transformers.generation_utils import GenerationMixin -# if check_if_transformers_greater("4.37.0"): -# # starting from transformers v4.37.0, the whisper generation loop is implemented in the `WhisperGenerationMixin` -# # and it implements many new features including short and long form generation, and starts with 2 init tokens -# from transformers.models.whisper.generation_whisper import WhisperGenerationMixin -# else: - -# class WhisperGenerationMixin(WhisperForConditionalGeneration, GenerationMixin): -# pass - - if check_if_transformers_greater("4.43.0"): from transformers.cache_utils import EncoderDecoderCache else: From 781ede7d6a530d023bb78283336564c107e129ca Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sat, 7 Sep 2024 20:13:54 +0200 Subject: [PATCH 14/24] test --- optimum/onnxruntime/base.py | 41 +++++++++-------- optimum/onnxruntime/modeling_seq2seq.py | 58 ------------------------- 2 files changed, 22 insertions(+), 77 deletions(-) diff --git a/optimum/onnxruntime/base.py b/optimum/onnxruntime/base.py index ccfd646ea0d..b59c59ede7d 100644 --- a/optimum/onnxruntime/base.py +++ b/optimum/onnxruntime/base.py @@ -22,6 +22,7 @@ from onnxruntime import InferenceSession +from ..utils import NormalizedConfigManager from ..utils.logging import warn_once from .io_binding import TypeHelper from .modeling_ort import ORTModel @@ -83,12 +84,18 @@ class ORTEncoder(ORTModelPart): Encoder part of the encoder-decoder model for ONNX Runtime inference. """ - def forward( - self, - input_ids: torch.LongTensor, - attention_mask: torch.LongTensor, - **kwargs, - ) -> BaseModelOutput: + def __init__(self, session: InferenceSession, parent_model: "ORTModel"): + super().__init__(session, parent_model) + + config = ( + self.parent_model.config.encoder + if hasattr(self.parent_model.config, "encoder") + else self.parent_model.config + ) + + self.normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config) + + def forward(self, input_ids: torch.LongTensor, attention_mask: torch.LongTensor, **kwargs) -> BaseModelOutput: use_torch = isinstance(input_ids, torch.Tensor) self.parent_model.raise_on_numpy_input_io_binding(use_torch) @@ -131,6 +138,14 @@ def __init__( ): super().__init__(session, parent_model) + config = ( + self.parent_model.config.encoder + if hasattr(self.parent_model.config, "encoder") + else self.parent_model.config + ) + + self.normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config) + # TODO: make this less hacky. self.key_value_input_names = [key for key in self.input_names if (".key" in key) or (".value" in key)] self.key_value_output_names = [key for key in self.output_names if (".key" in key) or (".value" in key)] @@ -146,11 +161,7 @@ def __init__( self.use_past_in_outputs = len(self.key_value_output_names) > 0 self.use_past_in_inputs = len(self.key_value_input_names) > 0 - self.use_fp16 = False - for inp in session.get_inputs(): - if "past_key_values" in inp.name and inp.type == "tensor(float16)": - self.use_fp16 = True - break + self.use_fp16 = self.dtype == torch.float16 # We may use ORTDecoderForSeq2Seq for vision-encoder-decoder models, where models as gpt2 # can be used but do not support KV caching for the cross-attention key/values, see: @@ -454,11 +465,3 @@ def prepare_inputs_for_merged( cache_position = cache_position.to(self.device) return use_cache_branch_tensor, past_key_values, cache_position - - -class ORTDecoder(ORTDecoderForSeq2Seq): - def __init__(self, *args, **kwargs): - logger.warning( - "The class `ORTDecoder` is deprecated and will be removed in optimum v1.15.0, please use `ORTDecoderForSeq2Seq` instead." - ) - super().__init__(*args, **kwargs) diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index fc185500d80..3cecadafe3e 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -46,7 +46,6 @@ from ..onnx.utils import _get_external_data_paths from ..utils import check_if_transformers_greater from ..utils.file_utils import validate_file_exists -from ..utils.normalized_config import NormalizedConfigManager from ..utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors from .base import ORTDecoderForSeq2Seq, ORTEncoder from .constants import ( @@ -1155,49 +1154,6 @@ class ORTModelForSeq2SeqLM(ORTModelForConditionalGeneration, GenerationMixin): auto_model_class = AutoModelForSeq2SeqLM main_input_name = "input_ids" - def __init__( - self, - encoder_session: ort.InferenceSession, - decoder_session: ort.InferenceSession, - config: "PretrainedConfig", - onnx_paths: List[str], - decoder_with_past_session: Optional[ort.InferenceSession] = None, - use_cache: bool = True, - use_io_binding: Optional[bool] = None, - model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - preprocessors: Optional[List] = None, - generation_config: Optional[GenerationConfig] = None, - **kwargs, - ): - super().__init__( - encoder_session, - decoder_session, - config, - onnx_paths, - decoder_with_past_session, - use_cache, - use_io_binding, - model_save_dir, - preprocessors, - generation_config, - **kwargs, - ) - - # The normalized_config initialization in ORTModelPart is unfortunately wrong as the top level config is initialized. - if config.model_type == "encoder-decoder": - self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.encoder.model_type - )(config.encoder) - - self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - - if self.decoder_with_past is not None: - self.decoder_with_past.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoder(session, self) @@ -1511,20 +1467,6 @@ def __init__( **kwargs, ) - # The normalized_config initialization in ORTModelPart is unfortunately wrong as the top level config is initialized. - self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.encoder.model_type - )(config.encoder) - - self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - - if self.decoder_with_past is not None: - self.decoder_with_past.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoderForVisionEncoderDecoder(session, self) From f0e3f2be5ccfcdb4da6bdfae32a1a5262292b699 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sat, 7 Sep 2024 20:23:21 +0200 Subject: [PATCH 15/24] use latent-consistency architecture name instead of lcm --- optimum/exporters/tasks.py | 2 +- optimum/onnxruntime/__init__.py | 2 ++ optimum/onnxruntime/modeling_diffusion.py | 2 +- tests/exporters/exporters_utils.py | 2 +- tests/onnxruntime/test_diffusion.py | 12 ++++++------ tests/onnxruntime/utils_onnxruntime_tests.py | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 97053040879..a489f34fb06 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -308,9 +308,9 @@ class TasksManager: "image-feature-extraction": "feature-extraction", # for backward compatibility and testing (where # model task and model type are still the same) - "lcm": "text-to-image", "stable-diffusion": "text-to-image", "stable-diffusion-xl": "text-to-image", + "latent-consistency": "text-to-image", } _CUSTOM_CLASSES = { diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index 35cbf14587e..78ef2896d05 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -91,6 +91,7 @@ "ORTPipelineForText2Image", "ORTPipelineForImage2Image", "ORTPipelineForInpainting", + "ORTDiffusionPipeline", ] @@ -149,6 +150,7 @@ ) else: from .modeling_diffusion import ( + ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, ORTPipelineForImage2Image, ORTPipelineForInpainting, diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 0d3fa2bcc54..32c64f38ef2 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -748,9 +748,9 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): ORT_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( [ - ("lcm", ORTLatentConsistencyModelPipeline), ("stable-diffusion", ORTStableDiffusionPipeline), ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), + ("latent-consistency", ORTLatentConsistencyModelPipeline), ] ) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index a55c7a124df..c8a33b0be35 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -298,7 +298,7 @@ PYTORCH_DIFFUSION_MODEL = { "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", - "lcm": "echarlaix/tiny-random-latent-consistency", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", } PYTORCH_TIMM_MODEL = { diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index a8b82dd7c4f..a7360ab386b 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -86,7 +86,7 @@ def to_np(image): class ORTPipelineForText2ImageTest(ORTModelTestMixin): ARCHITECTURE_TO_ORTMODEL_CLASS = { - "lcm": ORTLatentConsistencyModelPipeline, + "latent-consistency": ORTLatentConsistencyModelPipeline, "stable-diffusion": ORTStableDiffusionPipeline, "stable-diffusion-xl": ORTStableDiffusionXLPipeline, } @@ -150,8 +150,8 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - if model_arch == "lcm": - # LCM doesn't support deterministic outputs beyond the first inference step + if model_arch == "latent-consistency": + # Latent Consistency Model (LCM) doesn't support deterministic outputs beyond the first inference step # TODO: Investigate why this is the case inputs["num_inference_steps"] = 1 @@ -267,7 +267,7 @@ def test_shape(self, model_arch: str): @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) @require_diffusers def test_image_reproducibility(self, model_arch: str): - if model_arch in ["lcm"]: + if model_arch in ["latent-consistency"]: pytest.skip("Latent Consistency Model (LCM) doesn't support deterministic outputs") model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -288,8 +288,8 @@ def test_image_reproducibility(self, model_arch: str): @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) def test_negative_prompt(self, model_arch: str): - if model_arch in ["lcm"]: - pytest.skip("LCM (Latent Consistency Model) does not support negative prompts") + if model_arch in ["latent-consistency"]: + pytest.skip("Latent Consistency Model (LCM) does not support negative prompts") model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index aa06476498e..bb6935461d7 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -108,7 +108,7 @@ "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", "levit": "hf-internal-testing/tiny-random-LevitModel", - "lcm": "echarlaix/tiny-random-latent-consistency", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", "longt5": "hf-internal-testing/tiny-random-LongT5Model", From 80c63d087c2c7fb537a8d9740627f9042660e9a2 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sat, 7 Sep 2024 21:32:02 +0200 Subject: [PATCH 16/24] fix --- optimum/onnxruntime/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/onnxruntime/base.py b/optimum/onnxruntime/base.py index b59c59ede7d..0e54bafed78 100644 --- a/optimum/onnxruntime/base.py +++ b/optimum/onnxruntime/base.py @@ -139,8 +139,8 @@ def __init__( super().__init__(session, parent_model) config = ( - self.parent_model.config.encoder - if hasattr(self.parent_model.config, "encoder") + self.parent_model.config.decoder + if hasattr(self.parent_model.config, "decoder") else self.parent_model.config ) From a4518f23ede32ebebcf9a2b0a4beb3e4d7ac86b4 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sun, 8 Sep 2024 10:59:01 +0200 Subject: [PATCH 17/24] add ort diffusion pipeline tests --- optimum/onnxruntime/modeling_diffusion.py | 15 +- .../diffusers/pipeline_stable_diffusion_xl.py | 1 - tests/onnxruntime/test_diffusion.py | 134 ++++++++++-------- 3 files changed, 84 insertions(+), 66 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 32c64f38ef2..18cd38c5f29 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -712,13 +712,16 @@ class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, Stab ] -def _get_pipeline_class(class_name: str, throw_error_if_not_exist: bool = True): +def _get_pipeline_class(pipeline_class_name: str, throw_error_if_not_exist: bool = True): for ort_pipeline_class in SUPPORTED_ORT_PIPELINES: - if ort_pipeline_class.auto_model_class.__name__ == class_name: + if ( + ort_pipeline_class.__name__ == pipeline_class_name + or ort_pipeline_class.auto_model_class.__name__ == pipeline_class_name + ): return ort_pipeline_class if throw_error_if_not_exist: - raise ValueError(f"ORTDiffusionPipeline can't find a pipeline linked to {class_name}") + raise ValueError(f"ORTDiffusionPipeline can't find a pipeline linked to {pipeline_class_name}") class ORTDiffusionPipeline(ConfigMixin): @@ -777,10 +780,10 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): def _get_task_class(mapping, pipeline_class_name): def _get_model_name(pipeline_class_name): for ort_pipelines_mapping in SUPPORTED_ORT_PIPELINES_MAPPINGS: - for model_name, ort_pipeline in ort_pipelines_mapping.items(): + for model_name, ort_pipeline_class in ort_pipelines_mapping.items(): if ( - ort_pipeline.__name__ == pipeline_class_name - or ort_pipeline.auto_model_class.__name__ == pipeline_class_name + ort_pipeline_class.__name__ == pipeline_class_name + or ort_pipeline_class.auto_model_class.__name__ == pipeline_class_name ): return model_name diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py index 3c210862acf..0407c16a77a 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py @@ -418,7 +418,6 @@ def __call__( # 4. Prepare timesteps self.scheduler.set_timesteps(num_inference_steps) timesteps = self.scheduler.timesteps - print("timesteps", timesteps) # 5. Prepare latent variables latents = self.prepare_latents( diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index a7360ab386b..9f480b2d1a0 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -22,6 +22,7 @@ AutoPipelineForImage2Image, AutoPipelineForInpainting, AutoPipelineForText2Image, + DiffusionPipeline, ) from diffusers.utils import load_image from parameterized import parameterized @@ -29,27 +30,22 @@ from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin from optimum.onnxruntime import ( - ORTLatentConsistencyModelPipeline, + ORTDiffusionPipeline, ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, ) from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor from optimum.utils.testing_utils import grid_parameters, require_diffusers, require_ort_rocm -def get_generator(generator_framework, seed): - if generator_framework == "np": +def get_generator(framework, seed): + if framework == "np": return np.random.RandomState(seed) - elif generator_framework == "pt": + elif framework == "pt": return torch.Generator().manual_seed(seed) else: - raise ValueError(f"Unknown generator_framework: {generator_framework}") + raise ValueError(f"Unknown framework: {framework}") def _generate_prompts(batch_size=1): @@ -85,11 +81,7 @@ def to_np(image): class ORTPipelineForText2ImageTest(ORTModelTestMixin): - ARCHITECTURE_TO_ORTMODEL_CLASS = { - "latent-consistency": ORTLatentConsistencyModelPipeline, - "stable-diffusion": ORTStableDiffusionPipeline, - "stable-diffusion-xl": ORTStableDiffusionXLPipeline, - } + SUPPORTED_ARCHITECTURES = ["latent-consistency", "stable-diffusion", "stable-diffusion-xl"] ORTMODEL_CLASS = ORTPipelineForText2Image AUTOMODEL_CLASS = AutoPipelineForText2Image @@ -113,15 +105,23 @@ def test_load_vanilla_model_which_is_not_supported(self): f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_ort_pipeline_class_dispatch(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - self.assertIsInstance(pipeline, self.ARCHITECTURE_TO_ORTMODEL_CLASS[model_arch]) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + auto_pipeline = DiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = ORTDiffusionPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_num_images_per_prompt(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -138,7 +138,7 @@ def test_num_images_per_prompt(self, model_arch: str): outputs = pipeline(**inputs, num_images_per_prompt=num_images).images self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_compare_to_diffusers_pipeline(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -168,9 +168,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): self.assertEqual(ort_pipeline.device, diffusers_pipeline.device) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["CUDAExecutionProvider"]}) ) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -189,9 +187,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): self.assertEqual(outputs.shape, (batch_size, height, width, 3)) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["ROCMExecutionProvider"]}) ) @require_torch_gpu @require_ort_rocm @@ -210,7 +206,7 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st self.assertIsInstance(outputs, np.ndarray) self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -242,7 +238,7 @@ def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: self.assertTrue(auto_callback.has_been_called) self.assertEqual(auto_callback.number_of_steps, ort_callback.number_of_steps) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_shape(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -264,7 +260,7 @@ def test_shape(self, model_arch: str): (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_image_reproducibility(self, model_arch: str): if model_arch in ["latent-consistency"]: @@ -286,7 +282,7 @@ def test_image_reproducibility(self, model_arch: str): self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_negative_prompt(self, model_arch: str): if model_arch in ["latent-consistency"]: pytest.skip("Latent Consistency Model (LCM) does not support negative prompts") @@ -335,10 +331,8 @@ def test_negative_prompt(self, model_arch: str): class ORTPipelineForImage2ImageTest(ORTModelTestMixin): - ARCHITECTURE_TO_ORTMODEL_CLASS = { - "stable-diffusion": ORTStableDiffusionImg2ImgPipeline, - "stable-diffusion-xl": ORTStableDiffusionXLImg2ImgPipeline, - } + SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl"] + AUTOMODEL_CLASS = AutoPipelineForImage2Image ORTMODEL_CLASS = ORTPipelineForImage2Image @@ -364,7 +358,23 @@ def test_load_vanilla_model_which_is_not_supported(self): f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(list(SUPPORTED_ARCHITECTURES)) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + # auto_pipeline = DiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) + # ort_pipeline = ORTDiffusionPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) + + # self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_num_images_per_prompt(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -383,9 +393,7 @@ def test_num_images_per_prompt(self, model_arch: str): self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["CUDAExecutionProvider"]}) ) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -406,9 +414,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): self.assertEqual(outputs.shape, (batch_size, height, width, 3)) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["ROCMExecutionProvider"]}) ) @require_torch_gpu @require_ort_rocm @@ -429,7 +435,7 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st self.assertIsInstance(outputs, np.ndarray) self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): if model_arch in ["stable-diffusion"]: @@ -465,7 +471,7 @@ def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: self.assertTrue(ort_callback.has_been_called) self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_shape(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -490,7 +496,7 @@ def test_shape(self, model_arch: str): (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_compare_to_diffusers_pipeline(self, model_arch: str): pytest.skip("Img2Img models do not support support output reproducibility for some reason") @@ -509,7 +515,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): self.assertTrue(np.allclose(ort_output, diffusers_output, rtol=1e-2)) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_image_reproducibility(self, model_arch: str): pytest.skip("Img2Img models do not support support output reproducibility for some reason") @@ -532,9 +538,7 @@ def test_image_reproducibility(self, model_arch: str): class ORTPipelineForInpaintingTest(ORTModelTestMixin): - ARCHITECTURE_TO_ORTMODEL_CLASS = { - "stable-diffusion": ORTStableDiffusionInpaintPipeline, - } + SUPPORTED_ARCHITECTURES = ["stable-diffusion"] AUTOMODEL_CLASS = AutoPipelineForInpainting ORTMODEL_CLASS = ORTPipelineForInpainting @@ -568,7 +572,23 @@ def test_load_vanilla_model_which_is_not_supported(self): f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + # auto_pipeline = DiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) + # ort_pipeline = ORTDiffusionPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) + + # self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_num_images_per_prompt(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -587,9 +607,7 @@ def test_num_images_per_prompt(self, model_arch: str): self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["CUDAExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["CUDAExecutionProvider"]}) ) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -610,9 +628,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): self.assertEqual(outputs.shape, (batch_size, height, width, 3)) @parameterized.expand( - grid_parameters( - {"model_arch": list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys()), "provider": ["ROCMExecutionProvider"]} - ) + grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["ROCMExecutionProvider"]}) ) @require_torch_gpu @require_ort_rocm @@ -633,7 +649,7 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: st self.assertIsInstance(outputs, np.ndarray) self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -664,7 +680,7 @@ def __call__(self, step: int, timestep: int, latents: np.ndarray) -> None: self.assertTrue(ort_callback.has_been_called) self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_shape(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -689,7 +705,7 @@ def test_shape(self, model_arch: str): (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_compare_to_diffusers_pipeline(self, model_arch: str): if model_arch in ["stable-diffusion"]: @@ -724,7 +740,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4), ) - @parameterized.expand(list(ARCHITECTURE_TO_ORTMODEL_CLASS.keys())) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_image_reproducibility(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} From 9f0c7b632388274f6c451d2ee597935761198b1f Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 10 Sep 2024 11:03:44 +0200 Subject: [PATCH 18/24] added dummy objects --- optimum/onnxruntime/__init__.py | 10 +++++- optimum/utils/dummy_diffusers_objects.py | 44 ++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index 78ef2896d05..09a48ec955c 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -79,6 +79,10 @@ "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", "ORTLatentConsistencyModelPipeline", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", + "ORTPipelineForText2Image", + "ORTDiffusionPipeline", ] else: _import_structure["modeling_diffusion"] = [ @@ -88,9 +92,9 @@ "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", "ORTLatentConsistencyModelPipeline", - "ORTPipelineForText2Image", "ORTPipelineForImage2Image", "ORTPipelineForInpainting", + "ORTPipelineForText2Image", "ORTDiffusionPipeline", ] @@ -141,7 +145,11 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: from ..utils.dummy_diffusers_objects import ( + ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index f6914bbcd3a..35d1ffe9fc7 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -79,3 +79,47 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) + + +class ORTDiffusionPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForText2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForImage2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForInpainting(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) From 56d06d467e049c7838b1b6036e2b8c65eb5d7500 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 10 Sep 2024 11:19:49 +0200 Subject: [PATCH 19/24] remove duplicate code --- .../pipeline_stable_diffusion_img2img.py | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py index f7f0586ac90..a66035a789b 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py @@ -19,7 +19,6 @@ import PIL.Image import torch from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from diffusers.utils import deprecate from .pipeline_stable_diffusion import StableDiffusionPipelineMixin @@ -228,31 +227,7 @@ def __call__( latents_dtype = prompt_embeds.dtype image = image.astype(latents_dtype) - # encode the init image into latents and scale the latents - init_latents = self.vae_encoder(sample=image)[0] - scaling_factor = self.vae_decoder.config.get("scaling_factor", 0.18215) - init_latents = scaling_factor * init_latents - - if isinstance(prompt, str): - prompt = [prompt] - if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0: - # expand init_latents for batch_size - deprecation_message = ( - f"You have passed {len(prompt)} text prompts (`prompt`), but only {init_latents.shape[0]} initial" - " images (`image`). Initial images are now duplicating to match the number of text prompts. Note" - " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" - " your script to pass as many initial images as text prompts to suppress this warning." - ) - deprecate("len(prompt) != len(image)", "1.0.0", deprecation_message, standard_warn=False) - additional_image_per_prompt = len(prompt) // init_latents.shape[0] - init_latents = np.concatenate([init_latents] * additional_image_per_prompt * num_images_per_prompt, axis=0) - elif len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {len(prompt)} text prompts." - ) - else: - init_latents = np.concatenate([init_latents] * num_images_per_prompt, axis=0) # get the original timestep using init_timestep offset = self.scheduler.config.get("steps_offset", 0) @@ -274,8 +249,6 @@ def __call__( if accepts_eta: extra_step_kwargs["eta"] = eta - latents = init_latents - t_start = max(num_inference_steps - init_timestep + offset, 0) timesteps = self.scheduler.timesteps[t_start:].numpy() From 475efdfcca21a34fd43204e5ce3a7d5adc44c17f Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 11 Sep 2024 13:08:56 +0200 Subject: [PATCH 20/24] support testing without diffusers --- optimum/onnxruntime/__init__.py | 16 +++++ optimum/utils/dummy_diffusers_objects.py | 44 ++++++++++++ tests/onnxruntime/test_modeling.py | 91 ++++++++++++++---------- 3 files changed, 113 insertions(+), 38 deletions(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index 09a48ec955c..a6e3c139797 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -83,6 +83,10 @@ "ORTPipelineForInpainting", "ORTPipelineForText2Image", "ORTDiffusionPipeline", + "ORTModelTextEncoder", + "ORTModelUnet", + "ORTModelVaeDecoder", + "ORTModelVaeEncoder", ] else: _import_structure["modeling_diffusion"] = [ @@ -96,6 +100,10 @@ "ORTPipelineForInpainting", "ORTPipelineForText2Image", "ORTDiffusionPipeline", + "ORTModelTextEncoder", + "ORTModelUnet", + "ORTModelVaeDecoder", + "ORTModelVaeEncoder", ] @@ -147,6 +155,10 @@ from ..utils.dummy_diffusers_objects import ( ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, + ORTModelTextEncoder, + ORTModelUnet, + ORTModelVaeDecoder, + ORTModelVaeEncoder, ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, @@ -160,6 +172,10 @@ from .modeling_diffusion import ( ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, + ORTModelTextEncoder, + ORTModelUnet, + ORTModelVaeDecoder, + ORTModelVaeEncoder, ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index 35d1ffe9fc7..f63d3a603c4 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -123,3 +123,47 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) + + +class ORTModelTextEncoder(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTModelVaeDecoder(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTModelVaeEncoder(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTModelUnet(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 4b44acb38ab..d8dd46e4ad2 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -71,6 +71,7 @@ ONNX_DECODER_WITH_PAST_NAME, ONNX_ENCODER_NAME, ONNX_WEIGHTS_NAME, + ORTDiffusionPipeline, ORTModelForAudioClassification, ORTModelForAudioFrameClassification, ORTModelForAudioXVector, @@ -89,15 +90,12 @@ ORTModelForSpeechSeq2Seq, ORTModelForTokenClassification, ORTModelForVision2Seq, - ORTStableDiffusionPipeline, -) -from optimum.onnxruntime.base import ORTDecoderForSeq2Seq, ORTEncoder -from optimum.onnxruntime.modeling_diffusion import ( ORTModelTextEncoder, ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, ) +from optimum.onnxruntime.base import ORTDecoderForSeq2Seq, ORTEncoder from optimum.onnxruntime.modeling_ort import ORTModel from optimum.pipelines import pipeline from optimum.utils import ( @@ -108,7 +106,13 @@ DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, logging, ) -from optimum.utils.testing_utils import grid_parameters, remove_directory, require_hf_token, require_ort_rocm +from optimum.utils.testing_utils import ( + grid_parameters, + remove_directory, + require_diffusers, + require_hf_token, + require_ort_rocm, +) logger = logging.get_logger() @@ -205,12 +209,11 @@ def test_load_seq2seq_model_from_empty_cache(self): with self.assertRaises(Exception): _ = ORTModelForSeq2SeqLM.from_pretrained(self.TINY_ONNX_SEQ2SEQ_MODEL_ID, local_files_only=True) + @require_diffusers def test_load_stable_diffusion_model_from_cache(self): - _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching + _ = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching - model = ORTStableDiffusionPipeline.from_pretrained( - self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True - ) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) @@ -218,6 +221,7 @@ def test_load_stable_diffusion_model_from_cache(self): self.assertIsInstance(model.unet, ORTModelUnet) self.assertIsInstance(model.config, Dict) + @require_diffusers def test_load_stable_diffusion_model_from_empty_cache(self): dirpath = os.path.join( default_cache_path, "models--" + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID.replace("/", "--") @@ -225,9 +229,7 @@ def test_load_stable_diffusion_model_from_empty_cache(self): remove_directory(dirpath) with self.assertRaises(Exception): - _ = ORTStableDiffusionPipeline.from_pretrained( - self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True - ) + _ = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -300,18 +302,20 @@ def test_load_seq2seq_model_unknown_provider(self): with self.assertRaises(ValueError): ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="FooExecutionProvider") + @require_diffusers def test_load_stable_diffusion_model_from_hub(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) self.assertIsInstance(model.vae_encoder, ORTModelVaeEncoder) self.assertIsInstance(model.unet, ORTModelUnet) self.assertIsInstance(model.config, Dict) + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_load_stable_diffusion_model_cuda_provider(self): - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider" ) self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) @@ -321,11 +325,12 @@ def test_load_stable_diffusion_model_cuda_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cuda:0")) + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test def test_load_stable_diffusion_model_rocm_provider(self): - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="ROCMExecutionProvider" ) self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) @@ -335,8 +340,9 @@ def test_load_stable_diffusion_model_rocm_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cuda:0")) + @require_diffusers def test_load_stable_diffusion_model_cpu_provider(self): - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CPUExecutionProvider" ) self.assertListEqual(model.providers, ["CPUExecutionProvider"]) @@ -346,9 +352,10 @@ def test_load_stable_diffusion_model_cpu_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cpu")) + @require_diffusers def test_load_stable_diffusion_model_unknown_provider(self): with self.assertRaises(ValueError): - ORTStableDiffusionPipeline.from_pretrained( + ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="FooExecutionProvider" ) @@ -478,12 +485,11 @@ def test_passing_session_options_seq2seq(self): self.assertEqual(model.encoder.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.decoder.session.get_session_options().intra_op_num_threads, 3) + @require_diffusers def test_passing_session_options_stable_diffusion(self): options = onnxruntime.SessionOptions() options.intra_op_num_threads = 3 - model = ORTStableDiffusionPipeline.from_pretrained( - self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options - ) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options) self.assertEqual(model.unet.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.text_encoder.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.vae_decoder.session.get_session_options().intra_op_num_threads, 3) @@ -772,10 +778,11 @@ def test_seq2seq_model_on_rocm_ep_str(self): self.assertEqual(model.decoder_with_past.session.get_providers()[0], "ROCMExecutionProvider") self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_passing_provider_options_stable_diffusion(self): - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider" ) self.assertEqual( @@ -791,7 +798,7 @@ def test_passing_provider_options_stable_diffusion(self): self.assertEqual( model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "1" ) - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider", provider_options={"do_copy_in_default_stream": 0}, @@ -810,8 +817,9 @@ def test_passing_provider_options_stable_diffusion(self): model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "0" ) + @require_diffusers def test_stable_diffusion_model_on_cpu(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") model.to(cpu) self.assertEqual(model.device, cpu) @@ -825,9 +833,9 @@ def test_stable_diffusion_model_on_cpu(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CPUExecutionProvider") self.assertListEqual(model.providers, ["CPUExecutionProvider"]) - # test string device input for to() + @require_diffusers def test_stable_diffusion_model_on_cpu_str(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") model.to("cpu") self.assertEqual(model.device, cpu) @@ -841,10 +849,11 @@ def test_stable_diffusion_model_on_cpu_str(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CPUExecutionProvider") self.assertListEqual(model.providers, ["CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) gpu = torch.device("cuda") model.to(gpu) self.assertEqual(model.device, torch.device("cuda:0")) @@ -858,11 +867,12 @@ def test_stable_diffusion_model_on_gpu(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CUDAExecutionProvider") self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test def test_stable_diffusion_model_on_rocm_ep(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) gpu = torch.device("cuda") model.to(gpu) self.assertEqual(model.device, torch.device("cuda:0")) @@ -876,34 +886,35 @@ def test_stable_diffusion_model_on_rocm_ep(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "ROCMExecutionProvider") self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @unittest.skipIf(get_gpu_count() <= 1, "this test requires multi-gpu") def test_stable_diffusion_model_on_gpu_id(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to(torch.device("cuda:1")) self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to(1) self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda:1") self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - # test string device input for to() + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu_str(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda") self.assertEqual(model.device, torch.device("cuda:0")) self.assertEqual(model.unet.device, torch.device("cuda:0")) @@ -916,11 +927,12 @@ def test_stable_diffusion_model_on_gpu_str(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CUDAExecutionProvider") self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test def test_stable_diffusion_model_on_rocm_ep_str(self): - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda") self.assertEqual(model.device, torch.device("cuda:0")) self.assertEqual(model.unet.device, torch.device("cuda:0")) @@ -975,9 +987,10 @@ def test_save_seq2seq_model_without_past(self): self.assertTrue(ONNX_DECODER_WITH_PAST_NAME not in folder_contents) self.assertTrue(CONFIG_NAME in folder_contents) + @require_diffusers def test_save_stable_diffusion_model(self): with tempfile.TemporaryDirectory() as tmpdirname: - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.save_pretrained(tmpdirname) folder_contents = os.listdir(tmpdirname) self.assertIn(model.config_name, folder_contents) @@ -1050,10 +1063,11 @@ def test_save_load_seq2seq_model_with_external_data(self, use_cache: bool): os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") remove_directory(tmpdirname) + @require_diffusers def test_save_load_stable_diffusion_model_with_external_data(self): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["FORCE_ONNX_EXTERNAL_DATA"] = "1" # force exporting small model with external data - model = ORTStableDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) + model = ORTDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) model.save_pretrained(tmpdirname) # verify external data is exported @@ -1068,7 +1082,7 @@ def test_save_load_stable_diffusion_model_with_external_data(self): self.assertIn(ONNX_WEIGHTS_NAME + "_data", folder_contents) # verify loading from local folder works - model = ORTStableDiffusionPipeline.from_pretrained(tmpdirname, export=False) + model = ORTDiffusionPipeline.from_pretrained(tmpdirname, export=False) os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") remove_directory(tmpdirname) @@ -1180,11 +1194,12 @@ def test_push_seq2seq_model_with_external_data_to_hub(self): ) os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") + @require_diffusers @require_hf_token def test_push_stable_diffusion_model_with_external_data_to_hub(self): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["FORCE_ONNX_EXTERNAL_DATA"] = "1" # force exporting small model with external data - model = ORTStableDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) + model = ORTDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) model.save_pretrained( tmpdirname + "/onnx", token=os.environ.get("HF_AUTH_TOKEN", None), @@ -1194,7 +1209,7 @@ def test_push_stable_diffusion_model_with_external_data_to_hub(self): ) # verify loading from hub works - model = ORTStableDiffusionPipeline.from_pretrained( + model = ORTDiffusionPipeline.from_pretrained( MODEL_NAMES["stable-diffusion"] + "-onnx", export=False, token=os.environ.get("HF_AUTH_TOKEN", None), From e2ad89a8ca72a1a77a960b0092728553fced5ab1 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 11 Sep 2024 13:11:41 +0200 Subject: [PATCH 21/24] remove unnecessary --- optimum/utils/testing_utils.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/optimum/utils/testing_utils.py b/optimum/utils/testing_utils.py index 6579e230dc8..76fe9a05b13 100644 --- a/optimum/utils/testing_utils.py +++ b/optimum/utils/testing_utils.py @@ -84,17 +84,6 @@ def require_ort_rocm(test_case): ) -def require_ort_cuda(test_case): - """Decorator marking a test that requires CUDAExecutionProvider for ONNX Runtime.""" - import onnxruntime as ort - - providers = ort.get_available_providers() - - return unittest.skipUnless("CUDAExecutionProvider" == providers[0], "test requires CUDAExecutionProvider")( - test_case - ) - - def require_hf_token(test_case): """ Decorator marking a test that requires huggingface hub token. From 7b4b5bdd614694e87830ffa03749b8b0184fb48a Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 11 Sep 2024 13:53:17 +0200 Subject: [PATCH 22/24] revert --- tests/onnxruntime/test_modeling.py | 52 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index d8dd46e4ad2..edcab8b228d 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -71,7 +71,6 @@ ONNX_DECODER_WITH_PAST_NAME, ONNX_ENCODER_NAME, ONNX_WEIGHTS_NAME, - ORTDiffusionPipeline, ORTModelForAudioClassification, ORTModelForAudioFrameClassification, ORTModelForAudioXVector, @@ -94,6 +93,7 @@ ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, + ORTStableDiffusionPipeline, ) from optimum.onnxruntime.base import ORTDecoderForSeq2Seq, ORTEncoder from optimum.onnxruntime.modeling_ort import ORTModel @@ -211,9 +211,9 @@ def test_load_seq2seq_model_from_empty_cache(self): @require_diffusers def test_load_stable_diffusion_model_from_cache(self): - _ = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching + _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) @@ -229,7 +229,7 @@ def test_load_stable_diffusion_model_from_empty_cache(self): remove_directory(dirpath) with self.assertRaises(Exception): - _ = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) + _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -304,7 +304,7 @@ def test_load_seq2seq_model_unknown_provider(self): @require_diffusers def test_load_stable_diffusion_model_from_hub(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) self.assertIsInstance(model.vae_encoder, ORTModelVaeEncoder) @@ -315,7 +315,7 @@ def test_load_stable_diffusion_model_from_hub(self): @require_torch_gpu @pytest.mark.cuda_ep_test def test_load_stable_diffusion_model_cuda_provider(self): - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider" ) self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) @@ -330,7 +330,7 @@ def test_load_stable_diffusion_model_cuda_provider(self): @require_ort_rocm @pytest.mark.rocm_ep_test def test_load_stable_diffusion_model_rocm_provider(self): - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="ROCMExecutionProvider" ) self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) @@ -342,7 +342,7 @@ def test_load_stable_diffusion_model_rocm_provider(self): @require_diffusers def test_load_stable_diffusion_model_cpu_provider(self): - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CPUExecutionProvider" ) self.assertListEqual(model.providers, ["CPUExecutionProvider"]) @@ -355,7 +355,7 @@ def test_load_stable_diffusion_model_cpu_provider(self): @require_diffusers def test_load_stable_diffusion_model_unknown_provider(self): with self.assertRaises(ValueError): - ORTDiffusionPipeline.from_pretrained( + ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="FooExecutionProvider" ) @@ -489,7 +489,7 @@ def test_passing_session_options_seq2seq(self): def test_passing_session_options_stable_diffusion(self): options = onnxruntime.SessionOptions() options.intra_op_num_threads = 3 - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options) self.assertEqual(model.unet.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.text_encoder.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.vae_decoder.session.get_session_options().intra_op_num_threads, 3) @@ -782,7 +782,7 @@ def test_seq2seq_model_on_rocm_ep_str(self): @require_torch_gpu @pytest.mark.cuda_ep_test def test_passing_provider_options_stable_diffusion(self): - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider" ) self.assertEqual( @@ -798,7 +798,7 @@ def test_passing_provider_options_stable_diffusion(self): self.assertEqual( model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "1" ) - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CUDAExecutionProvider", provider_options={"do_copy_in_default_stream": 0}, @@ -819,7 +819,7 @@ def test_passing_provider_options_stable_diffusion(self): @require_diffusers def test_stable_diffusion_model_on_cpu(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") model.to(cpu) self.assertEqual(model.device, cpu) @@ -835,7 +835,7 @@ def test_stable_diffusion_model_on_cpu(self): @require_diffusers def test_stable_diffusion_model_on_cpu_str(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") model.to("cpu") self.assertEqual(model.device, cpu) @@ -853,7 +853,7 @@ def test_stable_diffusion_model_on_cpu_str(self): @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) gpu = torch.device("cuda") model.to(gpu) self.assertEqual(model.device, torch.device("cuda:0")) @@ -872,7 +872,7 @@ def test_stable_diffusion_model_on_gpu(self): @require_ort_rocm @pytest.mark.rocm_ep_test def test_stable_diffusion_model_on_rocm_ep(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) gpu = torch.device("cuda") model.to(gpu) self.assertEqual(model.device, torch.device("cuda:0")) @@ -889,21 +889,21 @@ def test_stable_diffusion_model_on_rocm_ep(self): @require_diffusers @unittest.skipIf(get_gpu_count() <= 1, "this test requires multi-gpu") def test_stable_diffusion_model_on_gpu_id(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to(torch.device("cuda:1")) self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to(1) self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda:1") self.assertEqual(model.unet.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.text_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") @@ -914,7 +914,7 @@ def test_stable_diffusion_model_on_gpu_id(self): @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu_str(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda") self.assertEqual(model.device, torch.device("cuda:0")) self.assertEqual(model.unet.device, torch.device("cuda:0")) @@ -932,7 +932,7 @@ def test_stable_diffusion_model_on_gpu_str(self): @require_ort_rocm @pytest.mark.rocm_ep_test def test_stable_diffusion_model_on_rocm_ep_str(self): - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.to("cuda") self.assertEqual(model.device, torch.device("cuda:0")) self.assertEqual(model.unet.device, torch.device("cuda:0")) @@ -990,7 +990,7 @@ def test_save_seq2seq_model_without_past(self): @require_diffusers def test_save_stable_diffusion_model(self): with tempfile.TemporaryDirectory() as tmpdirname: - model = ORTDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) + model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) model.save_pretrained(tmpdirname) folder_contents = os.listdir(tmpdirname) self.assertIn(model.config_name, folder_contents) @@ -1067,7 +1067,7 @@ def test_save_load_seq2seq_model_with_external_data(self, use_cache: bool): def test_save_load_stable_diffusion_model_with_external_data(self): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["FORCE_ONNX_EXTERNAL_DATA"] = "1" # force exporting small model with external data - model = ORTDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) + model = ORTStableDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) model.save_pretrained(tmpdirname) # verify external data is exported @@ -1082,7 +1082,7 @@ def test_save_load_stable_diffusion_model_with_external_data(self): self.assertIn(ONNX_WEIGHTS_NAME + "_data", folder_contents) # verify loading from local folder works - model = ORTDiffusionPipeline.from_pretrained(tmpdirname, export=False) + model = ORTStableDiffusionPipeline.from_pretrained(tmpdirname, export=False) os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") remove_directory(tmpdirname) @@ -1199,7 +1199,7 @@ def test_push_seq2seq_model_with_external_data_to_hub(self): def test_push_stable_diffusion_model_with_external_data_to_hub(self): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["FORCE_ONNX_EXTERNAL_DATA"] = "1" # force exporting small model with external data - model = ORTDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) + model = ORTStableDiffusionPipeline.from_pretrained(MODEL_NAMES["stable-diffusion"], export=True) model.save_pretrained( tmpdirname + "/onnx", token=os.environ.get("HF_AUTH_TOKEN", None), @@ -1209,7 +1209,7 @@ def test_push_stable_diffusion_model_with_external_data_to_hub(self): ) # verify loading from hub works - model = ORTDiffusionPipeline.from_pretrained( + model = ORTStableDiffusionPipeline.from_pretrained( MODEL_NAMES["stable-diffusion"] + "-onnx", export=False, token=os.environ.get("HF_AUTH_TOKEN", None), From 036dc46b09b43a1c189e234768b79cdbdb54c7a0 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 12 Sep 2024 12:32:57 +0200 Subject: [PATCH 23/24] style --- tests/onnxruntime/test_modeling.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index edcab8b228d..af3d47f29d3 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -213,7 +213,9 @@ def test_load_seq2seq_model_from_empty_cache(self): def test_load_stable_diffusion_model_from_cache(self): _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) + model = ORTStableDiffusionPipeline.from_pretrained( + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True + ) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) @@ -229,7 +231,9 @@ def test_load_stable_diffusion_model_from_empty_cache(self): remove_directory(dirpath) with self.assertRaises(Exception): - _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True) + _ = ORTStableDiffusionPipeline.from_pretrained( + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True + ) @require_torch_gpu @pytest.mark.cuda_ep_test @@ -489,7 +493,9 @@ def test_passing_session_options_seq2seq(self): def test_passing_session_options_stable_diffusion(self): options = onnxruntime.SessionOptions() options.intra_op_num_threads = 3 - model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options) + model = ORTStableDiffusionPipeline.from_pretrained( + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, session_options=options + ) self.assertEqual(model.unet.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.text_encoder.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.vae_decoder.session.get_session_options().intra_op_num_threads, 3) From afbb9afc99c556a4dae3cbc2207f1d62e045388b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 12 Sep 2024 16:30:10 +0200 Subject: [PATCH 24/24] remove model parts from optimum.onnxruntime --- optimum/onnxruntime/__init__.py | 16 --------- optimum/utils/dummy_diffusers_objects.py | 44 ------------------------ tests/onnxruntime/test_modeling.py | 16 ++++++--- 3 files changed, 11 insertions(+), 65 deletions(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index a6e3c139797..09a48ec955c 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -83,10 +83,6 @@ "ORTPipelineForInpainting", "ORTPipelineForText2Image", "ORTDiffusionPipeline", - "ORTModelTextEncoder", - "ORTModelUnet", - "ORTModelVaeDecoder", - "ORTModelVaeEncoder", ] else: _import_structure["modeling_diffusion"] = [ @@ -100,10 +96,6 @@ "ORTPipelineForInpainting", "ORTPipelineForText2Image", "ORTDiffusionPipeline", - "ORTModelTextEncoder", - "ORTModelUnet", - "ORTModelVaeDecoder", - "ORTModelVaeEncoder", ] @@ -155,10 +147,6 @@ from ..utils.dummy_diffusers_objects import ( ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, @@ -172,10 +160,6 @@ from .modeling_diffusion import ( ORTDiffusionPipeline, ORTLatentConsistencyModelPipeline, - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index f63d3a603c4..35d1ffe9fc7 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -123,47 +123,3 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) - - -class ORTModelTextEncoder(metaclass=DummyObject): - _backends = ["diffusers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["diffusers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["diffusers"]) - - -class ORTModelVaeDecoder(metaclass=DummyObject): - _backends = ["diffusers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["diffusers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["diffusers"]) - - -class ORTModelVaeEncoder(metaclass=DummyObject): - _backends = ["diffusers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["diffusers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["diffusers"]) - - -class ORTModelUnet(metaclass=DummyObject): - _backends = ["diffusers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["diffusers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["diffusers"]) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index af3d47f29d3..199b96342e7 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -89,11 +89,6 @@ ORTModelForSpeechSeq2Seq, ORTModelForTokenClassification, ORTModelForVision2Seq, - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, - ORTStableDiffusionPipeline, ) from optimum.onnxruntime.base import ORTDecoderForSeq2Seq, ORTEncoder from optimum.onnxruntime.modeling_ort import ORTModel @@ -106,6 +101,7 @@ DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, logging, ) +from optimum.utils.import_utils import is_diffusers_available from optimum.utils.testing_utils import ( grid_parameters, remove_directory, @@ -115,6 +111,16 @@ ) +if is_diffusers_available(): + from optimum.onnxruntime.modeling_diffusion import ( + ORTModelTextEncoder, + ORTModelUnet, + ORTModelVaeDecoder, + ORTModelVaeEncoder, + ORTStableDiffusionPipeline, + ) + + logger = logging.get_logger()