diff --git a/optimum/pipelines/pipelines_base.py b/optimum/pipelines/pipelines_base.py index 37430205855..cc065fa922a 100644 --- a/optimum/pipelines/pipelines_base.py +++ b/optimum/pipelines/pipelines_base.py @@ -20,6 +20,7 @@ AudioClassificationPipeline, AutoConfig, AutoFeatureExtractor, + AutoImageProcessor, AutomaticSpeechRecognitionPipeline, AutoTokenizer, FeatureExtractionPipeline, @@ -41,7 +42,8 @@ ZeroShotClassificationPipeline, ) from transformers import pipeline as transformers_pipeline -from transformers.feature_extraction_utils import PreTrainedFeatureExtractor +from transformers.feature_extraction_utils import FeatureExtractionMixin +from transformers.image_processing_utils import ImageProcessingMixin from transformers.pipelines import SUPPORTED_TASKS as TRANSFORMERS_SUPPORTED_TASKS from transformers.pipelines import infer_framework_load_model @@ -291,7 +293,8 @@ def pipeline( task: str = None, model: Optional[Any] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, + image_processor: Optional[Union[str, ImageProcessingMixin]] = None, + feature_extractor: Optional[Union[str, FeatureExtractionMixin]] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, accelerator: Optional[str] = "ort", @@ -328,16 +331,20 @@ def pipeline( supported_tasks = ORT_SUPPORTED_TASKS if accelerator == "ort" else TRANSFORMERS_SUPPORTED_TASKS - no_feature_extractor_tasks = set() no_tokenizer_tasks = set() + no_image_processor = set() + no_feature_extractor_tasks = set() + for _task, values in supported_tasks.items(): if values["type"] == "text": + no_image_processor.add(_task) no_feature_extractor_tasks.add(_task) elif values["type"] in {"image", "video"}: no_tokenizer_tasks.add(_task) elif values["type"] in {"audio"}: no_tokenizer_tasks.add(_task) - elif values["type"] not in ["multimodal", "audio", "video"]: + no_image_processor.add(_task) + elif values["type"] not in ["multimodal", "image", "audio", "video"]: raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}") # copied from transformers.pipelines.__init__.py l.609 @@ -350,6 +357,11 @@ def pipeline( else: load_tokenizer = True + if targeted_task in no_image_processor: + load_image_processor = False + else: + load_image_processor = True + if targeted_task in no_feature_extractor_tasks: load_feature_extractor = False else: @@ -360,6 +372,8 @@ def pipeline( targeted_task, load_tokenizer, tokenizer, + load_image_processor, + image_processor, feature_extractor, load_feature_extractor, SUPPORTED_TASKS=supported_tasks, @@ -374,11 +388,14 @@ def pipeline( tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=use_fast, **kwargs) if feature_extractor is None and load_feature_extractor: feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **kwargs) + if image_processor is None and load_image_processor: + image_processor = AutoImageProcessor.from_pretrained(model_id, **kwargs) return transformers_pipeline( task, model=model, tokenizer=tokenizer, + image_processor=image_processor, feature_extractor=feature_extractor, use_fast=use_fast, **kwargs,