Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/xenova-add-siglip' into add-hier…
Browse files Browse the repository at this point in the history
…a-onnx
  • Loading branch information
xenova committed Aug 29, 2024
2 parents 95336c0 + 9db1428 commit d4321b6
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/source/exporters/onnx/overview.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- SEW
- SEW-D
- Speech2Text
- SigLIP
- SpeechT5
- Splinter
- SqueezeBert
Expand Down
25 changes: 25 additions & 0 deletions optimum/exporters/onnx/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,31 @@ def patch_model_for_export(
return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class SiglipNormalizedConfig(CLIPNormalizedConfig):
    """Normalized config for SigLIP; adds nothing on top of ``CLIPNormalizedConfig``."""


class SiglipOnnxConfig(CLIPOnnxConfig):
    """ONNX export configuration for SigLIP, derived from ``CLIPOnnxConfig``.

    Overrides the normalized-config class, the default ONNX opset and the
    declared model inputs; everything else is inherited from the parent.
    """

    NORMALIZED_CONFIG_CLASS = SiglipNormalizedConfig
    DEFAULT_ONNX_OPSET = 13

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        """Return a mapping from each input name to its ``{axis index: axis name}`` dict."""
        # NOTE: No attention_mask
        text_axes = {0: "text_batch_size", 1: "sequence_length"}
        image_axes = {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"}
        return {"input_ids": text_axes, "pixel_values": image_axes}


class SiglipTextWithProjectionOnnxConfig(CLIPTextWithProjectionOnnxConfig):
    """ONNX config for the SigLIP text model with projection; identical to its CLIP parent."""


class SiglipTextOnnxConfig(CLIPTextOnnxConfig):
    """ONNX config for the SigLIP text model; identical to ``CLIPTextOnnxConfig``."""


class UNetOnnxConfig(VisionOnnxConfig):
ATOL_FOR_VALIDATION = 1e-3
# The ONNX export of a CLIPText architecture, another Stable Diffusion component, needs the Trilu
Expand Down
13 changes: 13 additions & 0 deletions optimum/exporters/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,19 @@ class TasksManager:
"audio-classification",
onnx="SEWDOnnxConfig",
),
"siglip": supported_tasks_mapping(
"feature-extraction",
"zero-shot-image-classification",
onnx="SiglipOnnxConfig",
),
"siglip-text-model": supported_tasks_mapping(
"feature-extraction",
onnx="SiglipTextOnnxConfig",
),
"siglip-text-with-projection": supported_tasks_mapping(
"feature-extraction",
onnx="SiglipTextWithProjectionOnnxConfig",
),
"speech-to-text": supported_tasks_mapping(
"feature-extraction",
"feature-extraction-with-past",
Expand Down
2 changes: 2 additions & 0 deletions optimum/utils/normalized_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ class NormalizedConfigManager:
'owlvit',
'perceiver',
'roformer',
'segformer',
'siglip',
'squeezebert',
'table-transformer',
"""
Expand Down
1 change: 1 addition & 0 deletions tests/exporters/exporters_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
"roformer": "hf-internal-testing/tiny-random-RoFormerModel",
"sam": "fxmarty/sam-vit-tiny-random",
"segformer": "hf-internal-testing/tiny-random-SegformerModel",
"siglip": "HuggingFaceM4/tiny-random-siglip",
"splinter": "hf-internal-testing/tiny-random-SplinterModel",
"squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel",
"swin": "hf-internal-testing/tiny-random-SwinModel",
Expand Down

0 comments on commit d4321b6

Please sign in to comment.