diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..9bb1ca9
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,42 @@
+name: Lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  lint-python:
+    name: Pylint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          python-version: "3.9"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pylint
+          python -m pip install -e .
+      - name: Analysing the code with pylint
+        run: |
+          pylint --output-format=colorized $(git ls-files '*.py')
+
+  lint-python-format:
+    name: Python format
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          python-version: "3.9"
+      - uses: psf/black@stable
+        with:
+          options: "--check --diff"
+      - uses: isort/isort-action@master
+        with:
+          configuration:
+            --check
+            --diff
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2b6e1f8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+__pycache__/
+*.egg-info/
+*.egg
+
+.idea*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..30a0ae1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,53 @@
+repos:
+- repo: https://github.com/psf/black
+  rev: 23.3.0
+  hooks:
+  - id: black
+- repo: https://github.com/PyCQA/isort
+  rev: 5.12.0
+  hooks:
+  - id: isort
+    args:
+      [
+        "--force-single-line-imports",
+        "--ensure-newline-before-comments",
+        "--line-length=120",
+      ]
+- repo: https://github.com/asottile/pyupgrade
+  rev: v3.8.0
+  hooks:
+  - id: pyupgrade
+- repo: https://github.com/PyCQA/docformatter
+  rev: v1.7.3
+  hooks:
+  - id: docformatter
+    additional_dependencies: [tomli]
+    args:
+      [
+        "--in-place",
+        "--config",
+        "pyproject.toml",
+      ]
+- repo: https://github.com/executablebooks/mdformat
+  rev: 0.7.16
+  hooks:
+  - id: mdformat
+    additional_dependencies:
+    - mdformat-gfm
+    - mdformat-black
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+  - id: check-yaml
+  - id: check-toml
+  - id: check-json
+  - id: check-ast
+  - id: fix-byte-order-marker
+  - id: end-of-file-fixer
+  - id: trailing-whitespace
+  - id: check-added-large-files
+  - id: check-case-conflict
+  - id: check-merge-conflict
+  - id: detect-private-key
+  - id: no-commit-to-branch
+    args: ["-b=main"]
diff --git a/README.md b/README.md
index 7cef31e..dd6a7aa 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,24 @@
-# ti-vit
\ No newline at end of file
+# TI-ViT
+
+The repository contains a script for exporting the PyTorch ViT model to ONNX format in a form that is compatible
+with [edgeai-tidl-tools](https://github.com/TexasInstruments/edgeai-tidl-tools) (version 8.6.0.5).
+
+## Installation
+
+To install the export script, run the following command:
+```commandline
+pip3 install git+https://github.com/ENOT-AutoDL/ti-vit.git@main
+```
+
+## Examples
+
+To export the model version with maximum performance, run the following command:
+```commandline
+export-ti-vit -o npu-max-perf.onnx -t npu-max-perf
+```
+
+To export the model version with minimal loss of accuracy, run the following command:
+```commandline
+export-ti-vit -o npu-max-acc.onnx -t npu-max-acc
+```
+
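+## Python API
+
+The same conversion is also available from Python. The snippet below is a minimal sketch that mirrors what the
+`export-ti-vit` entry point does internally; the output file name and input size are only examples:
+```python
+import torch
+from torchvision.models import ViT_B_16_Weights
+from torchvision.models import vit_b_16
+
+from ti_vit import TICompatibleVitOrtMaxPerf
+
+# Build the torchvision ViT-B/16 model and convert it to the TI-compatible variant.
+model = vit_b_16(weights=ViT_B_16_Weights.DEFAULT).cpu().eval()
+model = TICompatibleVitOrtMaxPerf(model=model)
+
+# Export to ONNX with the same settings as the CLI (opset 9, fixed 224x224 input).
+dummy = torch.ones([1, 3, 224, 224], dtype=torch.float32)
+torch.onnx.export(model, dummy, "npu-max-perf.onnx", input_names=["input"], output_names=["output"], opset_version=9)
+```
+
+Use `TICompatibleVitOrtMaxAcc` instead of `TICompatibleVitOrtMaxPerf` to get the accuracy-oriented variant.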
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..854aa48
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,62 @@
+[project]
+name = 'ti-vit'
+version = '0.0.1'
+dependencies = [
+    'torch==1.13.1',
+    'torchvision==0.14.1',
+]
+
+[project.scripts]
+export-ti-vit = "ti_vit.export:export_ti_compatible_vit"
+
+[tool.black]
+line-length = 120
+target-version = ["py38", "py39"]
+include = '\.pyi?$'
+
+[tool.isort]
+profile = "black"
+line_length = 120
+ensure_newline_before_comments = true
+force_single_line = true
+
+[tool.nbqa.mutate]
+pyupgrade = 1
+
+[tool.nbqa.addopts]
+pyupgrade = ["--py38-plus"]
+
+[tool.docformatter]
+recursive = true
+wrap-summaries = 0
+wrap-descriptions = 0
+blank = true
+black = true
+pre-summary-newline = true
+
+[tool.pylint.format]
+max-line-length = 120
+
+[tool.pylint.design]
+max-args = 12
+max-locals = 30
+max-attributes = 20
+min-public-methods = 0
+
+[tool.pylint.typecheck]
+generated-members = ["torch.*"]
+
+[tool.pylint.messages_control]
+disable = [
+    "logging-fstring-interpolation",
+    "missing-module-docstring",
+    "unnecessary-pass",
+]
+
+[tool.pylint.BASIC]
+good-names = ["B", "N", "C"]
+
+[tool.pyright]
+reportMissingImports = false
+reportMissingTypeStubs = false
+reportWildcardImportFromLibrary = false
diff --git a/src/ti_vit/__init__.py b/src/ti_vit/__init__.py
new file mode 100644
index 0000000..b32ecd8
--- /dev/null
+++ b/src/ti_vit/__init__.py
@@ -0,0 +1,2 @@
+from ti_vit.model import TICompatibleVitOrtMaxAcc
+from ti_vit.model import TICompatibleVitOrtMaxPerf
diff --git a/src/ti_vit/attention.py b/src/ti_vit/attention.py
new file mode 100644
index 0000000..d41b71b
--- /dev/null
+++ b/src/ti_vit/attention.py
@@ -0,0 +1,167 @@
+import typing
+from enum import Enum
+from typing import Tuple
+
+import torch
+from torch import nn
+
+from ti_vit.common import copy_weights
+from ti_vit.common import sync_device_and_mode
+
+
+class AttentionType(Enum):
+    """
+    Type of attention block.
+
+    - CONV_CONV - qkv projection and output projection are convolutions with a 1x1 kernel
+    - CONV_LINEAR - qkv projection is a convolution with a 1x1 kernel, output projection is linear
+
+    """
+
+    CONV_CONV = "CONV_CONV"
+    CONV_LINEAR = "CONV_LINEAR"
+
+
+class TICompatibleAttention(nn.Module):
+    """TI compatible attention block."""
+
+    def __init__(
+        self,
+        dim: int,
+        num_heads: int = 8,
+        qkv_bias: bool = False,
+        attention_type: AttentionType = AttentionType.CONV_LINEAR,
+    ):
+        """
+        Parameters
+        ----------
+        dim : int
+            Total dimension of the model.
+        num_heads : int
+            Number of parallel attention heads.
+        qkv_bias : bool
+            If True, adds a learnable bias to the qkv projection. Default value is False.
+        attention_type : AttentionType
+            Type of attention block (see ``AttentionType`` enum documentation).
+        """
+        super().__init__()
+
+        if dim % num_heads != 0:
+            raise ValueError(f'"dim"={dim} should be divisible by "num_heads"={num_heads}')
+
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = head_dim**-0.5
+
+        if attention_type == AttentionType.CONV_CONV:
+            self.qkv_proj = nn.Conv2d(in_channels=dim, out_channels=dim * 3, kernel_size=(1, 1), bias=qkv_bias)
+            self.out_proj = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=(1, 1))
+        elif attention_type == AttentionType.CONV_LINEAR:
+            self.qkv_proj = nn.Conv2d(in_channels=dim, out_channels=dim * 3, kernel_size=(1, 1), bias=qkv_bias)
+            self.out_proj = nn.Linear(in_features=dim, out_features=dim)
+        else:
+            raise ValueError(f'Got unknown attention_type "{attention_type}"')
+
+        self._attention_type = attention_type
+
+    def forward(  # pylint: disable=missing-function-docstring
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        need_weights: bool = True,
+    ) -> Tuple[torch.Tensor, None]:
+        del key, value
+
+        assert not need_weights
+
+        x = query
+        B, N, C = x.shape
+
+        # (B, N, C) -> (B, N, C, 1) -> (B, C, N, 1)
+        x = x.unsqueeze(3).permute(0, 2, 1, 3)
+
+        qkv = self.qkv_proj(x)
+        qkv = qkv.reshape(B, 3, C, N)
+        q, k, v = qkv.split(1, dim=1)
+
+        # (B, 1, C, N) -> (B, H, C//H, N) -> (B, H, N, C//H)
+        q = q.reshape(B, self.num_heads, C // self.num_heads, N).permute(0, 1, 3, 2)
+        # (B, 1, C, N) -> (B, H, C//H, N)
+        k = k.reshape(B, self.num_heads, C // self.num_heads, N)
+        # (B, 1, C, N) -> (B, H, C//H, N) -> (B, H, N, C//H)
+        v = v.reshape(B, self.num_heads, C // self.num_heads, N).permute(0, 1, 3, 2)
+
+        attn = (q @ k) * self.scale
+        attn = attn.softmax(dim=-1)
+
+        x = attn @ v
+
+        if self._attention_type == AttentionType.CONV_CONV:
+            # (B, H, N, C//H) -> (B, H, C//H, N) -> (B, C, N, 1)
+            x = x.permute(0, 1, 3, 2).reshape(B, C, N, 1)
+            x = self.out_proj(x)
+            x = x.permute(0, 2, 1, 3)
+            x = x.squeeze(3)
+        else:
+            # (B, H, N, C//H) -> (B, N, H, C//H) -> (B, N, C)
+            x = x.permute(0, 2, 1, 3).reshape(B, N, C)
+            x = self.out_proj(x)
+
+        return x, None
+
+    @classmethod
+    def from_module(
+        cls,
+        vit_attn: nn.Module,
+        attention_type: AttentionType = AttentionType.CONV_CONV,
+    ) -> "TICompatibleAttention":
+        """
+        Create TI compatible attention block from common ViT attention block.
+
+        Parameters
+        ----------
+        vit_attn : nn.Module
+            Source block.
+        attention_type : AttentionType
+            Attention type (see ``AttentionType`` enum documentation).
+
+        Returns
+        -------
+        TICompatibleAttention
+            Instance of ``TICompatibleAttention`` with appropriate weights, device and training mode.
+
+        """
+        if hasattr(vit_attn, "qkv"):
+            qkv_proj = typing.cast(nn.Linear, vit_attn.qkv)
+            out_proj = typing.cast(nn.Linear, vit_attn.proj)
+        else:
+            in_proj_weight = typing.cast(nn.Parameter, vit_attn.in_proj_weight)
+            out_features, in_features = in_proj_weight.shape
+            qkv_proj = nn.Linear(
+                in_features=in_features,
+                out_features=out_features,
+                bias=hasattr(vit_attn, "in_proj_bias"),
+                device=in_proj_weight.device,
+                dtype=in_proj_weight.dtype,
+            )
+            qkv_proj.weight = in_proj_weight
+            qkv_proj.bias = vit_attn.in_proj_bias  # pyright: ignore[reportAttributeAccessIssue]
+
+            out_proj = typing.cast(nn.Linear, vit_attn.out_proj)
+
+        ti_compatible_attn = cls(
+            dim=qkv_proj.in_features,
+            num_heads=typing.cast(int, vit_attn.num_heads),
+            qkv_bias=qkv_proj.bias is not None,
+            attention_type=attention_type,
+        )
+        sync_device_and_mode(src=vit_attn, dst=ti_compatible_attn)
+
+        copy_weights(src=qkv_proj, dst=ti_compatible_attn.qkv_proj)
+        copy_weights(src=out_proj, dst=ti_compatible_attn.out_proj)
+
+        if hasattr(vit_attn, "scale"):
+            ti_compatible_attn.scale = vit_attn.scale
+
+        return ti_compatible_attn
diff --git a/src/ti_vit/common.py b/src/ti_vit/common.py
new file mode 100644
index 0000000..0579e2b
--- /dev/null
+++ b/src/ti_vit/common.py
@@ -0,0 +1,45 @@
+from typing import Union
+
+import torch
+from torch import nn
+
+
+def copy_weights(src: nn.Linear, dst: Union[nn.Linear, nn.Conv2d]) -> None:
+    """
+    Update weights and bias parameters of the destination module with values from the source module.
+
+    Parameters
+    ----------
+    src : nn.Linear
+        The source module.
+    dst : Union[nn.Linear, nn.Conv2d]
+        The destination module.
+
+    """
+    with torch.no_grad():
+        if isinstance(dst, nn.Linear):
+            dst.weight.copy_(src.weight)
+        elif isinstance(dst, nn.Conv2d):
+            dst.weight.copy_(src.weight.unsqueeze(-1).unsqueeze(-1))
+        else:
+            raise TypeError(f"dst must be nn.Linear or nn.Conv2d (type(dst)={type(dst)})")
+
+        if src.bias is not None:
+            dst.bias.copy_(src.bias)  # pyright: ignore[reportOptionalMemberAccess]
+
+
+def sync_device_and_mode(src: nn.Module, dst: nn.Module) -> None:
+    """
+    Update device and training mode parameters of the destination module with values from the source module.
+
+    Parameters
+    ----------
+    src : nn.Module
+        The source module.
+    dst : nn.Module
+        The destination module.
+
+    """
+    device = next(src.parameters()).device
+    dst.to(device=device)
+    dst.train(mode=src.training)
diff --git a/src/ti_vit/export.py b/src/ti_vit/export.py
new file mode 100644
index 0000000..04b79e9
--- /dev/null
+++ b/src/ti_vit/export.py
@@ -0,0 +1,111 @@
+import argparse
+import logging
+import sys
+import warnings
+from pathlib import Path
+from typing import Optional
+from typing import Union
+
+import torch
+from torchvision.models import ViT_B_16_Weights
+from torchvision.models import vit_b_16
+
+from ti_vit.model import TICompatibleVitOrtMaxAcc
+from ti_vit.model import TICompatibleVitOrtMaxPerf
+
+
+def export(
+    output_onnx_path: Union[str, Path],
+    model_type: str,
+    checkpoint_path: Optional[Union[str, Path]] = None,
+    resolution: int = 224,
+) -> None:
+    """
+    Export a TI compatible ViT model to ONNX.
+
+    Parameters
+    ----------
+    output_onnx_path : Union[str, Path]
+        Path to the output ONNX file.
+    model_type : str
+        Type of the final model. Possible values are "npu-max-acc", "npu-max-perf", "npu-max-perf-experimental",
+        or "cpu".
+    checkpoint_path : Optional[Union[str, Path]] = None
+        Path to the PyTorch model checkpoint. If None, the pretrained torchvision ViT_B_16 model is used.
+        Default value is None.
+    resolution : int
+        Resolution of input image. Default value is 224.
+    """
+    if checkpoint_path is None:
+        model = vit_b_16(weights=ViT_B_16_Weights.DEFAULT, progress=True)
+    else:
+        checkpoint = torch.load(str(checkpoint_path))
+        model = checkpoint["model_ckpt"]
+
+    model.cpu().eval()
+
+    try:
+        transform_model_func = {
+            "cpu": lambda model: model,
+            "npu-max-acc": TICompatibleVitOrtMaxAcc,
+            "npu-max-perf": lambda model: TICompatibleVitOrtMaxPerf(model=model, ignore_tidl_errors=False),
+            "npu-max-perf-experimental": lambda model: TICompatibleVitOrtMaxPerf(model=model, ignore_tidl_errors=True),
+        }[model_type]
+    except KeyError as exc:
+        raise ValueError(f"Got unknown transformation type ('{model_type}')") from exc
+
+    model = transform_model_func(model)
+
+    device = next(model.parameters()).device
+    dummy_data = torch.ones([1, 3, resolution, resolution], dtype=torch.float32, device=device)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")  # disable export warnings
+        torch.onnx.export(
+            model=model,
+            f=str(output_onnx_path),
+            args=dummy_data,
+            input_names=["input"],
+            output_names=["output"],
+            opset_version=9,
+        )
+
+
+def export_ti_compatible_vit() -> None:  # pylint: disable=missing-function-docstring
+    logger = logging.getLogger("ti_vit")
+    logger.addHandler(logging.StreamHandler(sys.stdout))
+    logger.setLevel(logging.INFO)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--output-onnx", type=str, required=True, help="Path to the output ONNX file.")
+    parser.add_argument(
+        "-t",
+        "--model-type",
+        type=str,
+        required=False,
+        default="npu-max-perf",
+        help='Type of the final model (optional argument). Possible values are "npu-max-acc", "npu-max-perf", or "cpu".'
+        ' Default value is "npu-max-perf".',
+    )
+    parser.add_argument(
+        "-c",
+        "--checkpoint",
+        type=str,
+        required=False,
+        help="Path to the ViT checkpoint (optional argument). By default, the torchvision ViT_B_16 checkpoint "
+        "is downloaded.",
+        default=None,
+    )
+    parser.add_argument(
+        "-r",
+        "--resolution",
+        type=int,
+        required=False,
+        default=224,
+        help="Resolution of input images (optional argument). Default value is 224.",
+    )
+    args = parser.parse_args()
+
+    export(
+        checkpoint_path=args.checkpoint,
+        output_onnx_path=args.output_onnx,
+        model_type=args.model_type,
+        resolution=args.resolution,
+    )
diff --git a/src/ti_vit/mlp.py b/src/ti_vit/mlp.py
new file mode 100644
index 0000000..37f8184
--- /dev/null
+++ b/src/ti_vit/mlp.py
@@ -0,0 +1,156 @@
+from enum import Enum
+
+import torch
+from torch import nn
+from torchvision.models.vision_transformer import MLPBlock
+
+from ti_vit.common import copy_weights
+from ti_vit.common import sync_device_and_mode
+
+
+class MLPType(Enum):
+    """
+    Type of MLP block.
+
+    - CONV_CONV - MLP block assembled as ``convolution_with_kernel_1x1 + activation + convolution_with_kernel_1x1``
+    - LINEAR_CONV - MLP block assembled as ``linear + activation + convolution_with_kernel_1x1``
+
+    """
+
+    CONV_CONV = "CONV_CONV"
+    LINEAR_CONV = "LINEAR_CONV"
+
+
+class GeluApproximationType(Enum):
+    """
+    GELU approximation type.
+
+    - NONE - disable approximation
+    - SIGMOID - approximate as ``x * sigmoid(1.702 * x)``
+    - TANH - approximate as ``0.5 * x * (tanh(0.7978845834732056 * (x + 0.044715 * x * x * x)) + 1.0)``
+
+    """
+
+    NONE = "NONE"
+    SIGMOID = "SIGMOID"
+    TANH = "TANH"
+
+
+class TICompatibleMLP(nn.Module):
+    """TI compatible MLP block."""
+
+    def __init__(
+        self,
+        dims: int,
+        hidden_dims: int,
+        mlp_type: MLPType = MLPType.CONV_CONV,
+        gelu_approx_type: GeluApproximationType = GeluApproximationType.NONE,
+    ):
+        """
+        Parameters
+        ----------
+        dims : int
+            Number of channels of the input.
+        hidden_dims : int
+            Number of channels of the expanded tensor.
+        mlp_type : MLPType
+            MLP type (see ``MLPType`` enum documentation).
+        gelu_approx_type : GeluApproximationType
+            GELU approximation type (see ``GeluApproximationType`` enum documentation).
+        """
+        super().__init__()
+
+        try:
+            self.gelu = {
+                GeluApproximationType.NONE: nn.GELU(),
+                GeluApproximationType.SIGMOID: self._gelu_approx_sigmoid,
+                GeluApproximationType.TANH: self._gelu_approx_tanh,
+            }[gelu_approx_type]
+            self._gelu_approx_type = gelu_approx_type
+        except KeyError as exc:
+            raise ValueError(f'Got unknown type of gelu approximation "{gelu_approx_type}"') from exc
+
+        if mlp_type == MLPType.CONV_CONV:
+            self.expand = nn.Conv2d(in_channels=dims, out_channels=hidden_dims, kernel_size=(1, 1))
+            self.shrink = nn.Conv2d(in_channels=hidden_dims, out_channels=dims, kernel_size=(1, 1))
+        elif mlp_type == MLPType.LINEAR_CONV:
+            self.expand = nn.Linear(in_features=dims, out_features=hidden_dims)
+            self.shrink = nn.Conv2d(in_channels=hidden_dims, out_channels=dims, kernel_size=(1, 1))
+        else:
+            raise ValueError(f'Got unknown mlp_type "{mlp_type}"')
+
+        self._mlp_type = mlp_type
+
+    @staticmethod
+    def _gelu_approx_tanh(x: torch.Tensor) -> torch.Tensor:
+        # This is the default torch approximation
+        # (0.5 * x * (tanh(0.7978845834732056 * (x + 0.044715 * x * x * x)) + 1.0)),
+        # where tanh is replaced by (2.0 * sigmoid(2.0 * x) - 1.0).
+        return x * torch.sigmoid(1.5957691669464111 * (x + 0.044715 * x * x * x))
+
+    @staticmethod
+    def _gelu_approx_sigmoid(x: torch.Tensor) -> torch.Tensor:
+        # simplified torch approximation
+        return x * torch.sigmoid(1.702 * x)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # pylint: disable=missing-function-docstring
+        if self._mlp_type == MLPType.CONV_CONV:
+            x = x.unsqueeze(3).permute(0, 2, 1, 3)
+            x = self.expand(x)
+            x = self.gelu(x)
+        else:
+            x = self.expand(x)
+            if self._gelu_approx_type == GeluApproximationType.NONE:
+                x = self.gelu(x)
+                x = x.unsqueeze(3).permute(0, 2, 1, 3)
+            else:
+                x = x.unsqueeze(3).permute(0, 2, 1, 3)
+                x = self.gelu(x)
+
+        x = self.shrink(x)
+        x = x.permute(0, 2, 1, 3).squeeze(3)
+
+        return x
+
+    @classmethod
+    def from_module(
+        cls,
+        vit_mlp: MLPBlock,
+        mlp_type: MLPType = MLPType.CONV_CONV,
+        gelu_approx_type: GeluApproximationType = GeluApproximationType.NONE,
+    ) -> "TICompatibleMLP":
+        """
+        Create TI compatible MLP block from common ViT MLP block.
+
+        Parameters
+        ----------
+        vit_mlp : MLPBlock
+            Source block.
+        mlp_type : MLPType
+            MLP type (see ``MLPType`` enum documentation).
+        gelu_approx_type : GeluApproximationType
+            GELU approximation type (see ``GeluApproximationType`` enum documentation).
+
+        Returns
+        -------
+        TICompatibleMLP
+            Instance of ``TICompatibleMLP`` with appropriate weights, device and training mode.
+
+        """
+        expand, shrink = vit_mlp[0], vit_mlp[3]
+        if not isinstance(expand, nn.Linear) or not isinstance(shrink, nn.Linear):
+            raise ValueError('Got unknown type of vit_mlp. Cannot find "Linear" layers.')
Cannot find "Linear" layers.') + if not isinstance(vit_mlp[1], nn.GELU): + raise ValueError('Got unknown type of vit_mlp. Cannot find "GELU" layer.') + if not isinstance(vit_mlp[2], nn.Dropout) or not isinstance(vit_mlp[4], nn.Dropout): + raise ValueError('Got unknown type of vit_mlp. Cannot find "dropout" layers.') + + ti_compatible_mlp = cls( + dims=expand.in_features, + hidden_dims=expand.out_features, + mlp_type=mlp_type, + gelu_approx_type=gelu_approx_type, + ) + sync_device_and_mode(src=vit_mlp, dst=ti_compatible_mlp) + + copy_weights(src=expand, dst=ti_compatible_mlp.expand) + copy_weights(src=shrink, dst=ti_compatible_mlp.shrink) + + return ti_compatible_mlp diff --git a/src/ti_vit/model.py b/src/ti_vit/model.py new file mode 100644 index 0000000..01399e9 --- /dev/null +++ b/src/ti_vit/model.py @@ -0,0 +1,124 @@ +import logging +import typing +from typing import Any +from typing import Dict +from typing import NamedTuple +from typing import Optional + +import torch +from torch import nn +from torchvision.models.vision_transformer import EncoderBlock +from torchvision.models.vision_transformer import VisionTransformer + +from ti_vit.attention import AttentionType +from ti_vit.attention import TICompatibleAttention +from ti_vit.mlp import GeluApproximationType +from ti_vit.mlp import MLPType +from ti_vit.mlp import TICompatibleMLP + +_LOGGER = logging.getLogger(__name__) + + +class _BlockCfg(NamedTuple): + attention_cfg: Optional[Dict[str, Any]] + mlp_cfg: Optional[Dict[str, Any]] + + +class _TICompatibleVit(nn.Module): + def __init__(self, model: VisionTransformer, cfg: Dict[int, _BlockCfg]): + super().__init__() + + self._model = model + + attn_counter, mlp_counter = 0, 0 + for block_index, block_cfg in cfg.items(): + block: EncoderBlock = typing.cast(EncoderBlock, model.encoder.layers[block_index]) + + if block_cfg.attention_cfg is not None: + self_attention = TICompatibleAttention.from_module(block.self_attention, **block_cfg.attention_cfg) + setattr(block, "self_attention", self_attention) + _LOGGER.debug( + f"REPLACE {type(block.self_attention)} => {type(self_attention)} " + f"(BLOCK={block_index}, CFG={block_cfg.attention_cfg})" + ) + attn_counter += 1 + + if block_cfg.mlp_cfg is not None: + mlp = TICompatibleMLP.from_module(block.mlp, **block_cfg.mlp_cfg) + setattr(block, "mlp", mlp) + _LOGGER.debug( + f"REPLACE {type(block.mlp)} => {type(mlp)} " f"(BLOCK={block_index}, CFG={block_cfg.mlp_cfg})" + ) + mlp_counter += 1 + + _LOGGER.info(f"{attn_counter} attentions replaced") + _LOGGER.info(f"{mlp_counter} MLPs replaced") + + def forward(self, x: torch.Tensor) -> torch.Tensor: # pylint: disable=missing-function-docstring + return self._model(x) + + +class TICompatibleVitOrtMaxPerf(_TICompatibleVit): + """TI compatible ViT model with maximum performance.""" + + def __init__(self, model: VisionTransformer, ignore_tidl_errors: bool = False): + """ + Parameters + ---------- + model : VisionTransformer + Source ViT model. + ignore_tidl_errors : bool + Experimental option. 
+        """
+        if ignore_tidl_errors:
+            cfg = {i: self._mlp_perf_block_cfg() if i < 8 else self._attn_mlp_perf_block_cfg() for i in range(12)}
+        else:
+            cfg = {i: self._mlp_perf_block_cfg() for i in range(12)}
+
+        super().__init__(model=model, cfg=cfg)
+
+    @staticmethod
+    def _attn_mlp_perf_block_cfg() -> _BlockCfg:
+        return _BlockCfg(
+            attention_cfg={
+                "attention_type": AttentionType.CONV_LINEAR,
+            },
+            mlp_cfg={"mlp_type": MLPType.CONV_CONV, "gelu_approx_type": GeluApproximationType.TANH},
+        )
+
+    @staticmethod
+    def _mlp_perf_block_cfg() -> _BlockCfg:
+        return _BlockCfg(
+            attention_cfg=None,
+            mlp_cfg={"mlp_type": MLPType.CONV_CONV, "gelu_approx_type": GeluApproximationType.TANH},
+        )
+
+
+class TICompatibleVitOrtMaxAcc(_TICompatibleVit):
+    """TI compatible ViT model with minimal accuracy drop."""
+
+    def __init__(self, model: VisionTransformer):
+        """
+        Parameters
+        ----------
+        model : VisionTransformer
+            Source ViT model.
+        """
+        super().__init__(
+            model=model,
+            cfg={i: self._mlp_lc_block_cfg() if i < 8 else self._mlp_cc_block_cfg() for i in range(12)},
+        )
+
+    @staticmethod
+    def _mlp_lc_block_cfg() -> _BlockCfg:
+        return _BlockCfg(
+            attention_cfg=None,
+            mlp_cfg={"mlp_type": MLPType.LINEAR_CONV, "gelu_approx_type": GeluApproximationType.NONE},
+        )
+
+    @staticmethod
+    def _mlp_cc_block_cfg() -> _BlockCfg:
+        return _BlockCfg(
+            attention_cfg=None,
+            mlp_cfg={"mlp_type": MLPType.CONV_CONV, "gelu_approx_type": GeluApproximationType.NONE},
+        )
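
The conversion above replaces `nn.Linear` layers with 1x1 `nn.Conv2d` layers operating on a `(B, C, N, 1)` layout
(see `copy_weights` and the `forward` methods of `TICompatibleAttention` and `TICompatibleMLP`). The standalone
sketch below — illustrative only, not part of the package — checks that the two formulations are numerically
equivalent:

```python
import torch
from torch import nn

B, N, C = 2, 197, 768  # batch, tokens, channels (ViT-B/16 sizes)

linear = nn.Linear(C, C)
conv = nn.Conv2d(C, C, kernel_size=(1, 1))

# Same weight copy as ti_vit.common.copy_weights: (out, in) -> (out, in, 1, 1).
with torch.no_grad():
    conv.weight.copy_(linear.weight.unsqueeze(-1).unsqueeze(-1))
    conv.bias.copy_(linear.bias)

x = torch.randn(B, N, C)
# (B, N, C) -> (B, N, C, 1) -> (B, C, N, 1), as in the TI compatible forward passes, then undo the permute.
y_conv = conv(x.unsqueeze(3).permute(0, 2, 1, 3)).permute(0, 2, 1, 3).squeeze(3)

print(torch.allclose(linear(x), y_conv, atol=1e-5))  # expected: True
```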