Skip to content

Commit

Permalink
updated efficientnet models with onnx and real data checkpoint for ef…
Browse files Browse the repository at this point in the history
…ficientnetv2_m
  • Loading branch information
ryanchesler committed Feb 26, 2023
1 parent d7e33b6 commit 6a3bbf3
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 27 deletions.
8 changes: 8 additions & 0 deletions doctr/models/detection/differentiable_binarization/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,10 @@ def __init__(
self.assume_straight_pages = True
self.postprocessor = DBPostProcessor(assume_straight_pages=self.assume_straight_pages)
self.device = torch.cuda.is_available()
if os.environ.get("CUDA_VISIBLE_DEVICES", []) == "":
self.device = "cpu"
elif len(os.environ.get("CUDA_VISIBLE_DEVICES", [])) > 0:
self.device = "cuda"
model_path = str(download_from_url(self.cfg["url"], cache_subdir='models'))
if self.device:
self.sess = ort.InferenceSession(model_path, providers=['CUDAExecutionProvider'])
Expand Down Expand Up @@ -399,6 +403,10 @@ def __init__(
self.assume_straight_pages = True
self.postprocessor = DBPostProcessor(assume_straight_pages=self.assume_straight_pages)
self.device = torch.cuda.is_available()
if os.environ.get("CUDA_VISIBLE_DEVICES", []) == "":
self.device = "cpu"
elif len(os.environ.get("CUDA_VISIBLE_DEVICES", [])) > 0:
self.device = "cuda"
model_path = str(download_from_url(self.cfg["url"], cache_subdir='models'))
if self.device:
self.sess = ort.InferenceSession(model_path, providers=['CUDAExecutionProvider'])
Expand Down
6 changes: 5 additions & 1 deletion doctr/models/detection/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import torch
from torch import nn

import os
from doctr.models.preprocessor import PreProcessor

__all__ = ["DetectionPredictor"]
Expand All @@ -33,6 +33,10 @@ def __init__(
self.pre_processor = pre_processor
self.postprocessor = self.model.postprocessor
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if os.environ.get("CUDA_VISIBLE_DEVICES", []) == "":
self.device = "cpu"
elif len(os.environ.get("CUDA_VISIBLE_DEVICES", [])) > 0:
self.device = "cuda"
if "onnx" not in str((type(self.model))) and (self.device == torch.device("cuda")):
# self.model = nn.DataParallel(self.model)
# self.model = self.model.half()
Expand Down
191 changes: 183 additions & 8 deletions doctr/models/recognition/crnn/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
from ..core import RecognitionModel, RecognitionPostProcessor

__all__ = ['CRNN', 'crnn_vgg16_bn', 'crnn_vgg16_bn_onnx', 'crnn_mobilenet_v3_small',
'crnn_mobilenet_v3_large', 'crnn_efficientnet_b0', 'crnn_efficientnet_b3', 'crnn_efficientnetv2_m']
'crnn_mobilenet_v3_large', 'crnn_efficientnet_b0', 'crnn_efficientnet_b0_onnx',
'crnn_efficientnet_b3', 'crnn_efficientnet_b3_onnx', 'crnn_efficientnetv2_m',
'crnn_efficientnetv2_m_onnx', 'crnn_efficientnetv2_mV2', 'crnn_efficientnetv2_mV2_onnx']

default_cfgs: Dict[str, Dict[str, Any]] = {
"crnn_vgg16_bn": {
Expand All @@ -33,7 +35,7 @@
"vocab": VOCABS["legacy_french"],
"url": "https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0",
},
'crnn_vgg16_bn_onnx': {
'crnn_vgg16_bn_onnx': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
Expand Down Expand Up @@ -61,20 +63,55 @@
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_crnnv2/crnn_effnet_b0.pt'
},
'crnn_efficientnet_b0_onnx': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_onnx_models/crnn_effnet_b0.onnx'
},
'crnn_efficientnet_b3': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': "https://github.com/h2oai/doctr/releases/download/efficientnet_crnnv2/crnn_effnet_b3.pt",
},
'crnn_efficientnet_b3_onnx': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_onnx_models/crnn_effnet_b3.onnx'
},
'crnn_efficientnetv2_m': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_crnnv2/crnn_effnetv2_m.pt'
},
'crnn_efficientnetv2_m_onnx': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_onnx_models/crnn_effnetv2_m.onnx'
},
'crnn_efficientnetv2_mV2': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_onnx_models/crnn_effnetv2_mV2.pt'
},
'crnn_efficientnetv2_mV2_onnx': {
'mean': (0.694, 0.695, 0.693),
'std': (0.299, 0.296, 0.301),
'input_shape': (3, 32, 128),
'vocab': VOCABS['french'] + " ",
'url': 'https://github.com/h2oai/doctr/releases/download/efficientnet_onnx_models/crnn_effnetv2_mV2.onnx'
},
}


Expand Down Expand Up @@ -293,18 +330,23 @@ def crnn_vgg16_bn(pretrained: bool = False, **kwargs: Any) -> CRNN:

return _crnn("crnn_vgg16_bn", pretrained, vgg16_bn_r, ignore_keys=["linear.weight", "linear.bias"], **kwargs)

class crnn_vgg16_bn_onnx(RecognitionModel, nn.Module):
"""Onnx converted crnn_vgg16_bn_onnx"""
class _crnn_onnx(RecognitionModel, nn.Module):
"""Onnx converted models"""
def __init__(
self,
pretrained = True
pretrained = True,
model_name = None
) -> None:
super().__init__()
self.vocab = default_cfgs["crnn_vgg16_bn_onnx"]["vocab"]
self.cfg = default_cfgs["crnn_vgg16_bn_onnx"]
self.vocab = default_cfgs[model_name]["vocab"]
self.cfg = default_cfgs[model_name]

self.postprocessor = CTCPostProcessor(vocab=self.vocab)
self.device = torch.cuda.is_available()
if os.environ["CUDA_VISIBLE_DEVICES"] == "":
self.device = "cpu"
elif len(os.environ["CUDA_VISIBLE_DEVICES"]) > 0:
self.device = "cuda"
model_path = str(download_from_url(self.cfg["url"], cache_subdir='models'))
if self.device:
self.sess = ort.InferenceSession(model_path, providers=['CUDAExecutionProvider'])
Expand All @@ -323,6 +365,115 @@ def forward(
else:
logits = self.compiled_model_onnx([x.detach().cpu().numpy()])[self.output_layer_onnx]
return logits

def crnn_efficientnet_b0_onnx(pretrained: bool = False, **kwargs: Any) -> "_crnn_onnx":
    """ONNX-exported CRNN recognizer with an EfficientNet-B0 backbone.

    >>> import torch
    >>> from doctr.models import crnn_efficientnet_b0_onnx
    >>> model = crnn_efficientnet_b0_onnx(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): kept for signature parity with the other model
            factories. It is not forwarded: the wrapper is always built with
            ``pretrained=True`` because it only exists as an exported checkpoint.
        **kwargs: extra keyword arguments passed through to ``_crnn_onnx``.

    Returns:
        the ``_crnn_onnx`` wrapper configured from
        ``default_cfgs["crnn_efficientnet_b0_onnx"]``
    """
    # Do NOT inject ``rnn_units`` here: ``_crnn_onnx.__init__`` only takes
    # ``pretrained`` and ``model_name``, so forwarding ``rnn_units=512`` made
    # every call fail with ``TypeError`` (unexpected keyword argument).
    return _crnn_onnx(
        pretrained=True,
        model_name='crnn_efficientnet_b0_onnx',
        **kwargs,
    )
def crnn_efficientnet_b3_onnx(pretrained: bool = False, **kwargs: Any) -> "_crnn_onnx":
    """ONNX-exported CRNN recognizer with an EfficientNet-B3 backbone.

    >>> import torch
    >>> from doctr.models import crnn_efficientnet_b3_onnx
    >>> model = crnn_efficientnet_b3_onnx(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): kept for signature parity with the other model
            factories. It is not forwarded: the wrapper is always built with
            ``pretrained=True`` because it only exists as an exported checkpoint.
        **kwargs: extra keyword arguments passed through to ``_crnn_onnx``.

    Returns:
        the ``_crnn_onnx`` wrapper configured from
        ``default_cfgs["crnn_efficientnet_b3_onnx"]``
    """
    # Do NOT inject ``rnn_units`` here: ``_crnn_onnx.__init__`` only takes
    # ``pretrained`` and ``model_name``, so forwarding ``rnn_units=512`` made
    # every call fail with ``TypeError`` (unexpected keyword argument).
    return _crnn_onnx(
        pretrained=True,
        model_name='crnn_efficientnet_b3_onnx',
        **kwargs,
    )


def crnn_efficientnetv2_m_onnx(pretrained: bool = False, **kwargs: Any) -> "_crnn_onnx":
    """ONNX-exported CRNN recognizer with an EfficientNetV2-M backbone.

    >>> import torch
    >>> from doctr.models import crnn_efficientnetv2_m_onnx
    >>> model = crnn_efficientnetv2_m_onnx(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): kept for signature parity with the other model
            factories. It is not forwarded: the wrapper is always built with
            ``pretrained=True`` because it only exists as an exported checkpoint.
        **kwargs: extra keyword arguments passed through to ``_crnn_onnx``.

    Returns:
        the ``_crnn_onnx`` wrapper configured from
        ``default_cfgs["crnn_efficientnetv2_m_onnx"]``
    """
    # Do NOT inject ``rnn_units`` here: ``_crnn_onnx.__init__`` only takes
    # ``pretrained`` and ``model_name``, so forwarding ``rnn_units=512`` made
    # every call fail with ``TypeError`` (unexpected keyword argument).
    return _crnn_onnx(
        pretrained=True,
        model_name='crnn_efficientnetv2_m_onnx',
        **kwargs,
    )

def crnn_efficientnetv2_mV2_onnx(pretrained: bool = False, **kwargs: Any) -> "_crnn_onnx":
    """ONNX-exported CRNN recognizer for the ``crnn_efficientnetv2_mV2`` checkpoint.

    >>> import torch
    >>> from doctr.models import crnn_efficientnetv2_mV2_onnx
    >>> model = crnn_efficientnetv2_mV2_onnx(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): kept for signature parity with the other model
            factories. It is not forwarded: the wrapper is always built with
            ``pretrained=True`` because it only exists as an exported checkpoint.
        **kwargs: extra keyword arguments passed through to ``_crnn_onnx``.

    Returns:
        the ``_crnn_onnx`` wrapper configured from
        ``default_cfgs["crnn_efficientnetv2_mV2_onnx"]``
    """
    # Do NOT inject ``rnn_units`` here: ``_crnn_onnx.__init__`` only takes
    # ``pretrained`` and ``model_name``, so forwarding ``rnn_units=512`` made
    # every call fail with ``TypeError`` (unexpected keyword argument).
    return _crnn_onnx(
        pretrained=True,
        model_name='crnn_efficientnetv2_mV2_onnx',
        **kwargs,
    )

def crnn_vgg16_bn_onnx(pretrained: bool = False, **kwargs: Any) -> "_crnn_onnx":
    """ONNX-exported CRNN recognizer with a VGG-16 (batch-norm) backbone.

    >>> import torch
    >>> from doctr.models import crnn_vgg16_bn_onnx
    >>> model = crnn_vgg16_bn_onnx(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): kept for signature parity with the other model
            factories. It is not forwarded: the wrapper is always built with
            ``pretrained=True`` because it only exists as an exported checkpoint.
        **kwargs: extra keyword arguments passed through to ``_crnn_onnx``.

    Returns:
        the ``_crnn_onnx`` wrapper configured from
        ``default_cfgs["crnn_vgg16_bn_onnx"]``
    """
    # Do NOT inject ``rnn_units`` here: ``_crnn_onnx.__init__`` only takes
    # ``pretrained`` and ``model_name``, so forwarding ``rnn_units=512`` made
    # every call fail with ``TypeError`` (unexpected keyword argument).
    return _crnn_onnx(
        pretrained=True,
        model_name="crnn_vgg16_bn_onnx",
        **kwargs,
    )


def crnn_mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> CRNN:
Expand Down Expand Up @@ -424,7 +575,7 @@ def crnn_efficientnet_b3(pretrained: bool = False, **kwargs: Any) -> CRNN:
)

def crnn_efficientnetv2_m(pretrained: bool = False, **kwargs: Any) -> CRNN:
"""CRNN with efficientnet_b7
"""CRNN with efficientnetv2_m
>>> import torch
>>> from doctr.models import crnn_convnext_tiny
Expand All @@ -446,3 +597,27 @@ def crnn_efficientnetv2_m(pretrained: bool = False, **kwargs: Any) -> CRNN:
ignore_keys=['linear.weight', 'linear.bias'],
**kwargs,
)

def crnn_efficientnetv2_mV2(pretrained: bool = False, **kwargs: Any) -> CRNN:
    """CRNN recognizer on an EfficientNetV2-M backbone, ``crnn_efficientnetv2_mV2`` config.

    >>> import torch
    >>> from doctr.models import crnn_efficientnetv2_mV2
    >>> model = crnn_efficientnetv2_mV2(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: forwarded to ``_crnn``; ``rnn_units`` is always forced to 512,
            overriding any caller-supplied value.

    Returns:
        text recognition architecture
    """
    # Pin the recurrent width; a caller-provided ``rnn_units`` is overwritten.
    builder_kwargs = dict(kwargs, rnn_units=512)
    return _crnn(
        'crnn_efficientnetv2_mV2',
        pretrained,
        efficientnetv2_m,
        ignore_keys=['linear.weight', 'linear.bias'],
        **builder_kwargs,
    )
19 changes: 5 additions & 14 deletions doctr/models/recognition/export_rec_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@

from doctr.models import ocr_predictor

model = ocr_predictor(pretrained=True)
model = ocr_predictor(reco_arch = "crnn_efficientnetv2_mV2", pretrained=True)
model.reco_predictor.model = model.reco_predictor.model.eval()

input = torch.randn(1, 3, 32, 128)
input2 = torch.randn(49, 3, 32, 128)
model = model.to("cpu")
start = time.time()
pred = model.reco_predictor.model(input)
print("pytorch time", time.time() - start)
torch.onnx.export(model.reco_predictor.model,
input,
"rec.onnx",
"crnn_effnetv2_mV2.onnx",
export_params = True,
opset_version=11,
do_constant_folding=True,
Expand All @@ -24,19 +25,9 @@
dynamic_axes = {"input":{0:"batch_size"},
"output":{0:"batch_size"}})

import onnx
import onnxoptimizer

onnx_model = onnx.load("rec.onnx")

passes = onnxoptimizer.get_fuse_and_elimination_passes()
new_model = onnxoptimizer.optimize(model = onnx_model, passes = passes)
onnx.checker.check_model(new_model)
onnx.save(new_model, "optimized_rec.onnx")

import onnxruntime

ort_session = onnxruntime.InferenceSession("rec.onnx")
ort_session = onnxruntime.InferenceSession("crnn_effnetv2_mV2.onnx", providers = ['CPUExecutionProvider'])

ort_inputs = {"input":input.numpy()}
start = time.time()
Expand All @@ -57,7 +48,7 @@
from openvino.runtime import Core

ie = Core()
model_onnx = ie.read_model(model="rec.onnx")
model_onnx = ie.read_model(model="crnn_effnetv2_mV2.onnx")
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")

output_layer_onnx = compiled_model_onnx.output(0)
Expand Down
8 changes: 6 additions & 2 deletions doctr/models/recognition/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import torch
from torch import nn

import os
from doctr.models.preprocessor import PreProcessor

from ._utils import remap_preds, split_crops
Expand Down Expand Up @@ -37,6 +37,10 @@ def __init__(
self.model = model.eval()
self.postprocessor = self.model.postprocessor
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if os.environ.get("CUDA_VISIBLE_DEVICES", []) == "":
self.device = "cpu"
elif len(os.environ.get("CUDA_VISIBLE_DEVICES", [])) > 0:
self.device = "cuda"
if "onnx" not in str((type(self.model))) and (self.device == torch.device("cuda")):
# self.model = nn.DataParallel(self.model)
self.model = self.model.to(self.device)
Expand Down Expand Up @@ -82,7 +86,7 @@ def forward(
batch = batch.to(self.device)
# batch = batch.half()
char_logits = self.model(batch)
if type(char_logits) != torch.Tensor():
if not torch.is_tensor(char_logits):
char_logits = torch.tensor(char_logits)
raw += [self.postprocessor(char_logits)]

Expand Down
3 changes: 2 additions & 1 deletion doctr/models/recognition/zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@


ARCHS: List[str] = ['crnn_vgg16_bn', 'crnn_vgg16_bn_onnx', 'crnn_mobilenet_v3_small', 'crnn_mobilenet_v3_large', 'sar_resnet31', 'master',
'crnn_efficientnet_b0', 'crnn_efficientnet_b3', 'crnn_efficientnetv2_m', "parseq_large"]
'crnn_efficientnet_b0', 'crnn_efficientnet_b0_onnx', 'crnn_efficientnet_b3', 'crnn_efficientnet_b3_onnx',
'crnn_efficientnetv2_m', 'crnn_efficientnetv2_m_onnx', 'crnn_efficientnetv2_mV2', 'crnn_efficientnetv2_mV2_onnx', "parseq_large"]


def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from setuptools import setup
PKG_NAME = "python-doctr"
VERSION = os.getenv("BUILD_VERSION", "0.5.3a0")
VERSION = os.getenv("BUILD_VERSION", "0.5.4a0")


if __name__ == "__main__":
Expand Down

0 comments on commit 6a3bbf3

Please sign in to comment.