evaluate.py fix #107

Merged · 2 commits · Feb 18, 2020
15 changes: 13 additions & 2 deletions keras_retinanet/bin/evaluate.py
@@ -28,16 +28,21 @@
from .. import models
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import read_config_file, parse_anchor_parameters
from ..utils.eval import evaluate
from ..utils.gpu import setup_gpu
from ..utils.keras_version import check_keras_version
from ..utils.tf_version import check_tf_version


-def create_generator(args):
+def create_generator(args, preprocess_image):
""" Create generators for evaluation.
"""
common_args = {
'preprocess_image' : preprocess_image,
}

if args.dataset_type == 'coco':
# import here to prevent unnecessary dependency on cocoapi
from ..preprocessing.coco import CocoGenerator
@@ -49,6 +54,7 @@ def create_generator(args):
image_max_side=args.image_max_side,
config=args.config,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == 'pascal':
validation_generator = PascalVocGenerator(
@@ -58,6 +64,7 @@ def create_generator(args):
image_max_side=args.image_max_side,
config=args.config,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == 'csv':
validation_generator = CSVGenerator(
@@ -67,6 +74,7 @@ def create_generator(args):
image_max_side=args.image_max_side,
config=args.config,
shuffle_groups=False,
**common_args
)
else:
raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
@@ -129,7 +137,8 @@ def main(args=None):
args.config = read_config_file(args.config)

# create the generator
-generator = create_generator(args)
+backbone = models.backbone(args.backbone)
+generator = create_generator(args, backbone.preprocess_image)

# optionally load anchor parameters
anchor_params = None
@@ -140,6 +149,8 @@ def main(args=None):
print('Loading model, this may take a second...')
model = models.load_model(args.model, backbone_name=args.backbone)

generator.compute_shapes = make_shapes_callback(model)

# optionally convert the model
if args.convert_model:
model = models.convert_model(model, anchor_params=anchor_params)
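Taken together, the hunks above mean evaluate.py now builds the generator with the preprocessing function of the selected backbone rather than a generic default, and lets the loaded model drive the shape callback. A minimal sketch of the new call path, assuming a CSV dataset; the file paths and the exact attribute names on args are placeholders inferred from the hidden context lines:

from types import SimpleNamespace

from keras_retinanet import models
from keras_retinanet.bin.evaluate import create_generator

# stand-in for the parsed command-line arguments (values are placeholders)
args = SimpleNamespace(
    dataset_type='csv',
    annotations='annotations.csv',
    classes='classes.csv',
    image_min_side=800,
    image_max_side=1333,
    config=None,
)

backbone = models.backbone('mobilenet_v3_small')               # MobileNetV3Backbone instance
generator = create_generator(args, backbone.preprocess_image)  # generator now preprocesses with mode='tf'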
2 changes: 2 additions & 0 deletions keras_retinanet/models/__init__.py
@@ -51,6 +51,8 @@ def backbone(backbone_name):
"""
if 'resnet' in backbone_name:
from .resnet import ResNetBackbone as b
elif 'mobilenet_v3' in backbone_name:
from .mobilenet_v3 import MobileNetV3Backbone as b
elif 'mobilenet' in backbone_name:
from .mobilenet import MobileNetBackbone as b
elif 'vgg' in backbone_name:
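Note that the new elif has to sit before the existing 'mobilenet' branch: the dispatch is a plain substring test, so 'mobilenet_v3_small' also contains 'mobilenet' and would otherwise be routed to the old MobileNetBackbone. For illustration:

# both tests pass for the new names, so the order of the branches decides which backbone wins
'mobilenet' in 'mobilenet_v3_small'      # True
'mobilenet_v3' in 'mobilenet_v3_small'   # True, and with this change it is checked first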
119 changes: 119 additions & 0 deletions keras_retinanet/models/mobilenet_v3.py
@@ -0,0 +1,119 @@
'''
https://github.com/lacmus-foundation/lacmus
Copyright (C) 2019-2020 lacmus-foundation

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
'''

from . import Backbone
from . import retinanet

from ..utils.image import preprocess_image

import keras
from keras.utils import get_file

from .mobilenetv3.mobilenet_v3_base import relu6, hard_swish
from .mobilenetv3.mobilenet_v3_large import MobileNetV3_Large
from .mobilenetv3.mobilenet_v3_small import MobileNetV3_Small


class MobileNetV3Backbone(Backbone):
""" Describes backbone information and provides utility functions.
"""

def __init__(self, backbone):
super(MobileNetV3Backbone, self).__init__(backbone)
self.custom_objects['hard_swish'] = hard_swish
self.custom_objects['relu6'] = relu6

allowed_backbones = ['mobilenet_v3_small', 'mobilenet_v3_large']

def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return mobilenetv3_retinanet(*args, backbone_name=self.backbone, **kwargs)

def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
err_msg = "ImageNet weights are not available for the backbone {0}. Please train with --no-weights option"
err_msg = err_msg.format(self.backbone)
raise NotImplementedError(err_msg)

def validate(self):
""" Checks whether the backbone string is correct.
"""
name_parts = self.backbone.split('_')

if '_'.join(name_parts[:3]) not in MobileNetV3Backbone.allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, MobileNetV3Backbone.allowed_backbones))

def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='tf')
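Registering relu6 and hard_swish in custom_objects is what allows a saved MobileNetV3 RetinaNet to be deserialized again, since models.load_model (called in evaluate.py above with backbone_name) is expected to resolve custom objects through the backbone. A hedged usage sketch; the snapshot path is a placeholder:

from keras_retinanet import models

backbone = models.backbone('mobilenet_v3_small')   # custom_objects now contains relu6 and hard_swish
model = models.load_model('snapshots/mobilenet_v3_small.h5', backbone_name='mobilenet_v3_small')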


def mobilenetv3_retinanet(num_classes, backbone_name='mobilenet_v3_small', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a mobilenet backbone.

Args
num_classes: Number of classes to predict.
backbone_name: Which backbone to use (mobilenet_v3_small or mobilenet_v3_large).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).

Returns
RetinaNet model with a MobileNet backbone.
"""
name_parts = backbone_name.split('_')
if len(name_parts) > 3:
alpha = float(name_parts[3])
else:
alpha = 1.0

# choose default input
if inputs is None:
if keras.backend.image_data_format() == 'channels_first':
shape=(3, None, None)
else:
shape=(None, None, 3)
else:
shape = inputs.shape

if 'mobilenet_v3_small' in backbone_name:
backbone = MobileNetV3_Small(shape=shape, n_class=1, alpha=alpha, include_top=False).build()
layer_outputs = [
backbone.layers[30].output, # activation_7, bneck 3 before pw, 28x28x88
backbone.layers[98].output, # multiply_5, bneck 8 before pwl, 14x14x144
backbone.layers[145].output # activation_24, just before global pooling, 7x7x576
]
elif 'mobilenet_v3_large' in backbone_name:
backbone = MobileNetV3_Large(shape=shape, n_class=1, alpha=alpha, include_top=False).build()
layer_outputs = [
backbone.layers[67].output, # multiply_3, bneck 6 before pwl, 28x28x120
backbone.layers[129].output, # multiply_5, bneck 12 before pwl, 14x14x672
backbone.layers[176].output # activation_32, just before global pooling, 7x7x960
]

inputs = backbone.inputs
# create the full model
backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone_name)

# invoke modifier if given
if modifier:
backbone = modifier(backbone)

return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone.outputs, **kwargs)
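The width multiplier is taken from an optional fourth underscore-separated token in the backbone name, so variants can be requested purely through the name string. For example (names shown only to illustrate the parsing):

name_parts = 'mobilenet_v3_large_0.75'.split('_')              # ['mobilenet', 'v3', 'large', '0.75']
alpha = float(name_parts[3]) if len(name_parts) > 3 else 1.0   # 0.75; plain 'mobilenet_v3_large' gives 1.0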
Empty file.
144 changes: 144 additions & 0 deletions keras_retinanet/models/mobilenetv3/mobilenet_v3_base.py
@@ -0,0 +1,144 @@
"""MobileNet v3 models for Keras.
# Reference
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
"""


from keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D
from keras.layers import Activation, BatchNormalization, Add, Multiply, Reshape

from keras import backend as K

def relu6(x):
"""Relu 6
"""
return K.relu(x, max_value=6.0)

def hard_swish(x):
"""Hard swish
"""
return x * K.relu(x + 3.0, max_value=6.0) / 6.0
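Both activations are cheap, piecewise-linear stand-ins for ReLU and swish. A quick check of a few values (a sketch, assuming the usual TensorFlow backend):

import numpy as np
from keras import backend as K

x = K.constant(np.array([-4.0, -1.0, 0.0, 1.0, 3.0, 8.0]))
print(K.eval(relu6(x)))       # approximately [0. 0. 0. 1. 3. 6.]
print(K.eval(hard_swish(x)))  # approximately [0. -0.333 0. 0.667 3. 8.]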

class MobileNetBase:
def __init__(self, shape, n_class, alpha=1.0):
"""Init

# Arguments
shape: Tuple/list of 3 integers, shape of the input tensor.
n_class: Integer, number of classes.
alpha: Float, width multiplier.
"""
self.shape = shape
self.n_class = n_class
self.alpha = alpha

def _return_activation(self, x, nl):
"""Convolution Block
This function defines a activation choice.

# Arguments
x: Tensor, input tensor of conv layer.
nl: String, nonlinearity activation type.

# Returns
Output tensor.
"""
if nl == 'HS':
x = Activation(hard_swish)(x)
if nl == 'RE':
x = Activation(relu6)(x)

return x

def _conv_block(self, inputs, filters, kernel, strides, nl):
"""Convolution Block
This function defines a 2D convolution operation with BN and activation.

# Arguments
inputs: Tensor, input tensor of conv layer.
filters: Integer, the dimensionality of the output space.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution along the width and height.
Can be a single integer to specify the same value for
all spatial dimensions.
nl: String, nonlinearity activation type.

# Returns
Output tensor.
"""

channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
x = BatchNormalization(axis=channel_axis)(x)

return self._return_activation(x, nl)

def _squeeze(self, inputs):
"""Squeeze and Excitation.
This function defines a squeeze structure.

# Arguments
inputs: Tensor, input tensor of conv layer.
"""
input_channels = int(inputs.shape[-1])

x = GlobalAveragePooling2D()(inputs)
x = Dense(input_channels, activation='relu')(x)
x = Dense(input_channels, activation='hard_sigmoid')(x)
x = Reshape((1, 1, input_channels))(x)
x = Multiply()([inputs, x])

return x
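The squeeze path collapses each feature map to a single per-channel statistic, learns a gate for every channel, and multiplies it back into the input. A standalone sketch of the same shape flow, using the 7x7x576 map the small backbone ends with:

import keras

# illustrative only: mirrors _squeeze for a (7, 7, 576) feature map
inp = keras.layers.Input(shape=(7, 7, 576))
x = keras.layers.GlobalAveragePooling2D()(inp)             # (batch, 576)
x = keras.layers.Dense(576, activation='relu')(x)
x = keras.layers.Dense(576, activation='hard_sigmoid')(x)  # per-channel gate in [0, 1]
x = keras.layers.Reshape((1, 1, 576))(x)
out = keras.layers.Multiply()([inp, x])                     # (batch, 7, 7, 576), channels rescaled
print(keras.models.Model(inp, out).output_shape)            # (None, 7, 7, 576)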

def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl):
"""Bottleneck
This function defines a basic bottleneck structure.

# Arguments
inputs: Tensor, input tensor of conv layer.
filters: Integer, the dimensionality of the output space.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
e: Integer, expansion factor.
e is always applied to the input size.
s: An integer or tuple/list of 2 integers, specifying the strides
of the convolution along the width and height. Can be a single
integer to specify the same value for all spatial dimensions.
squeeze: Boolean, whether to apply the squeeze-and-excitation block.
nl: String, nonlinearity activation type.

# Returns
Output tensor.
"""

channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
input_shape = K.int_shape(inputs)

tchannel = int(e)
cchannel = int(self.alpha * filters)

r = s == 1 and input_shape[3] == filters

x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl)

x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x)
x = BatchNormalization(axis=channel_axis)(x)
x = self._return_activation(x, nl)

if squeeze:
x = self._squeeze(x)

x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x)
x = BatchNormalization(axis=channel_axis)(x)

if r:
x = Add()([x, inputs])

return x
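In other words, each bottleneck expands to e channels with a 1x1 convolution, filters spatially with a depthwise convolution, optionally applies the squeeze gate, projects back down with another 1x1 convolution, and adds the residual only when the stride is 1 and the channel count is unchanged. A single call on the base class might look like this; the expansion and filter values are illustrative, not taken from the actual small/large configurations:

import keras

base = MobileNetBase(shape=(224, 224, 3), n_class=1, alpha=1.0)
inputs = keras.layers.Input(shape=(224, 224, 3))
x = base._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')       # stem: 112x112x16
x = base._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')  # 56x56x24, no residual (stride 2)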

def build(self):
pass