diff --git a/README.md b/README.md index fc9c186..17ec6d8 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ A common YOLOv3/v2 object detection pipeline inherited from [keras-yolo3-Mobilen - [x] Darknet19 - [x] MobilenetV1 - [x] MobilenetV2 -- [x] EfficientNetB0 +- [x] EfficientNet - [x] VGG16 - [x] Xception diff --git a/common/backbones/efficientnet.py b/common/backbones/efficientnet.py index 2452d3e..aae93f6 100644 --- a/common/backbones/efficientnet.py +++ b/common/backbones/efficientnet.py @@ -606,8 +606,8 @@ def preprocess_input(x): if __name__ == '__main__': input_tensor = Input(shape=(None, None, 3), name='image_input') - #model = EfficientNetB0(include_top=False, input_shape=(416, 416, 3), weights='imagenet') - model = EfficientNetB0(include_top=True, input_tensor=input_tensor, weights='imagenet') + model = EfficientNetB1(include_top=False, input_shape=(416, 416, 3), weights='imagenet') + #model = EfficientNetB0(include_top=True, input_tensor=input_tensor, weights='imagenet') model.summary() import numpy as np diff --git a/common/utils.py b/common/utils.py index de68131..2a76a8b 100644 --- a/common/utils.py +++ b/common/utils.py @@ -117,7 +117,8 @@ def draw_boxes(image, boxes, classes, scores, class_names, colors, show_score=Tr label = '{} {:.2f}'.format(class_name, score) else: label = '{}'.format(class_name) - print(label, (xmin, ymin), (xmax, ymax)) + #print(label, (xmin, ymin), (xmax, ymax)) + # if no color info, use black(0,0,0) if colors == None: color = (0,0,0) diff --git a/eval.py b/eval.py index 1bd491a..b013b3d 100644 --- a/eval.py +++ b/eval.py @@ -13,6 +13,7 @@ from PIL import Image import operator import matplotlib.pyplot as plt +from tqdm import tqdm from tensorflow.keras.models import load_model import tensorflow.keras.backend as K @@ -278,6 +279,7 @@ def get_prediction_class_records(model, model_format, annotation_records, anchor session = model.createSession() pred_classes_records = {} + pbar = tqdm(total=len(annotation_records), desc='Eval model') for (image_name, gt_records) in annotation_records.items(): image = Image.open(image_name) image_array = np.array(image, dtype='uint8') @@ -297,7 +299,8 @@ def get_prediction_class_records(model, model_format, annotation_records, anchor else: raise ValueError('invalid model format') - print('Found {} boxes for {}'.format(len(pred_boxes), image_name)) + #print('Found {} boxes for {}'.format(len(pred_boxes), image_name)) + pbar.update(1) if save_result: @@ -334,6 +337,7 @@ def get_prediction_class_records(model, model_format, annotation_records, anchor for pred_class_list in pred_classes_records.values(): pred_class_list.sort(key=lambda ele: ele[2], reverse=True) + pbar.close() return pred_classes_records diff --git a/requirements.txt b/requirements.txt index db69a73..9b5805e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,6 @@ opencv-python tensorflow-gpu tensorflow-model-optimization matplotlib +tqdm pillow mnn diff --git a/tools/validate_yolo.py b/tools/validate_yolo.py index 7b8b142..66e47b1 100644 --- a/tools/validate_yolo.py +++ b/tools/validate_yolo.py @@ -250,7 +250,8 @@ def handle_prediction(prediction, image_file, image, image_shape, anchors, class print('Found {} boxes for {}'.format(len(boxes), image_file)) for box, cls, score in zip(boxes, classes, scores): - print("Class: {}, Score: {}".format(class_names[cls], score)) + xmin, ymin, xmax, ymax = box + print("Class: {}, Score: {}, Box: {},{}".format(class_names[cls], score, (xmin, ymin), (xmax, ymax))) colors = get_colors(class_names) image = draw_boxes(image, boxes, classes, scores, class_names, colors) diff --git a/yolo3/model.py b/yolo3/model.py index 62e3234..8aebca7 100644 --- a/yolo3/model.py +++ b/yolo3/model.py @@ -16,7 +16,7 @@ from yolo3.models.yolo3_vgg16 import yolo3_vgg16_body, tiny_yolo3_vgg16_body from yolo3.models.yolo3_xception import yolo3_xception_body, yolo3lite_xception_body, tiny_yolo3_xception_body, tiny_yolo3lite_xception_body, yolo3_spp_xception_body from yolo3.models.yolo3_nano import yolo3_nano_body -from yolo3.models.yolo3_efficientnet import yolo3_efficientnetb0_body, tiny_yolo3_efficientnetb0_body, yolo3lite_efficientnetb0_body, yolo3lite_spp_efficientnetb0_body, tiny_yolo3lite_efficientnetb0_body +from yolo3.models.yolo3_efficientnet import yolo3_efficientnet_body, tiny_yolo3_efficientnet_body, yolo3lite_efficientnet_body, yolo3lite_spp_efficientnet_body, tiny_yolo3lite_efficientnet_body from yolo3.loss import yolo3_loss from yolo3.postprocess import batched_yolo3_postprocess, batched_yolo3_prenms, Yolo3PostProcessLayer @@ -40,9 +40,11 @@ 'yolo3_shufflenetv2_lite': [yolo3lite_shufflenetv2_body, 205, None], 'yolo3_shufflenetv2_lite_spp': [yolo3lite_spp_shufflenetv2_body, 205, None], - 'yolo3_efficientnetb0': [yolo3_efficientnetb0_body, 235, None], - 'yolo3_efficientnetb0_lite': [yolo3lite_efficientnetb0_body, 235, None], - 'yolo3_efficientnetb0_lite_spp': [yolo3lite_spp_efficientnetb0_body, 235, None], + # NOTE: backbone_length is for EfficientNetB0 + # if change to other efficientnet level, you need to modify it + 'yolo3_efficientnet': [yolo3_efficientnet_body, 235, None], + 'yolo3_efficientnet_lite': [yolo3lite_efficientnet_body, 235, None], + 'yolo3_efficientnet_lite_spp': [yolo3lite_spp_efficientnet_body, 235, None], 'yolo3_darknet': [yolo3_body, 185, 'weights/darknet53.h5'], 'yolo3_darknet_spp': [custom_yolo3_spp_body, 185, 'weights/yolov3-spp.h5'], @@ -73,8 +75,10 @@ 'tiny_yolo3_shufflenetv2': [tiny_yolo3_shufflenetv2_body, 205, None], 'tiny_yolo3_shufflenetv2_lite': [tiny_yolo3lite_shufflenetv2_body, 205, None], - 'tiny_yolo3_efficientnetb0': [tiny_yolo3_efficientnetb0_body, 235, None], - 'tiny_yolo3_efficientnetb0_lite': [tiny_yolo3lite_efficientnetb0_body, 235, None], + # NOTE: backbone_length is for EfficientNetB0 + # if change to other efficientnet level, you need to modify it + 'tiny_yolo3_efficientnet': [tiny_yolo3_efficientnet_body, 235, None], + 'tiny_yolo3_efficientnet_lite': [tiny_yolo3lite_efficientnet_body, 235, None], 'tiny_yolo3_darknet': [custom_tiny_yolo3_body, 20, 'weights/yolov3-tiny.h5'], #Doesn't have pretrained weights, so no need to return backbone length diff --git a/yolo3/models/yolo3_efficientnet.py b/yolo3/models/yolo3_efficientnet.py index d9e12f1..14b6aa3 100644 --- a/yolo3/models/yolo3_efficientnet.py +++ b/yolo3/models/yolo3_efficientnet.py @@ -9,179 +9,337 @@ from yolo3.models.layers import compose, DarknetConv2D, DarknetConv2D_BN_Leaky, Depthwise_Separable_Conv2D_BN_Leaky, make_last_layers, make_depthwise_separable_last_layers, make_spp_depthwise_separable_last_layers -def yolo3_efficientnetb0_body(inputs, num_anchors, num_classes): - """Create YOLO_V3 EfficientNetB0 model CNN body in Keras.""" - efficientnetb0 = EfficientNetB0(input_tensor=inputs, weights='imagenet', include_top=False) - - # input: 416 x 416 x 3 - # top_activation: 13 x 13 x 1280 - # block6a_expand_activation(middle in block6a): 26 x 26 x 672 - # block5c_add(end of block5c): 26 x 26 x 112 - # block4a_expand_activation(middle in block4a): 52 x 52 x 240 - # block3b_add(end of block3b): 52 x 52 x 40 - - f1 = efficientnetb0.get_layer('top_activation').output - # f1 :13 x 13 x 1280 - x, y1 = make_last_layers(f1, 672, num_anchors * (num_classes + 5)) +def get_efficientnet_backbone_info(input_tensor, level=0): + """Parse different level EfficientNet backbone feature map info for YOLOv3 head build.""" + if level == 0: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 1280 + # block6a_expand_activation(middle in block6a): 26 x 26 x 672 + # block5c_add(end of block5c): 26 x 26 x 112 + # block4a_expand_activation(middle in block4a): 52 x 52 x 240 + # block3b_add(end of block3b): 52 x 52 x 40 + efficientnet = EfficientNetB0(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 1280 + f2_name = 'block6a_expand_activation' + f2_channel_num = 672 + f3_name = 'block4a_expand_activation' + f3_channel_num = 240 + + elif level == 1: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 1280 + # block6a_expand_activation(middle in block6a): 26 x 26 x 672 + # block5d_add(end of block5d): 26 x 26 x 112 + # block4a_expand_activation(middle in block4a): 52 x 52 x 240 + # block3c_add(end of block3c): 52 x 52 x 40 + efficientnet = EfficientNetB1(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 1280 + f2_name = 'block6a_expand_activation' + f2_channel_num = 672 + f3_name = 'block4a_expand_activation' + f3_channel_num = 240 + + elif level == 2: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 1408 + # block6a_expand_activation(middle in block6a): 26 x 26 x 720 + # block5d_add(end of block5d): 26 x 26 x 120 + # block4a_expand_activation(middle in block4a): 52 x 52 x 288 + # block3c_add(end of block3c): 52 x 52 x 48 + efficientnet = EfficientNetB2(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 1408 + f2_name = 'block6a_expand_activation' + f2_channel_num = 720 + f3_name = 'block4a_expand_activation' + f3_channel_num = 288 + + elif level == 3: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 1536 + # block6a_expand_activation(middle in block6a): 26 x 26 x 816 + # block5e_add(end of block5e): 26 x 26 x 136 + # block4a_expand_activation(middle in block4a): 52 x 52 x 288 + # block3c_add(end of block3c): 52 x 52 x 48 + efficientnet = EfficientNetB3(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 1536 + f2_name = 'block6a_expand_activation' + f2_channel_num = 816 + f3_name = 'block4a_expand_activation' + f3_channel_num = 288 + + elif level == 4: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 1792 + # block6a_expand_activation(middle in block6a): 26 x 26 x 960 + # block5f_add(end of block5f): 26 x 26 x 160 + # block4a_expand_activation(middle in block4a): 52 x 52 x 336 + # block3d_add(end of block3d): 52 x 52 x 56 + efficientnet = EfficientNetB4(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 1792 + f2_name = 'block6a_expand_activation' + f2_channel_num = 960 + f3_name = 'block4a_expand_activation' + f3_channel_num = 336 + + elif level == 5: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 2048 + # block6a_expand_activation(middle in block6a): 26 x 26 x 1056 + # block5g_add(end of block5g): 26 x 26 x 176 + # block4a_expand_activation(middle in block4a): 52 x 52 x 384 + # block3e_add(end of block3e): 52 x 52 x 64 + efficientnet = EfficientNetB5(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 2048 + f2_name = 'block6a_expand_activation' + f2_channel_num = 1056 + f3_name = 'block4a_expand_activation' + f3_channel_num = 384 + + elif level == 6: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 2304 + # block6a_expand_activation(middle in block6a): 26 x 26 x 1200 + # block5h_add(end of block5h): 26 x 26 x 200 + # block4a_expand_activation(middle in block4a): 52 x 52 x 432 + # block3f_add(end of block3f): 52 x 52 x 72 + efficientnet = EfficientNetB6(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 2304 + f2_name = 'block6a_expand_activation' + f2_channel_num = 1200 + f3_name = 'block4a_expand_activation' + f3_channel_num = 432 + + elif level == 7: + # input: 416 x 416 x 3 + # top_activation: 13 x 13 x 2560 + # block6a_expand_activation(middle in block6a): 26 x 26 x 1344 + # block5j_add(end of block5j): 26 x 26 x 224 + # block4a_expand_activation(middle in block4a): 52 x 52 x 480 + # block3g_add(end of block3g): 52 x 52 x 80 + efficientnet = EfficientNetB7(input_tensor=input_tensor, weights='imagenet', include_top=False) + + f1_name = 'top_activation' + f1_channel_num = 2560 + f2_name = 'block6a_expand_activation' + f2_channel_num = 1344 + f3_name = 'block4a_expand_activation' + f3_channel_num = 480 + + else: + raise ValueError('Invalid efficientnet backbone type') + + feature_map_info = {'f1_name' : f1_name, + 'f1_channel_num' : f1_channel_num, + 'f2_name' : f2_name, + 'f2_channel_num' : f2_channel_num, + 'f3_name' : f3_name, + 'f3_channel_num' : f3_channel_num, + } + + return efficientnet, feature_map_info + + +def yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=0): + ''' + Create YOLO_v3 EfficientNet model CNN body in keras. + # Arguments + level: EfficientNet level number. + by default we use basic EfficientNetB0 as backbone + ''' + efficientnet, feature_map_info = get_efficientnet_backbone_info(inputs, level=level) + + f1 = efficientnet.get_layer('top_activation').output + + #x, y1 = make_last_layers(f1, 672, num_anchors * (num_classes + 5)) + f2_channel_num = feature_map_info['f2_channel_num'] + x, y1 = make_last_layers(f1, f2_channel_num, num_anchors * (num_classes + 5)) x = compose( - DarknetConv2D_BN_Leaky(336, (1,1)), + #DarknetConv2D_BN_Leaky(336, (1,1)), + DarknetConv2D_BN_Leaky(f2_channel_num//2, (1,1)), UpSampling2D(2))(x) - f2 = efficientnetb0.get_layer('block6a_expand_activation').output - # f2: 26 x 26 x 672 + f2 = efficientnet.get_layer('block6a_expand_activation').output + x = Concatenate()([x,f2]) - x, y2 = make_last_layers(x, 240, num_anchors*(num_classes+5)) + #x, y2 = make_last_layers(x, 240, num_anchors*(num_classes+5)) + f3_channel_num = feature_map_info['f3_channel_num'] + x, y2 = make_last_layers(x, f3_channel_num, num_anchors*(num_classes+5)) x = compose( - DarknetConv2D_BN_Leaky(120, (1,1)), + #DarknetConv2D_BN_Leaky(120, (1,1)), + DarknetConv2D_BN_Leaky(f3_channel_num//2, (1,1)), UpSampling2D(2))(x) - f3 = efficientnetb0.get_layer('block4a_expand_activation').output - # f3 : 52 x 52 x 240 + f3 = efficientnet.get_layer('block4a_expand_activation').output + x = Concatenate()([x, f3]) - x, y3 = make_last_layers(x, 120, num_anchors*(num_classes+5)) + #x, y3 = make_last_layers(x, 120, num_anchors*(num_classes+5)) + x, y3 = make_last_layers(x, f3_channel_num//2, num_anchors*(num_classes+5)) return Model(inputs = inputs, outputs=[y1,y2,y3]) -def yolo3lite_efficientnetb0_body(inputs, num_anchors, num_classes): - '''Create YOLO_v3 Lite EfficientNetB0 model CNN body in keras.''' - efficientnetb0 = EfficientNetB0(input_tensor=inputs, weights='imagenet', include_top=False) +def yolo3lite_efficientnet_body(inputs, num_anchors, num_classes, level=0): + ''' + Create YOLO_v3 Lite EfficientNet model CNN body in keras. + # Arguments + level: EfficientNet level number. + by default we use basic EfficientNetB0 as backbone + ''' + efficientnet, feature_map_info = get_efficientnet_backbone_info(inputs, level=level) - # input: 416 x 416 x 3 - # top_activation: 13 x 13 x 1280 - # block6a_expand_activation(middle in block6a): 26 x 26 x 672 - # block5c_add(end of block5c): 26 x 26 x 112 - # block4a_expand_activation(middle in block4a): 52 x 52 x 240 - # block3b_add(end of block3b): 52 x 52 x 40 + f1 = efficientnet.get_layer('top_activation').output - f1 = efficientnetb0.get_layer('top_activation').output - # f1 :13 x 13 x 1280 - x, y1 = make_depthwise_separable_last_layers(f1, 672, num_anchors * (num_classes + 5), block_id_str='8') + #x, y1 = make_depthwise_separable_last_layers(f1, 672, num_anchors * (num_classes + 5), block_id_str='8') + f2_channel_num = feature_map_info['f2_channel_num'] + x, y1 = make_depthwise_separable_last_layers(f1, f2_channel_num, num_anchors * (num_classes + 5), block_id_str='8') x = compose( - DarknetConv2D_BN_Leaky(336, (1,1)), + #DarknetConv2D_BN_Leaky(336, (1,1)), + DarknetConv2D_BN_Leaky(f2_channel_num//2, (1,1)), UpSampling2D(2))(x) - f2 = efficientnetb0.get_layer('block6a_expand_activation').output - # f2: 26 x 26 x 672 + f2 = efficientnet.get_layer('block6a_expand_activation').output + x = Concatenate()([x,f2]) - x, y2 = make_depthwise_separable_last_layers(x, 240, num_anchors*(num_classes+5), block_id_str='9') + #x, y2 = make_depthwise_separable_last_layers(x, 240, num_anchors*(num_classes+5), block_id_str='9') + f3_channel_num = feature_map_info['f3_channel_num'] + x, y2 = make_depthwise_separable_last_layers(x, f3_channel_num, num_anchors*(num_classes+5), block_id_str='9') x = compose( - DarknetConv2D_BN_Leaky(120, (1,1)), + #DarknetConv2D_BN_Leaky(120, (1,1)), + DarknetConv2D_BN_Leaky(f3_channel_num//2, (1,1)), UpSampling2D(2))(x) - f3 = efficientnetb0.get_layer('block4a_expand_activation').output - # f3 : 52 x 52 x 240 + f3 = efficientnet.get_layer('block4a_expand_activation').output + x = Concatenate()([x, f3]) - x, y3 = make_depthwise_separable_last_layers(x, 120, num_anchors*(num_classes+5), block_id_str='10') + #x, y3 = make_depthwise_separable_last_layers(x, 120, num_anchors*(num_classes+5), block_id_str='10') + x, y3 = make_depthwise_separable_last_layers(x, f3_channel_num//2, num_anchors*(num_classes+5), block_id_str='10') return Model(inputs = inputs, outputs=[y1,y2,y3]) -def yolo3lite_spp_efficientnetb0_body(inputs, num_anchors, num_classes): - '''Create YOLO_v3 Lite SPP EfficientNetB0 model CNN body in keras.''' - efficientnetb0 = EfficientNetB0(input_tensor=inputs, weights='imagenet', include_top=False) +def yolo3lite_spp_efficientnet_body(inputs, num_anchors, num_classes, level=0): + ''' + Create YOLO_v3 Lite SPP EfficientNet model CNN body in keras. + # Arguments + level: EfficientNet level number. + by default we use basic EfficientNetB0 as backbone + ''' + efficientnet, feature_map_info = get_efficientnet_backbone_info(inputs, level=level) - # input: 416 x 416 x 3 - # top_activation: 13 x 13 x 1280 - # block6a_expand_activation(middle in block6a): 26 x 26 x 672 - # block5c_add(end of block5c): 26 x 26 x 112 - # block4a_expand_activation(middle in block4a): 52 x 52 x 240 - # block3b_add(end of block3b): 52 x 52 x 40 + f1 = efficientnet.get_layer('top_activation').output - f1 = efficientnetb0.get_layer('top_activation').output - # f1 :13 x 13 x 1280 - #x, y1 = make_depthwise_separable_last_layers(f1, 672, num_anchors * (num_classes + 5), block_id_str='8') - x, y1 = make_spp_depthwise_separable_last_layers(f1, 672, num_anchors * (num_classes + 5), block_id_str='8') + #x, y1 = make_spp_depthwise_separable_last_layers(f1, 672, num_anchors * (num_classes + 5), block_id_str='8') + f2_channel_num = feature_map_info['f2_channel_num'] + x, y1 = make_spp_depthwise_separable_last_layers(f1, f2_channel_num, num_anchors * (num_classes + 5), block_id_str='8') x = compose( - DarknetConv2D_BN_Leaky(336, (1,1)), + #DarknetConv2D_BN_Leaky(336, (1,1)), + DarknetConv2D_BN_Leaky(f2_channel_num//2, (1,1)), UpSampling2D(2))(x) - f2 = efficientnetb0.get_layer('block6a_expand_activation').output - # f2: 26 x 26 x 672 + f2 = efficientnet.get_layer('block6a_expand_activation').output + x = Concatenate()([x,f2]) - x, y2 = make_depthwise_separable_last_layers(x, 240, num_anchors*(num_classes+5), block_id_str='9') + #x, y2 = make_depthwise_separable_last_layers(x, 240, num_anchors*(num_classes+5), block_id_str='9') + f3_channel_num = feature_map_info['f3_channel_num'] + x, y2 = make_depthwise_separable_last_layers(x, f3_channel_num, num_anchors*(num_classes+5), block_id_str='9') x = compose( - DarknetConv2D_BN_Leaky(120, (1,1)), + #DarknetConv2D_BN_Leaky(120, (1,1)), + DarknetConv2D_BN_Leaky(f3_channel_num//2, (1,1)), UpSampling2D(2))(x) - f3 = efficientnetb0.get_layer('block4a_expand_activation').output - # f3 : 52 x 52 x 240 + f3 = efficientnet.get_layer('block4a_expand_activation').output + x = Concatenate()([x, f3]) - x, y3 = make_depthwise_separable_last_layers(x, 120, num_anchors*(num_classes+5), block_id_str='10') + #x, y3 = make_depthwise_separable_last_layers(x, 120, num_anchors*(num_classes+5), block_id_str='10') + x, y3 = make_depthwise_separable_last_layers(x, f3_channel_num//2, num_anchors*(num_classes+5), block_id_str='10') return Model(inputs = inputs, outputs=[y1,y2,y3]) -def tiny_yolo3_efficientnetb0_body(inputs, num_anchors, num_classes): - '''Create Tiny YOLO_v3 EfficientNetB0 model CNN body in keras.''' - efficientnetb0 = EfficientNetB0(input_tensor=inputs, weights='imagenet', include_top=False) - - # input: 416 x 416 x 3 - # top_activation: 13 x 13 x 1280 - # block6a_expand_activation(middle in block6a): 26 x 26 x 672 - # block5c_add(end of block5c): 26 x 26 x 112 - # block4a_expand_activation(middle in block4a): 52 x 52 x 240 - # block3b_add(end of block3b): 52 x 52 x 40 +def tiny_yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=0): + ''' + Create Tiny YOLO_v3 EfficientNet model CNN body in keras. + # Arguments + level: EfficientNet level number. + by default we use basic EfficientNetB0 as backbone + ''' + efficientnet, feature_map_info = get_efficientnet_backbone_info(inputs, level=level) - x1 = efficientnetb0.get_layer('block6a_expand_activation').output + x1 = efficientnet.get_layer('block6a_expand_activation').output + x2 = efficientnet.get_layer('top_activation').output + f1_channel_num = feature_map_info['f1_channel_num'] + f2_channel_num = feature_map_info['f2_channel_num'] - x2 = efficientnetb0.get_layer('top_activation').output - x2 = DarknetConv2D_BN_Leaky(672, (1,1))(x2) + x2 = DarknetConv2D_BN_Leaky(f2_channel_num, (1,1))(x2) y1 = compose( - DarknetConv2D_BN_Leaky(1280, (3,3)), - #Depthwise_Separable_Conv2D_BN_Leaky(filters=1280, kernel_size=(3, 3), block_id_str='17'), + DarknetConv2D_BN_Leaky(f1_channel_num, (3,3)), + #Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num, kernel_size=(3, 3), block_id_str='17'), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) x2 = compose( - DarknetConv2D_BN_Leaky(336, (1,1)), + DarknetConv2D_BN_Leaky(f2_channel_num//2, (1,1)), UpSampling2D(2))(x2) y2 = compose( Concatenate(), - DarknetConv2D_BN_Leaky(672, (3,3)), - #Depthwise_Separable_Conv2D_BN_Leaky(filters=672, kernel_size=(3, 3), block_id_str='18'), + DarknetConv2D_BN_Leaky(f2_channel_num, (3,3)), + #Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num, kernel_size=(3, 3), block_id_str='18'), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) return Model(inputs, [y1,y2]) -def tiny_yolo3lite_efficientnetb0_body(inputs, num_anchors, num_classes): - '''Create Tiny YOLO_v3 Lite EfficientNetB0 model CNN body in keras.''' - efficientnetb0 = EfficientNetB0(input_tensor=inputs, weights='imagenet', include_top=False) - - # input: 416 x 416 x 3 - # top_activation: 13 x 13 x 1280 - # block6a_expand_activation(middle in block6a): 26 x 26 x 672 - # block5c_add(end of block5c): 26 x 26 x 112 - # block4a_expand_activation(middle in block4a): 52 x 52 x 240 - # block3b_add(end of block3b): 52 x 52 x 40 +def tiny_yolo3lite_efficientnet_body(inputs, num_anchors, num_classes, level=0): + ''' + Create Tiny YOLO_v3 Lite EfficientNet model CNN body in keras. + # Arguments + level: EfficientNet level number. + by default we use basic EfficientNetB0 as backbone + ''' + efficientnet, feature_map_info = get_efficientnet_backbone_info(inputs, level=level) - x1 = efficientnetb0.get_layer('block6a_expand_activation').output + x1 = efficientnet.get_layer('block6a_expand_activation').output + x2 = efficientnet.get_layer('top_activation').output + f1_channel_num = feature_map_info['f1_channel_num'] + f2_channel_num = feature_map_info['f2_channel_num'] - x2 = efficientnetb0.get_layer('top_activation').output - x2 = DarknetConv2D_BN_Leaky(672, (1,1))(x2) + x2 = DarknetConv2D_BN_Leaky(f2_channel_num, (1,1))(x2) y1 = compose( - #DarknetConv2D_BN_Leaky(1280, (3,3)), - Depthwise_Separable_Conv2D_BN_Leaky(filters=1280, kernel_size=(3, 3), block_id_str='17'), + #DarknetConv2D_BN_Leaky(f1_channel_num, (3,3)), + Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num, kernel_size=(3, 3), block_id_str='17'), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) x2 = compose( - DarknetConv2D_BN_Leaky(336, (1,1)), + DarknetConv2D_BN_Leaky(f2_channel_num//2, (1,1)), UpSampling2D(2))(x2) y2 = compose( Concatenate(), - #DarknetConv2D_BN_Leaky(672, (3,3)), - Depthwise_Separable_Conv2D_BN_Leaky(filters=672, kernel_size=(3, 3), block_id_str='18'), + #DarknetConv2D_BN_Leaky(f2_channel_num, (3,3)), + Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num, kernel_size=(3, 3), block_id_str='18'), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) return Model(inputs, [y1,y2])