-
Notifications
You must be signed in to change notification settings - Fork 222
/
yolo.py
437 lines (364 loc) · 18.7 KB
/
yolo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Run a YOLOv3/YOLOv2 style detection model on test images.
"""
import colorsys
import os, sys, argparse
import cv2
import time
from timeit import default_timer as timer
import tensorflow as tf
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Lambda
from tensorflow_model_optimization.sparsity import keras as sparsity
from PIL import Image
from yolo5.model import get_yolo5_model, get_yolo5_inference_model
from yolo5.postprocess_np import yolo5_postprocess_np
from yolo3.model import get_yolo3_model, get_yolo3_inference_model
from yolo3.postprocess_np import yolo3_postprocess_np
from yolo2.model import get_yolo2_model, get_yolo2_inference_model
from yolo2.postprocess_np import yolo2_postprocess_np
from common.data_utils import preprocess_image
from common.utils import get_classes, get_anchors, get_colors, draw_boxes, optimize_tf_gpu
#from tensorflow.keras.utils import multi_gpu_model
# Process-wide environment tweaks, applied before the GPU helper below runs:
# TF_CPP_MIN_LOG_LEVEL is set to '2' and only CUDA device '0' is made visible.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})

# Project helper from common.utils; presumably tunes TF GPU usage -- confirm
# against its implementation.
optimize_tf_gpu(tf, K)
#tf.enable_eager_execution()

# Default option values shared by the YOLO_np and YOLO wrappers. Each key
# becomes an instance attribute and can be overridden through constructor
# keyword arguments (and therefore through the command line flags in main()).
default_config = {
    # model family/backbone name; its prefix selects the model builder
    "model_type": 'tiny_yolo3_darknet',
    # Keras .h5 weights file
    "weights_path": os.path.join('weights', 'yolov3-tiny.h5'),
    # whether the weights come from a pruning-wrapped model
    "pruning_model": False,
    # text files describing the anchors and the class names
    "anchors_path": os.path.join('configs', 'tiny_yolo3_anchors.txt'),
    "classes_path": os.path.join('configs', 'coco_classes.txt'),
    # confidence and IoU thresholds handed to post-processing
    "score": 0.1,
    "iou": 0.4,
    # model input size as (height, width)
    "model_input_shape": (416, 416),
    # forwarded as elim_grid_sense to the YOLOv2/v3/v4 code paths
    "elim_grid_sense": False,
    #"gpu_num" : 1,
}
class YOLO_np(object):
    """Detector wrapper that runs the raw Keras YOLO model and delegates
    box decoding to the ``yolo*_postprocess_np`` helpers.

    Options start from ``default_config`` and are overridden by the keyword
    arguments given to the constructor; every option becomes an instance
    attribute (``self.model_type``, ``self.score``, ...).
    """
    _defaults = default_config

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of option ``n``.

        Unknown names do not raise; a message string naming the
        unrecognized attribute is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Load class names, anchors and colors, then build the model.

        Args:
            **kwargs: option overrides; keys not present in the defaults are
                stored as plain attributes as well.
        """
        super(YOLO_np, self).__init__()
        self.__dict__.update(self._defaults) # set up default values
        self.__dict__.update(kwargs) # and update with user overrides
        self.class_names = get_classes(self.classes_path)
        self.anchors = get_anchors(self.anchors_path)
        self.colors = get_colors(len(self.class_names))
        K.set_learning_phase(0)
        self.yolo_model = self._generate_model()

    def _generate_model(self):
        """Build the Keras model selected by ``self.model_type`` and load
        ``self.weights_path`` into it.

        Returns:
            The constructed model (with pruning wrappers stripped when
            ``self.pruning_model`` is set).

        Raises:
            AssertionError: if the weights path does not end in ``.h5``, or
                if a failure occurred after the model was built and the
                model head does not match the anchor/class counts.
            Exception: whatever the model builder raised (e.g. ``ValueError``
                for an unsupported model type) when no model could be built.
        """
        weights_path = os.path.expanduser(self.weights_path)
        assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        #YOLOv3 model has 9 anchors and 3 feature layers but
        #Tiny YOLOv3 model has 6 anchors and 2 feature layers,
        #so we can calculate feature layers number to get model type
        num_feature_layers = num_anchors//3

        # Stays None until a builder returns, so the handler below can tell
        # "construction failed" apart from "loading failed".
        yolo_model = None
        try:
            model_type = self.model_type
            if model_type.startswith(('scaled_yolo4_', 'yolo5_')):
                # Scaled-YOLOv4 & YOLOv5 entrance
                yolo_model, _ = get_yolo5_model(model_type, num_feature_layers, num_anchors, num_classes, input_shape=self.model_input_shape + (3,), model_pruning=self.pruning_model)
            elif model_type.startswith(('yolo3_', 'yolo4_', 'tiny_yolo3_', 'tiny_yolo4_')):
                # YOLOv3 & v4 entrance
                yolo_model, _ = get_yolo3_model(model_type, num_feature_layers, num_anchors, num_classes, input_shape=self.model_input_shape + (3,), model_pruning=self.pruning_model)
            elif model_type.startswith(('yolo2_', 'tiny_yolo2_')):
                # YOLOv2 entrance
                yolo_model, _ = get_yolo2_model(model_type, num_anchors, num_classes, input_shape=self.model_input_shape + (3,), model_pruning=self.pruning_model)
            else:
                raise ValueError('Unsupported model type')

            yolo_model.load_weights(weights_path) # make sure model, anchors and classes match
            if self.pruning_model:
                yolo_model = sparsity.strip_pruning(yolo_model)
            yolo_model.summary()
        except Exception as e:
            print(repr(e))
            if yolo_model is None:
                # No model was built, so there is nothing to validate;
                # surface the real error instead of failing on an unbound
                # local in the check below.
                raise
            # A model exists but a later step failed: point at the most
            # likely cause. If the check passes, execution continues with
            # the model as it is.
            assert yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'
        print('{} model, anchors, and classes loaded.'.format(weights_path))

        #if self.gpu_num>=2:
            #yolo_model = multi_gpu_model(yolo_model, gpus=self.gpu_num)
        return yolo_model

    def detect_image(self, image):
        """Run detection on one image and draw the results on a copy of it.

        Args:
            image: PIL image; its ``size`` is read as (width, height).

        Returns:
            Tuple ``(annotated PIL image, boxes, class names, scores)``.
        """
        if self.model_input_shape != (None, None):
            assert self.model_input_shape[0]%32 == 0, 'Multiples of 32 required'
            assert self.model_input_shape[1]%32 == 0, 'Multiples of 32 required'

        image_data = preprocess_image(image, self.model_input_shape)
        #origin image shape, in (height, width) format
        image_shape = image.size[::-1]

        start = time.time()
        out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
        # take the timestamp before printing so console I/O is not counted
        end = time.time()
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        print("Inference time: {:.8f}s".format(end - start))

        #draw result on input image
        image_array = np.array(image, dtype='uint8')
        image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores, self.class_names, self.colors)

        out_classnames = [self.class_names[c] for c in out_classes]
        return Image.fromarray(image_array), out_boxes, out_classnames, out_scores

    def predict(self, image_data, image_shape):
        """Run the model on preprocessed data and post-process its output.

        Args:
            image_data: model input as produced by ``preprocess_image``.
            image_shape: original image shape in (height, width) order.

        Returns:
            Tuple ``(out_boxes, out_classes, out_scores)``.

        Raises:
            ValueError: if ``self.model_type`` has no known prefix.
        """
        model_type = self.model_type
        if model_type.startswith(('scaled_yolo4_', 'yolo5_')):
            # Scaled-YOLOv4 & YOLOv5 entrance, enable "elim_grid_sense" by default
            postprocess, elim_grid_sense = yolo5_postprocess_np, True
        elif model_type.startswith(('yolo3_', 'yolo4_', 'tiny_yolo3_', 'tiny_yolo4_')):
            # YOLOv3 & v4 entrance
            postprocess, elim_grid_sense = yolo3_postprocess_np, self.elim_grid_sense
        elif model_type.startswith(('yolo2_', 'tiny_yolo2_')):
            # YOLOv2 entrance
            postprocess, elim_grid_sense = yolo2_postprocess_np, self.elim_grid_sense
        else:
            raise ValueError('Unsupported model type')

        out_boxes, out_classes, out_scores = postprocess(self.yolo_model.predict(image_data), image_shape, self.anchors, len(self.class_names), self.model_input_shape, max_boxes=100, confidence=self.score, iou_threshold=self.iou, elim_grid_sense=elim_grid_sense)
        return out_boxes, out_classes, out_scores

    def dump_model_file(self, output_model_file):
        """Save the loaded model to ``output_model_file``."""
        self.yolo_model.save(output_model_file)
class YOLO(object):
    """Detector wrapper around an "inference model" obtained from the
    ``get_yolo*_inference_model`` builders.

    The inference model is fed the image data together with the original
    image shape and returns boxes, scores and classes directly. Options
    start from ``default_config`` and are overridden by constructor keyword
    arguments; each one becomes an instance attribute.
    """
    _defaults = default_config

    @classmethod
    def get_defaults(cls, n):
        """Look up the default for option ``n``; unknown names yield a
        message string rather than an exception."""
        if n not in cls._defaults:
            return "Unrecognized attribute name '" + n + "'"
        return cls._defaults[n]

    def __init__(self, **kwargs):
        """Apply defaults and overrides, read classes/anchors, build the model."""
        super().__init__()
        options = vars(self)
        options.update(self._defaults)  # defaults first
        options.update(kwargs)          # caller overrides win
        self.class_names = get_classes(self.classes_path)
        self.anchors = get_anchors(self.anchors_path)
        self.colors = get_colors(len(self.class_names))
        K.set_learning_phase(0)
        self.inference_model = self._generate_model()

    def _generate_model(self):
        """Create the inference model matching ``self.model_type``.

        Raises:
            AssertionError: if the weights path does not end in ``.h5``.
            ValueError: if the model type prefix is not recognised.
        """
        weights_file = os.path.expanduser(self.weights_path)
        assert weights_file.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        class_count = len(self.class_names)
        kind = self.model_type

        # Choose the builder and the grid-sensitivity flag from the prefix.
        if kind.startswith(('scaled_yolo4_', 'yolo5_')):
            # Scaled-YOLOv4 & YOLOv5: "elim_grid_sense" is always enabled
            build, grid_flag = get_yolo5_inference_model, True
        elif kind.startswith(('yolo3_', 'yolo4_', 'tiny_yolo3_', 'tiny_yolo4_')):
            # YOLOv3 & v4
            build, grid_flag = get_yolo3_inference_model, self.elim_grid_sense
        elif kind.startswith(('yolo2_', 'tiny_yolo2_')):
            # YOLOv2
            build, grid_flag = get_yolo2_inference_model, self.elim_grid_sense
        else:
            raise ValueError('Unsupported model type')

        net = build(
            kind,
            self.anchors,
            class_count,
            weights_path=weights_file,
            input_shape=self.model_input_shape + (3,),
            confidence=self.score,
            iou_threshold=self.iou,
            elim_grid_sense=grid_flag,
        )
        net.summary()
        return net

    def predict(self, image_data, image_shape):
        """Run the inference model on one preprocessed image.

        Returns:
            Tuple ``(boxes, classes, scores)`` taken from the first entry of
            each model output; boxes and classes are cast to int32.
        """
        boxes, scores, classes = self.inference_model.predict([image_data, image_shape])
        return boxes[0].astype(np.int32), classes[0].astype(np.int32), scores[0]

    def detect_image(self, image):
        """Detect objects in a PIL image and draw them on a copy.

        Returns:
            Tuple ``(annotated PIL image, boxes, class names, scores)``.
        """
        input_shape = self.model_input_shape
        if input_shape != (None, None):
            for side in input_shape[:2]:
                assert side%32 == 0, 'Multiples of 32 required'

        image_data = preprocess_image(image, input_shape)

        # original image shape as a 1x2 array in (height, width) order
        img_width, img_height = image.size[0], image.size[1]
        image_shape = np.expand_dims(np.array([img_height, img_width]), 0)

        t_begin = time.time()
        out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
        t_end = time.time()
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        print("Inference time: {:.8f}s".format(t_end - t_begin))

        # render the detections onto a uint8 copy of the input
        canvas = draw_boxes(np.array(image, dtype='uint8'), out_boxes, out_classes, out_scores, self.class_names, self.colors)

        out_classnames = []
        for class_index in out_classes:
            out_classnames.append(self.class_names[class_index])
        return Image.fromarray(canvas), out_boxes, out_classnames, out_scores

    def dump_model_file(self, output_model_file):
        """Save the inference model to ``output_model_file``."""
        self.inference_model.save(output_model_file)

    def dump_saved_model(self, saved_model_path):
        """Export the inference model into directory ``saved_model_path``,
        creating the directory when needed."""
        os.makedirs(saved_model_path, exist_ok=True)
        tf.keras.experimental.export_saved_model(self.inference_model, saved_model_path)
        print('export inference model to %s' % str(saved_model_path))
def detect_video(yolo, video_path, output_path=""):
    """Run detection on every frame of a video or webcam stream.

    Each annotated frame is shown in a window named "result" with an FPS
    overlay and, when ``output_path`` is non-empty, appended to an MPEG-4
    file. Processing stops at end of stream or when 'q' is pressed.

    Args:
        yolo: detector exposing ``detect_image(pil_image)``.
        video_path: path of the video file, or the string '0' to open
            capture device 0.
        output_path: destination video file; "" disables writing.

    Raises:
        IOError: if the capture source cannot be opened.
    """
    import cv2
    vid = cv2.VideoCapture(0 if video_path == '0' else video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    isOutput = output_path != ""
    out = None
    try:
        if isOutput:
            # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
            # to convert it to x264 to reduce file size:
            # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
            #
            #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if video_path == '0' else cv2.VideoWriter_fourcc(*"mp4v")
            video_FourCC = cv2.VideoWriter_fourcc(*"mp4v")
            video_fps = vid.get(cv2.CAP_PROP_FPS)
            video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            # the capture device ('0') gets a fixed 5 fps instead of the reported rate
            out = cv2.VideoWriter(output_path, video_FourCC, (5. if video_path == '0' else video_fps), video_size)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        while True:
            ret, frame = vid.read()
            if not ret:
                break

            # OpenCV delivers frames in BGR order; the detector is fed RGB
            # images (as in detect_img), so convert before detection ...
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            image, _, _, _ = yolo.detect_image(image)
            # ... and back to BGR for OpenCV display/encoding. cvtColor also
            # yields a fresh array that putText can draw on.
            result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)

            # count frames and refresh the FPS label about once per second
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if isOutput:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release everything, also when detection or display raised
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def detect_img(yolo):
    """Interactive loop: prompt for an image file, run detection, show it.

    The loop has no exit condition of its own; it ends when ``input()``
    or the detector raises (e.g. Ctrl-C / end of input).

    Args:
        yolo: detector exposing ``detect_image(pil_image)`` that returns the
            annotated image as the first element of a 4-tuple.
    """
    while True:
        img = input('Input image filename:')
        try:
            image = Image.open(img).convert('RGB')
        except Exception:
            # Only ordinary errors (missing file, unreadable image, ...) are
            # retried; KeyboardInterrupt/SystemExit now propagate instead of
            # being reported as an open error.
            print('Open Error! Try again!')
            continue

        r_image, _, _, _ = yolo.detect_image(image)
        r_image.show()
def main():
    """Command line entry point: dump the model, or run image/video detection.

    Modes (checked in this order):
      * ``--dump_model``: save the loaded model to ``--output_model_file``
        and exit.
      * ``--image``: interactive image detection via ``detect_img``.
      * otherwise: video detection on ``--input`` via ``detect_video``.

    Raises:
        ValueError: if ``--dump_model`` is given without
            ``--output_model_file``.
        AssertionError: if the input shape is not a multiple of 32.
    """
    # class YOLO defines the default value, so suppress any default here
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description='demo or dump out YOLO h5 model')

    # Command line options
    parser.add_argument(
        '--model_type', type=str,
        help='YOLO model type: yolo3_mobilenet_lite/tiny_yolo3_mobilenet/yolo3_darknet/..., default ' + YOLO.get_defaults("model_type")
    )

    parser.add_argument(
        '--weights_path', type=str,
        help='path to model weight file, default ' + YOLO.get_defaults("weights_path")
    )

    parser.add_argument(
        '--pruning_model', default=False, action="store_true",
        help='Whether to be a pruning model/weights file, default ' + str(YOLO.get_defaults("pruning_model"))
    )

    parser.add_argument(
        '--anchors_path', type=str,
        help='path to anchor definitions, default ' + YOLO.get_defaults("anchors_path")
    )

    parser.add_argument(
        '--classes_path', type=str,
        help='path to class definitions, default ' + YOLO.get_defaults("classes_path")
    )

    parser.add_argument(
        '--model_input_shape', type=str,
        help='model image input shape as <height>x<width>, default ' +
        str(YOLO.get_defaults("model_input_shape")[0])+'x'+str(YOLO.get_defaults("model_input_shape")[1]),
        default=str(YOLO.get_defaults("model_input_shape")[0])+'x'+str(YOLO.get_defaults("model_input_shape")[1])
    )

    parser.add_argument(
        '--elim_grid_sense', default=False, action="store_true",
        help = "Eliminate grid sensitivity, default " + str(YOLO.get_defaults("elim_grid_sense"))
    )

    #parser.add_argument(
        #'--gpu_num', type=int,
        #help='Number of GPU to use, default ' + str(YOLO.get_defaults("gpu_num"))
    #)

    parser.add_argument(
        '--image', default=False, action="store_true",
        help='Image detection mode, will ignore all positional arguments'
    )

    # Options for video detection mode
    parser.add_argument(
        "--input", nargs='?', type=str,required=False,default='./path2your_video',
        help = "Video input path"
    )

    parser.add_argument(
        "--output", nargs='?', type=str, default="",
        help = "[Optional] Video output path"
    )

    # Options for model dump
    parser.add_argument(
        '--dump_model', default=False, action="store_true",
        help='Dump out training model to inference model'
    )

    parser.add_argument(
        '--output_model_file', type=str,
        help='output inference model file'
    )

    args = parser.parse_args()

    # param parse
    if args.model_input_shape:
        height, width = args.model_input_shape.split('x')
        args.model_input_shape = (int(height), int(width))
        assert (args.model_input_shape[0]%32 == 0 and args.model_input_shape[1]%32 == 0), 'model_input_shape should be multiples of 32'

    # --output_model_file has no default and missing options are suppressed
    # by the parser, so the attribute may not exist at all: read it with
    # getattr. Checking before the model is built reports the mistake
    # without loading any weights first.
    output_model_file = getattr(args, 'output_model_file', None)
    if args.dump_model and not output_model_file:
        raise ValueError('output model file is not specified')

    # get wrapped inference object, you can also try "YOLO" here ;)
    yolo = YOLO_np(**vars(args))

    if args.dump_model:
        # Dump out training model to inference model
        print('Dumping out training model to inference model')
        yolo.dump_model_file(output_model_file)
        sys.exit()

    if args.image:
        # Image detection mode, disregard any remaining command line arguments
        print("Image detection mode")
        if "input" in args:
            print(" Ignoring remaining command line arguments: " + args.input + "," + args.output)
        detect_img(yolo)
    elif "input" in args:
        detect_video(yolo, args.input, args.output)
    else:
        print("Must specify at least video_input_path.  See usage with --help.")


if __name__ == '__main__':
    main()