detector_yolov7.py
import sys
sys.path.append("./yolov7")  # make the vendored yolov7 package importable
import numpy
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import cv2
import time
import math

from yolov7.models.experimental import attempt_load
from yolov7.utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from yolov7.utils.plots import plot_one_box
from yolov7.utils.torch_utils import select_device, time_synchronized, TracedModel

from configurator import config
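
# Expected configuration keys, inferred from their usage in this module
# (the configurator schema itself is not shown here):
#   [yolov7] weights, inference_size, conf_thres, iou_thres, augment, device,
#            resize_image_to_fit_multiply_of_32
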
class Detector:
    # yolov7 params
    weights = config["yolov7"]["weights"]  # model.pt path(s)
    img_size = int(config["yolov7"]["inference_size"])  # inference size (pixels)
    conf_thres = float(config["yolov7"]["conf_thres"])  # object confidence threshold (0.25 by default)
    iou_thres = float(config["yolov7"]["iou_thres"])  # IoU threshold for NMS
    classes = None  # filter by class: --class 0, or --class 0 2 3
    agnostic_nms = False  # class-agnostic NMS
    augment = bool(config["yolov7"]["augment"])  # augmented inference
    device = config["yolov7"]["device"]  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    source = 0  # file/folder, 0 for webcam
    no_trace = True  # keep tracing off (the original `trace = source` also evaluated to a falsy 0)
    trace = not no_trace
    _warmup_once = True  # run warmup passes on the first detect() call (a False start left the warmup block dead)

    def __init__(self, names):
        set_logging()
        # Initialize device
        self.device = select_device(self.device)
        self.half = self.device.type != 'cpu'  # half precision only supported on CUDA
        # Load model
        self.model = attempt_load(self.weights, map_location=self.device)  # load FP32 model
        self.stride = int(self.model.stride.max())  # model stride
        self.imgsz = check_img_size(self.img_size, s=self.stride)  # check img_size
        if self.trace:
            self.model = TracedModel(self.model, self.device, self.img_size)
        if self.half:
            self.model.half()  # to FP16
        # Second-stage classifier (not used)
        self.classify = False
        # cuDNN
        cudnn.benchmark = True  # speeds up inference for constant image sizes
        # Get names and colors: prefer the names stored in a DataParallel-wrapped
        # model, otherwise fall back to the caller-supplied list
        self.names = self.model.module.names if hasattr(self.model, 'module') else names
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]
        # Run inference once to initialize the model on the GPU
        if self.device.type != 'cpu':
            self.model(torch.zeros(1, 3, self.imgsz, self.imgsz).to(self.device).type_as(next(self.model.parameters())))  # run once

    def set_colors(self, new_colors):
        self.colors = new_colors

    def get_cls_label(self, _cls):
        return self.names[_cls]

    def detect(self, img: numpy.ndarray) -> dict:
        """
        Run inference (prediction) on a given image.
        The image dimensions must be a multiple of 32 (ex. 640), unless the
        resize_image_to_fit_multiply_of_32 config option is enabled, which pads
        the image automatically. The channel count must be 3 (RGB).
        :param img: Input image, a numpy.ndarray of shape (height, width, channels).
        :return: Detected bounding boxes as a dict keyed by class name.
        """
        # get the image dimensions
        img_height, img_width, img_channels = img.shape
        img0 = img  # preserve the original for plotting & coordinate rescaling
        if img_channels > 3:
            # drop the alpha channel; the original COLOR_RGB2BGR only swaps
            # channels and does not reduce a 4-channel image to 3
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        # reorder to the PyTorch channel layout: (channels, height, width)
        img = numpy.moveaxis(img, -1, 0)
        old_img_w = old_img_h = self.imgsz
        old_img_b = 1
        t0 = time.time()
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension
        # optionally pad image dimensions up to a multiple of 32 (ex. 1280x720 to 1312x736)
        # note:
        # Keras layout is (samples, height, width, channels),
        # PyTorch is (samples, channels, height, width)
        if config['yolov7']['resize_image_to_fit_multiply_of_32']:
            padding1_mult = math.floor(img.shape[2] / 32) + 1
            padding2_mult = math.floor(img.shape[3] / 32) + 1
            pad1 = (32 * padding1_mult) - img.shape[2]  # rows to add (height)
            pad2 = (32 * padding2_mult) - img.shape[3]  # columns to add (width)
            # ReplicationPad2d expects (left, right, top, bottom); the original
            # 6-tuple fails on 4D input
            padding = torch.nn.ReplicationPad2d((0, pad2, pad1, 0))
            img = padding(img)
        if self._warmup_once:
            # Warmup: run a few extra forward passes on the first call or on an
            # input-shape change (GPU only)
            if self.device.type != 'cpu' and (
                    old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
                old_img_b = img.shape[0]
                old_img_h = img.shape[2]
                old_img_w = img.shape[3]
                with torch.no_grad():  # no gradients needed for warmup passes
                    for i in range(3):
                        self.model(img, augment=self.augment)[0]
            self._warmup_once = False
        # Inference
        t1 = time_synchronized()
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = self.model(img, augment=self.augment)[0]
        t2 = time_synchronized()
        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
        t3 = time_synchronized()
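        # pred is now a list holding one (n, 6) tensor per image:
        # [x1, y1, x2, y2, confidence, class] for each of the n kept boxes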
        # Process detections (the unused `s`/`gn` locals from the upstream
        # detect.py were dropped)
        result = {}
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from the (padded) inference size back to img0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
                # Gather results, grouped by class name
                for *xyxy, conf, cls in reversed(det):
                    result.setdefault(self.names[int(cls)], [])
                    result[self.names[int(cls)]].append({
                        "cls": cls,    # class index (tensor)
                        "conf": conf,  # confidence score (tensor)
                        "xyxy": xyxy   # [x1, y1, x2, y2] corner coordinates
                    })
        return result

    def filter_rects(self, bbox_list: dict, e_classes: list):
        """
        Keep only the detections whose class is in e_classes.
        :param bbox_list: Detected bounding boxes (expected output of the detect method).
        :param e_classes: Class names to keep.
        :return: Flat list of detection dicts, each tagged with its class name under "tcls".
        """
        filtered_rects = []
        for c, ds in bbox_list.items():
            if c in e_classes:
                for d in ds:
                    aim_box = {
                        "tcls": c,  # target class name
                        "cls": d["cls"],
                        "conf": d["conf"],
                        "xyxy": d["xyxy"]
                    }
                    filtered_rects.append(aim_box)
        return filtered_rects

    def paint_boxes(self, img: numpy.ndarray, bbox_list: dict, min_conf: float) -> numpy.ndarray:
        """
        Paint predicted bounding boxes onto a given image.
        :param img: Input image.
        :param bbox_list: Detected bounding boxes (expected output of the detect method).
        :param min_conf: Minimum confidence required for a box to be drawn.
        :return: The image with the boxes drawn in-place.
        """
        for c, ds in bbox_list.items():
            for d in ds:
                if float(d['conf']) > min_conf:
                    plot_one_box(d["xyxy"], img, label=f"{self.get_cls_label(int(d['cls']))} {d['conf']:.2f}",
                                 color=self.colors[int(d["cls"])], line_thickness=2)
        return img

    def paint_aim_boxes(self, img: numpy.ndarray, aims_list: list) -> numpy.ndarray:
        """
        Paint aim bounding boxes onto a given image.
        :param img: Input image.
        :param aims_list: Detected aim boxes (expected output of the filter_rects method).
        :return: The image with the boxes drawn in-place.
        """
        for aim in aims_list:
            # plot_one_box is a module-level helper, not a method
            # (the original self.plot_one_box raised AttributeError)
            plot_one_box(aim["xyxy"], img, label=f"{aim['tcls']} {aim['conf']:.2f}",
                         color=self.colors[int(aim["cls"])], line_thickness=2)
        return img
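

# Minimal usage sketch (not part of the original module): the label list and
# image paths are placeholders, assuming the weights configured under
# config["yolov7"]["weights"] and a test image on disk.
if __name__ == "__main__":
    placeholder_names = [f"class_{i}" for i in range(80)]  # hypothetical fallback label set
    detector = Detector(placeholder_names)
    frame = cv2.imread("test.jpg")  # hypothetical input image
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # detect() expects RGB
    detections = detector.detect(frame)
    print({name: len(boxes) for name, boxes in detections.items()})
    annotated = detector.paint_boxes(frame, detections, min_conf=0.25)
    cv2.imwrite("test_out.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))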