import argparse
import json
import math

import cv2
import numpy as np
import torch
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from datasets.coco import CocoValDataset
from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state


def run_coco_eval(gt_file_path, dt_file_path):
    """Run the official COCO keypoint evaluation and print its AP/AR summary."""
annotation_type = 'keypoints'
print('Running test for {} results.'.format(annotation_type))
coco_gt = COCO(gt_file_path)
coco_dt = coco_gt.loadRes(dt_file_path)
result = COCOeval(coco_gt, coco_dt, annotation_type)
result.evaluate()
result.accumulate()
result.summarize()
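
# Example (paths are hypothetical): scores a detections file produced by
# evaluate() below against the ground-truth annotations and prints the
# standard COCO keypoint AP/AR table.
#   run_coco_eval('annotations/person_keypoints_val2017.json', 'detections.json')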


def normalize(img, img_mean, img_scale):
    """Convert the image to float32, subtract the mean and apply the scale."""
img = np.array(img, dtype=np.float32)
img = (img - img_mean) * img_scale
return img
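
# A quick sanity check for normalize() (illustrative values, not part of the
# original script): with the defaults used by infer() below, uint8 pixels in
# [0, 255] map to roughly [-0.5, 0.5].
def _check_normalize():
    img = np.array([[[0, 128, 255]]], dtype=np.uint8)
    normed = normalize(img, img_mean=(128, 128, 128), img_scale=1 / 256)
    assert np.allclose(normed, [[[-0.5, 0.0, 127 / 256]]])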


def pad_width(img, stride, pad_value, min_dims):
    """Pad the image on all sides so both spatial dims are multiples of the stride."""
h, w, _ = img.shape
    h = min(min_dims[0], h)  # clamp height so the vertical pads stay non-negative
min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride
min_dims[1] = max(min_dims[1], w)
min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride
pad = []
pad.append(int(math.floor((min_dims[0] - h) / 2.0)))
pad.append(int(math.floor((min_dims[1] - w) / 2.0)))
pad.append(int(min_dims[0] - h - pad[0]))
pad.append(int(min_dims[1] - w - pad[1]))
padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3],
cv2.BORDER_CONSTANT, value=pad_value)
return padded_img, pad
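
# A minimal sanity check for pad_width() (hypothetical shapes, not part of the
# original script): both output dims should become multiples of the stride, and
# the four pads should account for the size difference.
def _check_pad_width():
    img = np.zeros((368, 415, 3), dtype=np.float32)
    padded, pad = pad_width(img, stride=8, pad_value=(0, 0, 0), min_dims=[368, 368])
    assert padded.shape == (368, 416, 3)  # 415 -> 416, the next multiple of 8
    assert pad == [0, 0, 0, 1]  # [top, left, bottom, right]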


def convert_to_coco_format(pose_entries, all_keypoints):
    """Flatten grouped pose entries into COCO keypoint lists with per-person scores."""
coco_keypoints = []
scores = []
for n in range(len(pose_entries)):
if len(pose_entries[n]) == 0:
continue
        keypoints = [0] * 17 * 3  # COCO format: 17 keypoints x (x, y, visibility)
        # maps the internal 18-keypoint order onto COCO indices; -1 marks 'neck', which COCO lacks
        to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
person_score = pose_entries[n][-2]
position_id = -1
for keypoint_id in pose_entries[n][:-2]:
position_id += 1
if position_id == 1: # no 'neck' in COCO
continue
cx, cy, score, visibility = 0, 0, 0, 0 # keypoint not found
if keypoint_id != -1:
cx, cy, score = all_keypoints[int(keypoint_id), 0:3]
cx = cx + 0.5
cy = cy + 0.5
visibility = 1
keypoints[to_coco_map[position_id] * 3 + 0] = cx
keypoints[to_coco_map[position_id] * 3 + 1] = cy
keypoints[to_coco_map[position_id] * 3 + 2] = visibility
coco_keypoints.append(keypoints)
scores.append(person_score * max(0, (pose_entries[n][-1] - 1))) # -1 for 'neck'
return coco_keypoints, scores
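
# A minimal sketch of the output contract (hypothetical inputs, not part of the
# original script): each pose entry yields 17 * 3 flat values in COCO order, and
# the score is the pose score times the number of found keypoints minus one
# (the neck is not annotated in COCO).
def _check_convert_to_coco_format():
    pose_entries = [np.array([-1.0] * 18 + [0.9, 0.0])]  # no keypoints found
    all_keypoints = np.zeros((0, 4), dtype=np.float32)
    coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints)
    assert len(coco_keypoints[0]) == 17 * 3
    assert scores[0] == 0  # zero found keypoints -> zero score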


def infer(net, img, scales, base_height, stride, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
    """Run the network at each scale and average the resulting heatmaps and PAFs."""
normed_img = normalize(img, img_mean, img_scale)
height, width, _ = normed_img.shape
scales_ratios = [scale * base_height / float(height) for scale in scales]
avg_heatmaps = np.zeros((height, width, 19), dtype=np.float32)
avg_pafs = np.zeros((height, width, 38), dtype=np.float32)
for ratio in scales_ratios:
scaled_img = cv2.resize(normed_img, (0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC)
min_dims = [base_height, max(scaled_img.shape[1], base_height)]
padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)
tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float().cuda()
stages_output = net(tensor_img)
        stage2_heatmaps = stages_output[-2]  # second-to-last network output: keypoint heatmaps
heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
heatmaps = cv2.resize(heatmaps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        heatmaps = heatmaps[pad[0]:heatmaps.shape[0] - pad[2], pad[1]:heatmaps.shape[1] - pad[3], :]
heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC)
avg_heatmaps = avg_heatmaps + heatmaps / len(scales_ratios)
        stage2_pafs = stages_output[-1]  # last network output: part affinity fields
pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
pafs = cv2.resize(pafs, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
pafs = pafs[pad[0]:pafs.shape[0] - pad[2], pad[1]:pafs.shape[1] - pad[3], :]
pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC)
avg_pafs = avg_pafs + pafs / len(scales_ratios)
return avg_heatmaps, avg_pafs
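
# Example call (hypothetical image; see the __main__ block below for how `net`
# is built): single-scale inference returns per-pixel maps at the input image
# resolution, with 19 heatmap channels (18 keypoints + background) and 38 PAF
# channels (x/y components of the limb connections).
#   avg_heatmaps, avg_pafs = infer(net, img, scales=[1], base_height=368, stride=8)
#   avg_heatmaps.shape == (img_h, img_w, 19); avg_pafs.shape == (img_h, img_w, 38)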


def evaluate(labels, output_name, images_folder, net, multiscale=False, visualize=False):
    """Run inference over the validation images, dump detections to JSON and score them."""
net = net.cuda().eval()
base_height = 368
scales = [1]
if multiscale:
scales = [0.5, 1.0, 1.5, 2.0]
stride = 8
dataset = CocoValDataset(labels, images_folder)
coco_result = []
for sample in dataset:
file_name = sample['file_name']
img = sample['img']
avg_heatmaps, avg_pafs = infer(net, img, scales, base_height, stride)
total_keypoints_num = 0
all_keypoints_by_type = []
        for kpt_idx in range(18):  # the 19th heatmap channel is background
total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)
pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, avg_pafs)
coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints)
        image_id = int(file_name[0:file_name.rfind('.')])  # COCO image id is the numeric file stem
for idx in range(len(coco_keypoints)):
coco_result.append({
'image_id': image_id,
'category_id': 1, # person
'keypoints': coco_keypoints[idx],
'score': scores[idx]
})
if visualize:
for keypoints in coco_keypoints:
for idx in range(len(keypoints) // 3):
cv2.circle(img, (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])),
3, (255, 0, 255), -1)
cv2.imshow('keypoints', img)
key = cv2.waitKey()
if key == 27: # esc
return
with open(output_name, 'w') as f:
json.dump(coco_result, f, indent=4)
    run_coco_eval(labels, output_name)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels')
parser.add_argument('--output-name', type=str, default='detections.json',
help='name of output json file with detected keypoints')
parser.add_argument('--images-folder', type=str, required=True, help='path to COCO val images folder')
parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
parser.add_argument('--multiscale', action='store_true', help='average inference results over multiple scales')
parser.add_argument('--visualize', action='store_true', help='show keypoints')
args = parser.parse_args()
net = PoseEstimationWithMobileNet()
checkpoint = torch.load(args.checkpoint_path)
load_state(net, checkpoint)
evaluate(args.labels, args.output_name, args.images_folder, net, args.multiscale, args.visualize)
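
# Example invocation (hypothetical paths):
#   python main.py --labels annotations/person_keypoints_val2017.json \
#       --images-folder val2017 --checkpoint-path checkpoint_iter_370000.pth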