'''
Preparation of CMU Panoptic-derived training data for WHENet.

Two steps: one for mtc_dataset (the CMU Dome 'Range of Motion' dataset) and
one for the CMU Dome 'Haggling' dataset.
Download the Monocular Total Capture (MTC) data from:
http://domedb.perception.cs.cmu.edu/mtc.html
http://posefs1.perception.cs.cmu.edu/mtc/mtc_dataset.tar.gz
It is big: about 270 GB.
Download the Haggling data using the panoptic-toolbox:
https://github.com/CMU-Perceptual-Computing-Lab/panoptic-toolbox
Use the getData.sh script as described at the above link to retrieve
the sequences for all 31 HD cameras:
````
getData.sh <sequence_name> 0 31
````
Get all of the Haggling sequences. They will be placed in subdirectories of
the panoptic-toolbox download directory, named after each sequence.
For each sequence, extract the hdFace3d.tar file in place, which creates an
hdFace3d directory.
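For example, something like the following works (the root path and the glob
pattern for the archives are assumptions; adjust them to your layout):
````
import glob
import os
import tarfile

root = '/home/tmp/panoptic-toolbox'  # assumed download location
for tar_path in glob.glob(os.path.join(root, '*haggling*', '*hdFace3d*.tar')):
    with tarfile.open(tar_path) as tf:
        tf.extractall(os.path.dirname(tar_path))  # unpack next to the archive
````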
In the entry point at the bottom of the file, set the root directory for the
panoptic data (either the MTC or the Haggling download) containing the
sequences. Set the output path and, if you are regenerating data, remove the
existing annotation.txt file (otherwise it will be appended to). Choose
whether to process MTC data or Haggling data with the 'do_mtc' flag: True
for MTC, False for Haggling.
Final data cleanup and deskewing
================================
300W-LP has approximately 150k images, but it has fewer images with yaws
around 0 than at other angles.
The MTC range-of-motion data has around 100k images, with only one subject
per video, so there are no occlusions.
The Haggling data has more images, but frames contain multiple subjects who
may occlude each other.

To generate a final dataset, the range-of-motion images are augmented with
Haggling images, where both have yaws outside [-99, 99] degrees, until the
total matches 300W-LP. Then, for small yaws, images are sampled from Haggling
and range of motion until the histogram of yaws is level. Throughout this
process, images with pitch or roll outside [-90, 90] degrees are excluded.
The annotations of the included examples are prepended with either PANOPTIC
or 300W to indicate which dataset they came from, and the annotation file is
then split into train and test splits, saved as combine_train and
combine_valid in the panoptic dataset root.
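That cleanup step is not implemented in this file; the sketch below only
illustrates the idea. The annotation paths, line format, yaw bin width,
per-bin target and 95/5 split are all assumptions, not the exact procedure:
````
import random
import numpy as np

def read_annotations(path, tag):
    # each line: image_path,yaw,pitch,roll; drop extreme pitch/roll
    rows = []
    with open(path) as f:
        for line in f:
            name, yaw, pitch, roll = line.strip().split(',')
            yaw, pitch, roll = float(yaw), float(pitch), float(roll)
            if abs(pitch) <= 90 and abs(roll) <= 90:
                rows.append(('{} {},{},{},{}'.format(tag, name, yaw, pitch, roll), yaw))
    return rows

w300 = read_annotations('300W_LP_annotation.txt', '300W')            # assumed path
pano = read_annotations('data/pre_process/annotation.txt', 'PANOPTIC')

# keep every large-yaw panoptic example, then top up each small-yaw bin
# until its count roughly matches the fullest 300W-LP bin
keep = [r for r in pano if abs(r[1]) > 99]
small = [r for r in pano if abs(r[1]) <= 99]
random.shuffle(small)
bins = np.arange(-99, 100, 3)
target = np.histogram([r[1] for r in w300], bins)[0].max()
filled = np.zeros(len(bins) - 1, dtype=int)
for r in small:
    b = np.digitize(r[1], bins) - 1
    if 0 <= b < len(filled) and filled[b] < target:
        filled[b] += 1
        keep.append(r)

# shuffle the combined annotations and write the train/valid splits
combined = [r[0] for r in w300 + keep]
random.shuffle(combined)
split = int(0.95 * len(combined))
with open('combine_train', 'w') as f:
    f.write('\n'.join(combined[:split]) + '\n')
with open('combine_valid', 'w') as f:
    f.write('\n'.join(combined[split:]) + '\n')
````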
'''
import cv2
import os
import json
import numpy as np
from utils import projectPoints, align, rotationMatrixToEulerAngles2, reference_head, get_sphere, select_euler, inverse_rotate_zyx
from PIL import Image
import matplotlib.pyplot as plt
# Reference 3D head model and the corresponding landmark indices:
# kp_idx indexes the Panoptic 70-point face landmarks, kp_idx_model the
# matching points of the reference head model.
model_points, _ = reference_head(scale=1, pyr=(0., 0., 0.))
kp_idx = np.asarray([17, 21, 26, 22, 45, 42, 39, 36, 35, 31, 54, 48, 57, 8])
kp_idx_model = np.asarray([38, 34, 33, 29, 13, 17, 25, 21, 54, 50, 43, 39, 45, 6])

# Points sampled on a sphere around the head; once posed, they are projected
# into each camera view to obtain the head crop box (the 'helmet').
sphere = []
for theta in range(0, 360, 10):
    for phi in range(0, 180, 10):
        sphere.append(get_sphere(theta, phi, 22))
sphere = np.asarray(sphere)
sphere = sphere + [0, 5, -5]
sphere = sphere.T


def last_8chars(x):
    # sort key: the frame number embedded in the last 12 characters of the filename
    x = x[-12:]
    x = x.split(".")[0]
    return x


# HD camera indices used when sampling the Haggling videos (a subset of 0-30).
without_top = [0, 3, 5, 8, 9, 11, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29]
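# For each detected face in one HD camera frame: align the 70-point 3D face
# landmarks to the reference head model, derive yaw/pitch/roll from the
# resulting pose, project the head sphere to get a crop box, and (when the yaw
# has moved more than `threshold` degrees from `yaw_ref`) save the crop under
# save_path/seq and append a line to save_path/annotation.txt.
# Returns the mean yaw over the faces seen, or -999 if none had enough
# confident landmarks.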
def save_img_head(frame, save_path, seq, cam, cam_id, json_file, frame_id, threshold, yaw_ref):
img_path = os.path.join(save_path, seq)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = Image.fromarray(frame)
# print(frame.size)
E_ref = np.mat([[1, 0, 0, 0.],
[0, -1, 0, 0],
[0, 0, -1, 50],
[0, 0, 0, 1]])
cam['K'] = np.mat(cam['K'])
cam['distCoef'] = np.array(cam['distCoef'])
cam['R'] = np.mat(cam['R'])
cam['t'] = np.array(cam['t']).reshape((3, 1))
with open(json_file) as dfile:
fframe = json.load(dfile)
count_face = -1
yaw_avg = 0
for face in fframe['people']:
# 3D Face has 70 3D joints, stored as an array [x1,y1,z1,x2,y2,z2,...]
face3d = np.array(face['face70']['landmarks']).reshape((-1, 3)).transpose()
face_conf = np.asarray(face['face70']['averageScore'])
model_points_3D = np.ones((4, 58), dtype=np.float32)
model_points_3D[0:3] = model_points
clean_match = (face_conf[kp_idx] > 0.1) #only pick points confidence higher than 0.1
kp_idx_clean = kp_idx[clean_match]
kp_idx_model_clean = kp_idx_model[clean_match]
if(len(kp_idx_clean)>6):
count_face += 1
rotation, translation, error, scale = align(np.mat(model_points_3D[0:3, kp_idx_model_clean]),
np.mat(face3d[:, kp_idx_clean]))
sphere_new = scale * rotation @ (sphere) + translation
pt_helmet = projectPoints(sphere_new,
cam['K'], cam['R'], cam['t'],
cam['distCoef'])
temp = np.zeros((4, 4))
temp[0:3, 0:3] = rotation
temp[0:3, 3:4] = translation
temp[3, 3] = 1
E_virt = np.linalg.inv(temp @ np.linalg.inv(E_ref))
E_real = np.zeros((4, 4))
E_real[0:3, 0:3] = cam['R']
E_real[0:3, 3:4] = cam['t']
E_real[3, 3] = 1
compound = E_real @ np.linalg.inv(E_virt)
status, [pitch, yaw, roll] = select_euler(np.rad2deg(inverse_rotate_zyx(compound)))
yaw= -yaw
roll = -roll
yaw_avg = yaw_avg+yaw
if(abs(yaw-yaw_ref)>threshold or yaw_ref==-999):
if (status == True):
x_min = int(max(min(pt_helmet[0, :]),0))
y_min = int(max(min(pt_helmet[1, :]),0))
x_max = int(min(max(pt_helmet[0, :]), frame.size[0]))
y_max = int(min(max(pt_helmet[1, :]),frame.size[1]))
# print(x_min, y_min, x_max, y_max)
if(x_min<x_max and y_min<y_max and abs(x_min-x_max)< frame.size[0]): #some sanity check
h = y_max-y_min
w = x_max-x_min
if not(h/w > 2 or w/h >2): #eleminate those too wide or too narrow
img = frame.crop((x_min, y_min, x_max, y_max))
# draw = ImageDraw.Draw(img)
# draw.text((0, 10), "yaw: {}".format(round(yaw)), (0, 255, 255))
# draw.text((0, 0), "pitch: {}".format(round(pitch)), (0, 255, 255))
# draw.text((0, 20), "roll: {}".format(round(roll)), (0, 255, 255))
# plt.imshow(img)
# plt.show()
filename = '{0:02d}_{1:01d}_{2:08d}.jpg'.format(cam_id, count_face, frame_id)
if not (os.path.exists(img_path)):
os.mkdir(img_path)
file_path = os.path.join(img_path, filename)
img.save(file_path, "JPEG")
anno_path = os.path.join(save_path, "annotation.txt")
line = seq+'/'+filename + ','+str(yaw)+','+str(pitch)+','+str(roll)+'\n'
with open(anno_path, "a") as f:
f.write(line)
if count_face!=-1:
return yaw_avg/(count_face+1)
else:
return -999
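# Sample one Haggling sequence: step through its HD videos every `interval`
# frames and, for each camera listed in `without_top`, pass the frame to
# save_img_head so that head crops are saved whenever the yaw has changed by
# more than `thresh` degrees since the last saved frame.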
def sample_video(root_path, sequence_name, save_path, thresh=5, interval=10):
video_path = os.path.join(root_path, sequence_name, 'hdVideos')
json_path = os.path.join(root_path, sequence_name, 'hdFace3d')
# save_path = os.path.join(save_path, sequence_name)
file_list = os.listdir(json_path)
json_list = []
with open(root_path+'/'+sequence_name+'/calibration_{0}.json'.format(sequence_name)) as cfile:
calib = json.load(cfile)
# Cameras are identified by a tuple of (panel#,node#)
cameras = {(cam['panel'], cam['node']): cam for cam in calib['cameras']}
for filename in sorted(file_list, key=last_8chars):
json_list.append(os.path.join(json_path, filename))
start_frame = int(json_list[0][-12:].split(".")[0])
end_frame = int(json_list[-1][-12:].split(".")[0])
for i in range(start_frame, end_frame, interval):
print(json_list[i-start_frame])
print(start_frame, end_frame)
    for i in without_top:  # loop over the selected HD cameras (see without_top above)
clip = 'hd_00_{0:02d}.mp4'.format(i)
video_clip = os.path.join(video_path, clip)
cap = cv2.VideoCapture(video_clip)
ret, frame = cap.read()
count = 0
frame_id = start_frame
# frame_id = 10921
yaw_prev = -999 #initial value
while frame_id<end_frame:
ret, frame = cap.read()
count+=1
if(count==frame_id):
# print('i',i)
# print('json',frame_id-start_frame)
#print('{}: {}'.format(sequence_name,frame_id))
try:
if not (frame==frame[0,0]).all():
yaw_prev = save_img_head(frame, save_path, sequence_name, cameras[(0, i)],i, json_list[frame_id-start_frame], frame_id, thresh, yaw_prev)
frame_id = frame_id + interval
except:
break
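# Sample one MTC 'Range of Motion' sequence: for each HD camera (0-30), walk
# the extracted hdImgs frame directories and, where that camera's image
# exists, call save_img_head with the matching hdFace3d annotation.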
def mtc_dataset(root_path, sequence_name, save_path, thresh=5):
img_path = os.path.join(root_path, 'hdImgs', sequence_name)
json_path = os.path.join(root_path, 'config',sequence_name, 'hdFace3d')
img_dir_list = os.listdir(img_path)
img_dir_list =sorted(img_dir_list, key=last_8chars)
file_list = os.listdir(json_path)
json_list = []
for filename in sorted(file_list, key=last_8chars):
json_list.append(os.path.join(json_path, filename))
start_frame = int(json_list[0][-12:].split(".")[0])
# temp = json_list[-1][-12:]
end_frame = int(json_list[-1][-12:].split(".")[0])
with open(root_path+'/config/'+sequence_name+'/calibration_{0}.json'.format(sequence_name)) as cfile:
calib = json.load(cfile)
# Cameras are identified by a tuple of (panel#,node#)
cameras = {(cam['panel'], cam['node']): cam for cam in calib['cameras']}
for cam_n in range(0,31):
yaw_prev=-999
for i in range(len(img_dir_list)):
curr_dir = os.path.join(img_path, img_dir_list[i])
# print(os.listdir(curr_dir))
frame_id = int(img_dir_list[i])
frame_file_name = "00_{0:02d}_{1:08d}.jpg".format(cam_n,frame_id)
# print(frame_file_name)
frame_file_path = os.path.join(curr_dir,frame_file_name)
if(os.path.isfile(frame_file_path)):
print(frame_file_name,' exists!!')
frame = cv2.imread(os.path.join(frame_file_path))
yaw_prev = save_img_head(frame, save_path, sequence_name, cameras[(0, cam_n)], cam_n, json_list[frame_id - start_frame], frame_id, thresh, yaw_prev)
if __name__ == '__main__':
root = '/home/tmp/panoptic-toolbox'
out_path = os.path.dirname(os.path.abspath(__file__))+'/data/pre_process'
try:
os.makedirs( out_path )
except:
pass
do_mtc = False
if do_mtc:
seq_list = ['171026_pose1', '171026_pose2', '171026_pose3', '171204_pose1', '171204_pose2', '171204_pose3', '171204_pose4', '171204_pose5', '171204_pose6']
        for i in range(0,1):  # note: processes only the first sequence; widen the range to cover all of seq_list
mtc_dataset(root, seq_list[i], out_path)
else:
vid_seq_list =['170404_haggling_a1','170404_haggling_a2','170404_haggling_a3','170404_haggling_b1','170404_haggling_b2','170404_haggling_b3','170407_haggling_a1','170407_haggling_a2','170407_haggling_a3','170407_haggling_b1','170407_haggling_b2','170407_haggling_b3']
        for i in range(0,8):  # note: processes only the first 8 sequences in vid_seq_list
try:
                os.makedirs( out_path + '/' + vid_seq_list[i] )
except:
pass
sample_video(root,vid_seq_list[i], out_path, interval=10)