from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from path import Path
from imageio import imread, imwrite
from skimage.transform import rescale, resize
from skimage.measure import block_reduce
from colmap_util import read_model as rm
import numpy as np
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from tqdm import tqdm
from wrappers import FFMpeg
import gzip
from pebble import ProcessPool
import yaml
from itertools import product
import pandas as pd

parser = ArgumentParser(description='Convert dataset to KITTI format, optionally create a visualization video',
                        formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--depth_dir', metavar='DIR', type=Path, required=True,
                    help='folder where depth maps generated by ETH3D are stored. Usually ends with "ground_truth_depth/<video name>"')
parser.add_argument('--images_root_folder', metavar='DIR', type=Path, required=True,
                    help='folder where video frames are stored')
parser.add_argument('--occ_dir', metavar='DIR', type=Path,
                    help='folder where occlusion depth maps generated by ETH3D are stored. Usually ends with "occlusion_depth/<video name>"')
parser.add_argument('--metadata_path', type=Path, required=True,
                    help='path to metadata CSV file generated by video_to_colmap.py')
parser.add_argument('--dataset_output_dir', metavar='DIR', default=None, type=Path, required=True)
parser.add_argument('--video_output_dir', metavar='DIR', default=None, type=Path)
parser.add_argument('--interpolated_frames_path', metavar='TXT', type=Path)
parser.add_argument('--final_model', metavar='DIR', type=Path)
parser.add_argument('--visualize', action='store_true',
                    help='If selected, will generate images with colorized depth for visualization purposes')
parser.add_argument('--video', action='store_true',
                    help='If selected, will generate a video from the visualization images')
parser.add_argument('--downscale', type=int, default=1,
                    help='How much the ground truth depth is downscaled in order to save space')
parser.add_argument('--threads', '-j', type=int, default=8,
                    help='Number of parallel processes used to convert frames')
parser.add_argument('--compressed', action='store_true',
                    help='Indicates that GroundTruthCreator was used with the option `--compress_depth_maps`')
parser.add_argument('--reg_mat', type=Path, default=None,
                    help='registration matrix that was used for lidar point cloud registration')
parser.add_argument('--verbose', '-v', action='count', default=0)
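
# Example invocation (all paths are placeholders, to be adapted to your scan):
# python convert_dataset.py \
#     --depth_dir /path/to/ground_truth_depth/video1 \
#     --images_root_folder /path/to/images \
#     --occ_dir /path/to/occlusion_depth/video1 \
#     --metadata_path /path/to/metadata.csv \
#     --final_model /path/to/final_model \
#     --dataset_output_dir /path/to/converted_dataset \
#     --video_output_dir /path/to/video_viz \
#     --visualize --video --compressed -j 8
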
def rescale_and_save_cameras(cameras, images, output_dir, output_width=None, downscale=None):
    def rescale_camera(cam):
        if downscale is None:
            current_downscale = output_width / cam.width
        else:
            current_downscale = downscale
        if current_downscale == 1:
            return cam
        if 'SIMPLE' in cam.model or 'RADIAL' in cam.model:
            cam.params[:3] /= current_downscale
        else:
            cam.params[:4] /= current_downscale
        return cam._replace(width=int(cam.width // current_downscale),
                            height=int(cam.height // current_downscale))

    def construct_intrinsics(cam):
        # assert('PINHOLE' in cam.model)
        if 'SIMPLE' in cam.model or 'RADIAL' in cam.model:
            fx, cx, cy = cam.params
            fy = fx
        else:
            fx, fy, cx, cy, *_ = cam.params
        return np.array([[fx, 0, cx],
                         [0, fy, cy],
                         [0, 0, 1]])

    def save_cam(cam, intrinsics_path, yaml_path):
        intrinsics = construct_intrinsics(cam)
        np.savetxt(intrinsics_path, intrinsics)
        with open(yaml_path, 'w') as f:
            camera_dict = {"model": cam.model,
                           "params": cam.params.tolist(),
                           "width": cam.width,
                           "height": cam.height}
            yaml.dump(camera_dict, f, default_flow_style=False)
        return cam

    rescaled_cameras = {}
    if len(cameras) == 1:
        key = list(cameras.keys())[0]
        cam = cameras[key]
        rescaled_cameras[key] = rescale_camera(cam)
        # Save the rescaled camera so that the stored width/height stay
        # consistent with the rescaled intrinsics
        save_cam(rescaled_cameras[key], output_dir / "intrinsics.txt", output_dir / "camera.yaml")
    else:
        for _, img in images.items():
            try:
                cam = rescaled_cameras[img.camera_id]
            except KeyError:
                cam = rescale_camera(cameras[img.camera_id])
                rescaled_cameras[img.camera_id] = cam
            finally:
                save_cam(cam, output_dir / Path(img.name).stem + "_intrinsics.txt",
                         output_dir / Path(img.name).stem + "_camera.yaml")
    return rescaled_cameras
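
# Downscaling a camera by a factor k divides the focal lengths and principal
# point by k, along with the pixel dimensions. A worked example, assuming a
# PINHOLE camera (params = [fx, fy, cx, cy]): a 1920x1080 camera with params
# [1000, 1000, 960, 540], downscaled by 2, becomes a 960x540 camera with
# params [500, 500, 480, 270].
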
def to_transform_matrix(q, t, scale=1):
    cam_R = rm.qvec2rotmat(q).T
    cam_t = (- cam_R @ t).reshape(3, 1) * scale
    transform = np.vstack((np.hstack([cam_R, cam_t]), [0, 0, 0, 1]))
    return transform
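
# COLMAP stores world-to-camera extrinsics as a quaternion qvec and a
# translation tvec; to_transform_matrix inverts them into a camera-to-world
# 4x4 matrix with R_cw = R_wc^T and t_cw = -R_cw @ t_wc (optionally rescaled).
# A sketch of the reverse conversion, assuming a rotmat2qvec helper like the
# one in COLMAP's read_write_model.py:
# def to_colmap_pose(transform):
#     R_cw, t_cw = transform[:3, :3], transform[:3, 3]
#     return rm.rotmat2qvec(R_cw.T), -R_cw.T @ t_cw
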
def save_poses(images, images_list, output_dir, scale):
    starting_pos = None
    poses = []
    for i in images_list:
        try:
            img = images[i]
            current_pos = to_transform_matrix(img.qvec, img.tvec, scale)
            if starting_pos is None:
                starting_pos = current_pos
            relative_position = np.linalg.inv(starting_pos) @ current_pos
            poses.append(relative_position[:3])
        except KeyError:
            # Frame is not registered so we put NaN coordinates instead
            poses.append(np.full((3, 4), np.NaN))
    poses = np.stack(poses)
    np.savetxt(output_dir / 'poses.txt', poses.reshape((len(images_list), -1)))
    return poses
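
# poses.txt contains one line per frame of images_list, in the same order:
# the 12 values of the 3x4 [R|t] matrix flattened row-major, expressing each
# camera pose relative to the first registered frame. Unregistered frames get
# a line of NaNs.
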
def high_res_colormap(low_res_cmap, resolution=1000, max_value=1):
    # Construct the listed colormap, with interpolated values for higher resolution
    # For a linear segmented colormap, you can just specify the number of points
    # in cm.get_cmap(name, lutsize) with the parameter lutsize
    x = np.linspace(0, 1, low_res_cmap.N)
    low_res = low_res_cmap(x)
    new_x = np.linspace(0, max_value, resolution)
    high_res = np.stack([np.interp(new_x, x, low_res[:, i]) for i in range(low_res.shape[1])], axis=1)
    return ListedColormap(high_res)
def opencv_rainbow(resolution=1000):
    # Construct the OpenCV equivalent of the Rainbow colormap
    opencv_rainbow_data = (
        (0.000, (1.00, 0.00, 0.00)),
        (0.400, (1.00, 1.00, 0.00)),
        (0.600, (0.00, 1.00, 0.00)),
        (0.800, (0.00, 0.00, 1.00)),
        (1.000, (0.60, 0.00, 1.00))
    )
    return LinearSegmentedColormap.from_list('opencv_rainbow', opencv_rainbow_data, resolution)


COLORMAPS = {'rainbow': opencv_rainbow(),
             'magma': high_res_colormap(cm.get_cmap('magma')),
             'bone': cm.get_cmap('bone', 10000)}
def apply_cmap_and_resize(depth, colormap, downscale):
    # Min-pooling block reduce, so that the closest depth is kept when downscaling
    downscale_depth = block_reduce(depth, (downscale, downscale), np.min)
    finite_depth = depth[depth < np.inf]
    if finite_depth.size != 0:
        max_d = finite_depth.max()
        depth_norm = downscale_depth / max_d
        depth_norm[downscale_depth == np.inf] = 1
    else:
        depth_norm = np.ones_like(downscale_depth)
    depth_viz = COLORMAPS[colormap](depth_norm)[:, :, :3]
    # Invalid (infinite) depth is drawn black
    depth_viz[downscale_depth == np.inf] = 0
    return downscale_depth, depth_viz * 255
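
# A minimal usage sketch with synthetic data (not from the original script):
# depth = np.full((4, 4), np.inf, dtype=np.float32)   # all pixels invalid...
# depth[:2, :2] = 10.0                                # ...except one 2x2 block
# small, viz = apply_cmap_and_resize(depth, 'rainbow', 2)
# small is the min-pooled 2x2 depth map; viz is the matching 2x2x3 color image
# in [0, 255], where invalid (infinite) pixels are black.
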
def process_one_frame(img_path, depth_path, occ_path, depth_shape,
                      dataset_output_dir, video_output_dir, downscale, interpolated,
                      visualization=False, viz_width=1920, compressed=True):
    img = imread(img_path)
    if len(img.shape) == 3:
        h, w, _ = img.shape
    elif len(img.shape) == 2:
        h, w = img.shape
        img = img.reshape(h, w, 1)
    assert viz_width % 2 == 0
    viz_height = int(viz_width * h / (2 * w)) * 2
    output_img = np.zeros((viz_height, viz_width, 3), dtype=np.uint8)
    if depth_shape is not None:
        resized_img = resize(img, depth_shape)
        rescaled_img = rescale(resized_img, 1 / downscale, multichannel=True) * 255
        imwrite(dataset_output_dir / img_path.basename(), rescaled_img.astype(np.uint8))
    if visualization:
        viz_img = resize(img, (viz_height // 2, viz_width // 2)) * 255
        # Image goes to the upper left corner of the visualization
        output_img[:viz_height // 2, :viz_width // 2] = viz_img
    if depth_path is not None:
        with gzip.open(depth_path, "rb") if compressed else open(depth_path, "rb") as f:
            depth = np.frombuffer(f.read(), np.float32).reshape(depth_shape)
        output_depth_name = dataset_output_dir / img_path.stem + '.npy'
        downscaled_depth, viz = apply_cmap_and_resize(depth, 'rainbow', downscale)
        if not interpolated:
            np.save(output_depth_name, downscaled_depth)
        if visualization:
            viz_rescaled = resize(viz, (viz_height // 2, viz_width // 2))
            # Depth colormap goes to the upper right corner
            output_img[:viz_height // 2, viz_width // 2:] = viz_rescaled
            # Mix of depth and image goes to the lower left corner
            output_img[viz_height // 2:, :viz_width // 2] = \
                output_img[:viz_height // 2, :viz_width // 2] // 2 + \
                output_img[:viz_height // 2, viz_width // 2:] // 2
    if occ_path is not None and visualization:
        with gzip.open(occ_path, "rb") if compressed else open(occ_path, "rb") as f:
            occ = np.frombuffer(f.read(), np.float32).reshape(depth_shape)
        _, occ_viz = apply_cmap_and_resize(occ, 'bone', downscale)
        occ_viz_rescaled = resize(occ_viz, (viz_height // 2, viz_width // 2))
        # Occlusion depth map visualization goes to the lower right corner
        output_img[viz_height // 2:, viz_width // 2:] = occ_viz_rescaled
    if interpolated:
        # Draw an orange border around interpolated frames
        output_img[:5] = output_img[-5:] = output_img[:, :5] = output_img[:, -5:] = [255, 128, 0]
    if visualization:
        imwrite(video_output_dir / img_path.stem + '.png', output_img)
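
# Notes on the on-disk formats handled above: ETH3D depth and occlusion maps
# are raw float32 buffers (gzip-compressed when --compressed is used) that are
# reshaped to the camera's (height, width), with np.inf marking invalid pixels.
# To load a converted frame back (a sketch; 'frame_0001' is a placeholder stem):
# depth = np.load(dataset_output_dir / 'frame_0001.npy')  # HxW depth, np.inf = invalid
# img = imread(dataset_output_dir / 'frame_0001.jpg')     # frame (extension follows the source image)
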
def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
                    dataset_output_dir, video_output_dir, ffmpeg, pose_scale=1,
                    interpolated_frames=[], metadata=None, images_list=None,
                    threads=8, downscale=None, compressed=True,
                    width=None, visualization=False, video=False, verbose=0, **env):
    dataset_output_dir.makedirs_p()
    video_output_dir.makedirs_p()
    if video:
        visualization = True
    cameras_colmap, images_colmap, _ = rm.read_model(final_model, '.txt')
    # image_df = pd.DataFrame.from_dict(images, orient="index").set_index("id")
    if metadata is not None:
        metadata = metadata.set_index("db_id", drop=False).sort_values("time")
        framerate = metadata["framerate"].values[0]
        # image_df = image_df.reindex(metadata.index)
        images_list = metadata["image_path"].values
    else:
        assert images_list is not None
        framerate = None
        video = False
    # Discard images and cameras that are not represented in the image list
    images_colmap = {i.name: i for k, i in images_colmap.items() if i.name in images_list}
    cameras_ids = set(i.camera_id for i in images_colmap.values())
    cameras_colmap = {k: cameras_colmap[k] for k in cameras_ids}
    if downscale is None:
        assert width is not None
    rescaled_cameras = rescale_and_save_cameras(cameras_colmap,
                                                images_colmap,
                                                dataset_output_dir,
                                                width, downscale)
    poses = save_poses(images_colmap, images_list, dataset_output_dir, pose_scale)
    depth_maps = []
    occ_maps = []
    interpolated = []
    imgs = []
    registered = []
    depth_shapes = []
    for i in images_list:
        img_path = images_root_folder / i
        imgs.append(img_path)
        fname = img_path.basename()
        depth_path = depth_dir / fname
        occ_path = occ_dir / fname
        if compressed:
            depth_path += ".gz"
            occ_path += ".gz"
        if i in images_colmap:
            assert depth_path.isfile()
            registered.append(True)
            if occ_path.isfile():
                occ_maps.append(occ_path)
            else:
                occ_maps.append(None)
            depth_maps.append(depth_path)
            camera = cameras_colmap[images_colmap[i].camera_id]
            depth_shapes.append((camera.height, camera.width))
            if i in interpolated_frames:
                if verbose > 2:
                    print("Image {} was interpolated".format(fname))
                interpolated.append(True)
            else:
                interpolated.append(False)
        else:
            if verbose > 2:
                print("Image {} was not registered".format(fname))
            registered.append(False)
            depth_maps.append(None)
            occ_maps.append(None)
            interpolated.append(False)
            depth_shapes.append(None)
    print('{}/{} Frames not registered ({:.2f}%)'.format(len(images_list) - sum(registered),
                                                         len(images_list),
                                                         100 * (1 - sum(registered) / len(images_list))))
    print('{}/{} Frames interpolated ({:.2f}%)'.format(sum(interpolated),
                                                       len(images_list),
                                                       100 * sum(interpolated) / len(images_list)))
    if threads == 1:
        for i, d, o, ds, n in tqdm(zip(imgs, depth_maps, occ_maps, depth_shapes, interpolated), total=len(imgs)):
            process_one_frame(i, d, o, ds, dataset_output_dir, video_output_dir,
                              downscale, n, visualization, viz_width=1920, compressed=compressed)
    else:
        with ProcessPool(max_workers=threads) as pool:
            # Forward `compressed` to the workers, so that uncompressed depth
            # maps are not mistakenly opened with gzip
            tasks = pool.map(process_one_frame, imgs, depth_maps, occ_maps, depth_shapes,
                             [dataset_output_dir] * len(imgs), [video_output_dir] * len(imgs),
                             [downscale] * len(imgs), interpolated,
                             [visualization] * len(imgs), [1920] * len(imgs),
                             [compressed] * len(imgs))
            try:
                for _ in tqdm(tasks.result(), total=len(imgs)):
                    pass
            except KeyboardInterrupt as e:
                tasks.cancel()
                raise e
    if metadata is not None:
        wanted_keys = ['image_path', 'time', 'height', 'width', 'camera_model', 'camera_id']
        filtered_metadata = metadata[wanted_keys].copy()
        filtered_metadata['interpolated'] = interpolated
        filtered_metadata['registered'] = registered
        for i, j in product(range(3), range(4)):
            filtered_metadata['pose{}{}'.format(i, j)] = poses[:, i, j]
        filtered_metadata["fx"] = np.NaN
        filtered_metadata["fy"] = np.NaN
        filtered_metadata["cx"] = np.NaN
        filtered_metadata["cy"] = np.NaN
        for cam_id in filtered_metadata["camera_id"].unique():
            if cam_id not in rescaled_cameras:
                continue
            cam = rescaled_cameras[cam_id]
            rows = filtered_metadata["camera_id"] == cam_id
            filtered_metadata.loc[rows, "fx"] = cam.params[0]
            if "SIMPLE" in cam.model or "RADIAL" in cam.model:
                filtered_metadata.loc[rows, "fy"] = cam.params[0]
                filtered_metadata.loc[rows, "cx"] = cam.params[1]
                filtered_metadata.loc[rows, "cy"] = cam.params[2]
            else:
                filtered_metadata.loc[rows, "fy"] = cam.params[1]
                filtered_metadata.loc[rows, "cx"] = cam.params[2]
                filtered_metadata.loc[rows, "cy"] = cam.params[3]
        filtered_metadata.to_csv(dataset_output_dir / 'metadata.csv')
    not_registered = [i + '\n' for i, r in zip(images_list, registered) if not r]
    with open(dataset_output_dir / 'not_registered.txt', 'w') as f:
        f.writelines(not_registered)
    if video:
        video_path = str(video_output_dir.parent / '{}_groundtruth_viz.mp4'.format(video_output_dir.stem))
        glob_pattern = str(video_output_dir / '*.png')
        ffmpeg.create_video(video_path, glob_pattern, True, framerate)
        video_output_dir.rmtree_p()
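
# Summary of what convert_dataset writes inside dataset_output_dir: downscaled
# frames and per-frame depth maps ('<stem>.npy'), poses.txt, intrinsics.txt and
# camera.yaml (per-image variants when several cameras are present),
# metadata.csv and not_registered.txt; plus, optionally, visualization images
# and a '<video name>_groundtruth_viz.mp4' video next to video_output_dir.
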
if __name__ == '__main__':
    args = parser.parse_args()
    env = vars(args)
    env["metadata"] = pd.read_csv(env["metadata_path"])
    if args.reg_mat is not None:
        registration_matrix = np.genfromtxt(args.reg_mat)
        # If the registration matrix is not a true rotation, it means the frame
        # positions need to be rescaled by the inverse of its spectral norm
        reg_scale = 1 / np.linalg.norm(registration_matrix[:, :3], 2)
    else:
        reg_scale = 1
    if args.interpolated_frames_path is None:
        env["interpolated_frames"] = []
    else:
        with open(args.interpolated_frames_path, "r") as f:
            env["interpolated_frames"] = [line[:-1] for line in f.readlines()]
    env["ffmpeg"] = FFMpeg()
    convert_dataset(pose_scale=reg_scale, **env)