From f306433885f8607f36ec0ae57432b62773d0d578 Mon Sep 17 00:00:00 2001
From: marcojob <44396071+marcojob@users.noreply.github.com>
Date: Mon, 9 Dec 2024 11:42:07 +0100
Subject: [PATCH] wavemap: Add example (#10)

* wavemap: Add example pipeline start

* wavemap: Running

* wavemap: save progress

* validation: Fix calibration slightly

* wavemap: Correct pose direction
---
 .devcontainer/requirements.txt   |   4 +
 radarmeetsvision/interface.py    |   7 +-
 .../dataset/blearndataset.py     |   1 +
 .../create_validation_dataset.py |  90 +++++-----
 scripts/wavemap/pipeline.py      | 164 ++++++++++++++++++
 5 files changed, 217 insertions(+), 49 deletions(-)
 create mode 100644 scripts/wavemap/pipeline.py

diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt
index 1c400d2..d70b218 100644
--- a/.devcontainer/requirements.txt
+++ b/.devcontainer/requirements.txt
@@ -1,2 +1,6 @@
 colorlog
+git+https://github.com/ethz-asl/wavemap#subdirectory=library/python
 laspy
+nanobind
+scikit-build-core
+typing_extensions
diff --git a/radarmeetsvision/interface.py b/radarmeetsvision/interface.py
index 0bcb8e1..55e5443 100644
--- a/radarmeetsvision/interface.py
+++ b/radarmeetsvision/interface.py
@@ -145,9 +145,10 @@ def get_dataset_loader(self, task, datasets_dir, dataset_list, index_list=None):
 
         return loader, dataset
 
-    def get_single_dataset_loader(self, dataset_dir, min_index=0, max_index=-1):
+    def get_single_dataset(self, dataset_dir, min_index=0, max_index=-1):
         dataset = BlearnDataset(dataset_dir, 'all', self.size, min_index, max_index, self.depth_prior_dir)
-        return DataLoader(dataset, batch_size=self.batch_size, pin_memory=True, drop_last=True)
+        loader = DataLoader(dataset, batch_size=self.batch_size, pin_memory=True, drop_last=True)
+        return dataset, loader
 
     def update_best_result(self, results, nsamples):
         if nsamples:
@@ -231,7 +232,7 @@ def validate_epoch(self, epoch, val_loader, iteration_callback=None):
 
                 # TODO: Expand on this interface
                 if iteration_callback is not None:
-                    iteration_callback(i, depth_prediction)
+                    iteration_callback(int(sample['index']), depth_prediction)
 
                 if mask is not None:
                     current_results = eval_depth(depth_prediction[mask], depth_target[mask])
diff --git a/radarmeetsvision/metric_depth_network/dataset/blearndataset.py b/radarmeetsvision/metric_depth_network/dataset/blearndataset.py
index f384b29..b0935be 100644
--- a/radarmeetsvision/metric_depth_network/dataset/blearndataset.py
+++ b/radarmeetsvision/metric_depth_network/dataset/blearndataset.py
@@ -138,6 +138,7 @@ def __getitem__(self, item):
             sample['depth_prior'] = torch.nan_to_num(sample['depth_prior'], nan=0.0)
 
         sample['image_path'] = str(img_path)
+        sample['index'] = index
 
         return sample
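Taken together, the three hunks above re-key the validation flow by dataset index rather than loop counter: `BlearnDataset` attaches `sample['index']`, `get_single_dataset` hands back the dataset alongside its loader, and `validate_epoch` forwards `int(sample['index'])` to the callback, so outputs stay aligned with the dataset's file naming even if the loader subsets or reorders samples. Note that `int()` on a tensor only works for a single element, so this implicitly assumes `batch_size=1`, which is what `scripts/wavemap/pipeline.py` sets. A minimal conforming callback might look like the following sketch (`save_prediction` and the `predictions/` directory are hypothetical, not part of this patch):

```python
import numpy as np

def save_prediction(index, depth_prediction):
    # 'index' is the dataset index matching the {index:05d}_rgb.jpg naming,
    # not the batch counter, so outputs line up with their inputs.
    depth_np = depth_prediction.cpu().numpy().squeeze()
    np.save(f'predictions/{index:05d}_pred.npy', depth_np)

# interface.validate_epoch(0, loader, iteration_callback=save_prediction)
```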
diff --git a/scripts/validation_dataset/create_validation_dataset.py b/scripts/validation_dataset/create_validation_dataset.py
index 0ec40f7..f0c9642 100644
--- a/scripts/validation_dataset/create_validation_dataset.py
+++ b/scripts/validation_dataset/create_validation_dataset.py
@@ -40,10 +40,10 @@ def __init__(self, input_dir):
 
         # TODO: Don't hardcode the calibrations, what about different setups
         # Correct for aesch and maschinenhalle
-        self.R_camera_radar = np.array(([[-9.99537680e-01, 2.06396371e-03, -3.03342592e-02],
-                                         [ 3.03920764e-02, 3.94275324e-02, -9.98760127e-01],
-                                         [8.65399670e-04, 9.99220301e-01, 3.94720324e-02]]))
-        self.t_camera_radar = np.array([0.01140952, -0.0056055, -0.02026767]).T
+        self.R_camera_radar = np.array(([[0.99993637, 0.00594794, -0.0095858],
+                                         [-0.00965354, 0.01146757, -0.99988765],
+                                         [-0.00583735, 0.99991655, 0.01152426]]))
+        self.t_camera_radar = np.array([[0.00755498, 0.00640742, -0.02652784]]).T
         self.K = np.array([[1200.4311769445228, 0.0, 634.1037111885645],
                            [0.0, 1201.8315992165312, 432.2169659507848],
                            [0.0, 0.0, 1.0]])
@@ -60,12 +60,17 @@ def generate(self):
         depth_prior_dir = output_dir / 'depth_prior'
         depth_prior_dir.mkdir(exist_ok=True)
 
+        # Empty pose file
+        pose_file = output_dir / 'pose_file.txt'
+        with pose_file.open('w') as f:
+            pass
+
         # Build the dict with depth GTs
         depth_dict_file = output_dir / 'depth_dict.pkl'
         intrinsics_dict_file = output_dir / 'intrinsics_dict.pkl'
        if not depth_dict_file.is_file() or not intrinsics_dict_file.is_file():
             logger.info("Computing depth dict")
-            depth_dict, intrinsics_dict = self.get_depth_dict(self.camera_parameters_file, self.offset_file, self.world_pointcloud_file)
+            depth_dict, intrinsics_dict = self.get_depth_dict()
             with depth_dict_file.open('wb') as f:
                 pickle.dump(depth_dict, f)
             with intrinsics_dict_file.open('wb') as f:
@@ -77,25 +82,21 @@ def generate(self):
             intrinsics_dict = pickle.load(f)
 
         topics = ['/image_raw', '/radar/cfar_detections', '/tf_static']
-        image_count = 0
         points_radar_window = []
         snr_radar_window = []
-        noise_radar_window = []
         bridge = CvBridge()
-        R_camera_radar = None
-        t_camera_radar = None
+
+        translations = []
 
         with rosbag.Bag(self.bag_file, 'r') as bag:
             for i, (topic, msg, t) in enumerate(bag.read_messages(topics=topics)):
                 if topic == '/radar/cfar_detections':
                     # Transform PC to camera frame
-                    points_radar, snr_radar, noise_radar = self.pointcloud2_to_xyz_array(msg)
+                    points_radar, snr_radar, _ = self.pointcloud2_to_xyz_array(msg)
                     points_radar = points_radar.T
 
-                    # TODO: Fake point
                     if len(points_radar):
-                        # R_camera_radar @ points_radar + t_camera_radar
-                        points_radar_window.append(self.R_camera_radar @ points_radar)
+                        points_radar_window.append(self.R_camera_radar @ points_radar + self.t_camera_radar)
                         snr_radar_window.append(snr_radar)
                         if len(points_radar_window) > 3:
                             points_radar_window.pop(0)
@@ -111,23 +112,30 @@ def generate(self):
                         img = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
 
                         depth_prior = self.get_depth_prior(points_radar_window, snr_radar_window, depth_dict[timestamp])
+                        index = intrinsics_dict[timestamp]['index']
                         if depth_prior is not None:
                             img = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
                             img = cv2.resize(img, (img.shape[1]//self.downscale, img.shape[0]//self.downscale))
-                            img_file = rgb_dir / f'{image_count:05d}_rgb.jpg'
+                            img_file = rgb_dir / f'{index:05d}_rgb.jpg'
                             cv2.imwrite(str(img_file), img)
 
                             # Write the depth gt
-                            depth_file = output_dir / 'depth' / f'{image_count:05d}_d.npy'
+                            depth_file = output_dir / 'depth' / f'{index:05d}_d.npy'
                             np.save(depth_file, depth_dict[timestamp])
-                            plt.imsave(output_dir / 'depth' / f'{image_count:05d}_d.jpg', depth_dict[timestamp], vmin=0,vmax=15)
+                            plt.imsave(output_dir / 'depth' / f'{index:05d}_d.jpg', depth_dict[timestamp], vmin=0,vmax=15)
 
-                            depth_prior_file = output_dir / 'depth_prior' / f'{image_count:05d}_ra.npy'
+                            depth_prior_file = output_dir / 'depth_prior' / f'{index:05d}_ra.npy'
                             np.save(depth_prior_file, depth_prior)
-                            plt.imsave(output_dir / 'depth_prior' / f'{image_count:05d}_ra.jpg', depth_prior, vmin=0,vmax=15)
+                            plt.imsave(output_dir / 'depth_prior' / f'{index:05d}_ra.jpg', depth_prior, vmin=0,vmax=15)
+
+                            # Write pose
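+                            # One line per image: "<index>: " followed by the
+                            # flattened homogeneous camera pose; read_pose_dict()
+                            # in scripts/wavemap/pipeline.py parses the first 12
+                            # values back into the top 3 rows of a 4x4 transform.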
+                            pose_flat = intrinsics_dict[timestamp]['pose'].flatten()
+                            with pose_file.open('a') as f:
+                                f.write(f'{index}: ')
+                                f.write(' '.join(map(str, pose_flat)))
+                                f.write('\n')
 
-                            image_count += 1
 
     def pointcloud2_to_xyz_array(self, cloud_msg):
         # Convert PointCloud2 to a list of points (x, y, z)
@@ -154,7 +162,7 @@ def get_depth_prior(self, points_radar_window, snr_radar_window, depth):
         radar_image_points = self.K @ points_radar
         radar_depth = radar_image_points[2, :].copy()
         radar_image_points /= radar_depth
-        
+
         depth_positive_mask = radar_depth > 0.0
         if depth_positive_mask.sum() > 0:
             u = radar_image_points[0, depth_positive_mask]
@@ -183,28 +191,11 @@ def get_depth_prior(self, points_radar_window, snr_radar_window, depth):
 
         depth_prior = depth_prior.T
 
-        # depth_factor = np.zeros((height, width))
-        # depth_factor[height//factor:2*height//factor, width//factor:2*width//factor] = depth
-        # fig, axs = plt.subplots(1, 2)
-        # axs.set_xlim([0, width])
-        # axs.set_ylim([height, 0])
-        # scatter = axs.scatter(u_distorted, self.transform_v_to_plt(v_distorted, height), c=radar_depth, vmin=0.0, vmax=5.0)
-        # cbar = plt.colorbar(scatter, ax=axs, cmap='viridis')
-        # cbar.set_label('Radar Depth')
-        # scatter = axs[0].scatter(u_distorted, v_distorted, c=radar_depth, vmin=0.0, vmax=15.0)
-        # cbar = plt.colorbar(scatter, ax=axs, cmap='viridis')
-        # axs[0].imshow(depth, cmap='viridis', origin='upper', vmin=0.0, vmax=15.0)
-        # axs[0].set_xlim([0, width])
-        # axs[0].set_ylim([height, 0])
-        # axs[1].imshow(depth_prior, vmin=0.0, cmap='viridis', vmax=15.0)
-
-        # print(radar_depth, points_snr, points_noise)
-        # plt.show()
-
         return depth_prior
 
     def process_camera_data(self, data_dict, points, offset):
         timestamp = data_dict['timestamp']
+        index = data_dict['index']
         K = data_dict['K']
         R = data_dict['R']
         t = data_dict['t']
@@ -243,16 +234,21 @@ def process_camera_data(self, data_dict, points, offset):
         # Use interpolation to assign depth values instead of direct pixel indexing
         depth = self.interpolate_depth(u_distorted, v_distorted, u, v, depth_flat, height, width)
         depth_dict[timestamp] = depth
-        intrinsics_dict[timestamp] = (K, radial_dist, tangential_dist)
-
-        logger.info(f'Computed depth for {timestamp}')
+        intrinsics_dict[timestamp] = {'K': K,
+                                      'radial_dist': radial_dist,
+                                      'tangential_dist': tangential_dist,
+                                      'index': index,
+                                      'pose': pose
+                                      }
+
+        logger.info(f'Computed depth for {timestamp}, {index}')
 
         return depth_dict, intrinsics_dict
 
-    def get_depth_dict(self, camera_parameters_file, offset_file, world_pointcloud_file):
+    def get_depth_dict(self):
         # Read the offset and point cloud data
-        offset = self.read_offset(offset_file)
-        points = self.read_pointcloud(world_pointcloud_file, offset)
+        offset = self.read_offset(self.offset_file)
+        points = self.read_pointcloud(self.world_pointcloud_file, offset)
 
         # Prepare a multiprocessing pool
         pool = mp.Pool(mp.cpu_count())
@@ -347,11 +343,12 @@ def get_next_camera(self, camera_parameters_file):
 
         filename_mask = r'([0-9]*).jpg ([0-9]*) ([0-9]*)'
 
+        image_count = 0
         intrinsics_index = None
         translation_index = None
         radial_dist_index = None
         tangential_dist_index = None
-        data_dict = {'timestamp': None, 'width': None, 'height': None, 'K': None, 't': None, 'R': None}
+        data_dict = {'timestamp': None, 'index': None, 'width': None, 'height': None, 'K': None, 't': None, 'R': None}
 
         for i in range(len(lines)):
             line = lines[i]
@@ -361,11 +358,13 @@ def get_next_camera(self, camera_parameters_file):
                 data_dict['timestamp'] = int(out.group(1))
                 data_dict['width'] = int(out.group(2))
                 data_dict['height'] = int(out.group(3))
+                data_dict['index'] = image_count
 
                 intrinsics_index = i + 1
                 radial_dist_index = i + 4
                 tangential_dist_index = i + 5
                 translation_index = i + 6
+                image_count += 1
 
             elif intrinsics_index == i:
                 intrinsics_index = None
@@ -398,7 +397,6 @@ def get_next_camera(self, camera_parameters_file):
 
             yield data_dict
 
-
     def read_pointcloud(self, pointcloud_file, offset):
         points = None
         ox = offset[0, 0]
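The depth-prior construction above reduces to a rigid radar-to-camera transform followed by a pinhole projection, with non-positive depths masked out before the points are splatted into the prior image. A condensed sketch of that geometry (`project_radar_points` is a hypothetical helper; shapes and names are illustrative, not the script's exact code):

```python
import numpy as np

def project_radar_points(points_radar, R_camera_radar, t_camera_radar, K):
    """points_radar: 3xN detections in the radar frame; t_camera_radar: 3x1;
    K: 3x3 camera matrix. Returns pixel coordinates and per-point depth."""
    # Rigid radar-to-camera transform, as in generate()
    points_camera = R_camera_radar @ points_radar + t_camera_radar

    # Keep only points in front of the camera before projecting
    in_front = points_camera[2, :] > 0.0
    points_camera = points_camera[:, in_front]

    # Pinhole projection to (u, v) pixel coordinates
    uvw = K @ points_camera
    uv = uvw[:2, :] / uvw[2, :]
    return uv, uvw[2, :]
```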
diff --git a/scripts/wavemap/pipeline.py b/scripts/wavemap/pipeline.py
new file mode 100644
index 0000000..76a316f
--- /dev/null
+++ b/scripts/wavemap/pipeline.py
@@ -0,0 +1,164 @@
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import pywavemap as wave
+import radarmeetsvision as rmv
+import re
+
+from pathlib import Path
+
+
+class WavemapPipeline:
+    def __init__(self, dataset_dir):
+        self.map = wave.Map.create({
+            "type": "hashed_chunked_wavelet_octree",
+            "min_cell_width": {
+                "meters": 0.05
+            }
+        })
+
+        # Create a measurement integration pipeline
+        self.pipeline = wave.Pipeline(self.map)
+
+        # Add map operations
+        self.pipeline.add_operation({
+            "type": "threshold_map",
+            "once_every": {
+                "seconds": 5.0  # TODO: What does this mean in this context?
+            }
+        })
+
+        # Add a measurement integrator
+        # TODO: What about distortion
+        self.pipeline.add_integrator(
+            "rmv", {
+                "projection_model": {
+                    "type": "pinhole_camera_projector",
+                    "width": 640,
+                    "height": 480,
+                    "fx": 600.215588472,
+                    "fy": 600.915799608,
+                    "cx": 317.051855594,
+                    "cy": 216.108482975
+                },
+                "measurement_model": {
+                    "type": "continuous_ray",
+                    "range_sigma": {
+                        "meters": 0.02  # TODO: What makes sense here? Relative possible?
+                    },
+                    "scaling_free": 0.2,
+                    "scaling_occupied": 0.4
+                },
+                "integration_method": {
+                    "type": "hashed_chunked_wavelet_integrator",
+                    "min_range": {
+                        "meters": 0.1
+                    },
+                    "max_range": {
+                        "meters": 20.0
+                    }
+                },
+            })
+
+        self.dataset_dir = Path(dataset_dir)
+        self.pose_dict = self.read_pose_dict(dataset_dir)
+
+        self.initial_pose = None
+
+    def read_pose_dict(self, dataset_dir):
+        pose_mask = r'-?\d+\.\d+(?:e[+-]?\d+)?|-?\d+'
+        pose_dict = {}
+        pose_file = Path(dataset_dir) / 'pose_file.txt'
+        if pose_file.is_file():
+            with pose_file.open('r') as f:
+                lines = f.readlines()
+
+            for line in lines:
+                out = re.findall(pose_mask, line)
+                if len(out) >= 13:  # re.findall returns a list (never None); need index + 12 pose values
+                    index = int(out[0])
+                    pose_dict[index] = np.zeros((4, 4))
+                    pose_dict[index][3, 3] = 1.0
+                    for i in range(12):
+                        row = i // 4
+                        col = i % 4
+                        pose_dict[index][row][col] = float(out[i + 1])
+        else:
+            print(f'Could not find {pose_file}')
+
+        return pose_dict
+
+    def integrate_wavemap(self, depth_prediction_np, pose_np, index):
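+        # pose_file.txt stores the pose in the direction produced by the
+        # validation-dataset script (presumably world-to-camera extrinsics),
+        # while wavemap expects the sensor pose in the world frame, so the
+        # transform is inverted first (see "Correct pose direction" above).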
+        pose_np = np.linalg.inv(pose_np)
+        image = wave.Image(depth_prediction_np)
+        pose = wave.Pose(pose_np)
+
+        # Integrate the depth image
+        self.pipeline.run_pipeline(["rmv"], wave.PosedImage(pose, image))
+
+    def prediction_callback(self, index, depth_prediction):
+        depth_prediction_np = depth_prediction.cpu().numpy().squeeze()
+        file = self.dataset_dir / 'prediction' / f'{index:05d}_pred'
+        np.save(str(file)+'.npy', depth_prediction_np)
+        plt.imsave(str(file)+'.jpg', depth_prediction_np)
+        pose_np = self.pose_dict[index]
+
+        print(f"Integrating measurement {index}")
+        self.integrate_wavemap(depth_prediction_np.T, pose_np, index)
+
+    def prediction_reader(self, prediction_dir, dataset):
+        filelist = dataset.filelist
+        for i in range(len(dataset)):
+            index = filelist[i]
+            prediction_file = prediction_dir / f'{index:05d}_pred.npy'
+            depth_prediction_np = np.load(str(prediction_file))
+            pose_np = self.pose_dict[index]
+
+            print(f"Integrating measurement {index}")
+            self.integrate_wavemap(depth_prediction_np.T, pose_np, index)
+            if i > 50:
+                break
+
+    def finalize(self):
+        # Remove map nodes that are no longer needed
+        self.map.prune()
+
+        # Save the map
+        print(f"Saving map of size {self.map.memory_usage} bytes")
+        self.map.store('/home/asl/Downloads/output.wvmp')
+
+        del self.pipeline, self.map
+
+
+def main(args):
+    rmv.setup_global_logger()
+    interface = rmv.Interface()
+    interface.set_encoder('vitb')
+    depth_min = 0.19983673095703125
+    depth_max = 120.49285888671875
+    interface.set_depth_range((depth_min, depth_max))
+    interface.set_output_channels(2)
+
+    interface.set_size(480, 640)
+    interface.set_batch_size(1)
+    interface.set_criterion()
+    interface.set_use_depth_prior(True)
+
+    interface.load_model(pretrained_from=args.network)
+    dataset, loader = interface.get_single_dataset(args.dataset, min_index=0, max_index=-1)
+
+    wp = WavemapPipeline(args.dataset)
+
+    predictions_dir = Path(args.dataset) / 'prediction'
+    if not predictions_dir.is_dir():
+        predictions_dir.mkdir(exist_ok=True)
+        interface.validate_epoch(0, loader, iteration_callback=wp.prediction_callback)
+    else:
+        wp.prediction_reader(predictions_dir, dataset)
+
+    wp.finalize()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Purely evaluate a network')
+    parser.add_argument('--dataset', type=str, required=True, help='Path to the dataset directory')
+    parser.add_argument('--network', type=str, help='Path to the network file')
+    args = parser.parse_args()
+    main(args)
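End to end, the example expects a dataset directory produced by create_validation_dataset.py (so the rgb/, depth_prior/ and pose_file.txt outputs exist): on the first run main() predicts with the network and integrates every depth image into the wavemap volume; on later runs it replays the cached predictions from prediction/ (capped at roughly 50 frames by prediction_reader), and finalize() prunes and writes the map to the hardcoded output path. A hypothetical invocation, equivalent to running the script from the command line, with placeholder paths:

```python
# Sketch only: dataset and checkpoint paths are placeholders.
from types import SimpleNamespace

from pipeline import main  # scripts/wavemap/pipeline.py

args = SimpleNamespace(
    dataset='datasets/validation_run',   # output of create_validation_dataset.py
    network='checkpoints/rmv_vitb.pth',  # trained radarmeetsvision weights
)
main(args)
```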