From 8fca7dce40f88ee47e39e14416332d3dd301b106 Mon Sep 17 00:00:00 2001
From: Jacob Zhong
Date: Mon, 17 May 2021 13:46:09 -0400
Subject: [PATCH] Add CADC loader

---
 d3d/dataset/README.md        |   4 +
 d3d/dataset/cadc/__init__.py |   1 +
 d3d/dataset/cadc/loader.py   | 327 +++++++++++++++++++++++++++++++++++
 d3d/dataset/kitti/object.py  |   2 +-
 4 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 d3d/dataset/cadc/__init__.py
 create mode 100644 d3d/dataset/cadc/loader.py

diff --git a/d3d/dataset/README.md b/d3d/dataset/README.md
index d87fcf5..ab78b25 100644
--- a/d3d/dataset/README.md
+++ b/d3d/dataset/README.md
@@ -24,6 +24,10 @@
 - [ ] [SemanticKITTI](http://www.semantic-kitti.org/): To be added
 - [ ] [vKITTI3D](https://github.com/VisualComputingInstitute/vkitti3D-dataset): To be added
 - [ ] [AIO-Drive](https://klabcmu.github.io/index.html): To be added
+- [ ] [A2D2](https://www.a2d2.audi/a2d2/en.html): To be added
+- [ ] [Ford AVData](https://avdata.ford.com/): To be added
+
+More datasets: https://boschresearch.github.io/multimodalperception/dataset.html
 
 # Indoor datasets
 - [ ] [Objectron](https://github.com/google-research-datasets/Objectron): To be added
diff --git a/d3d/dataset/cadc/__init__.py b/d3d/dataset/cadc/__init__.py
new file mode 100644
index 0000000..3bf02d1
--- /dev/null
+++ b/d3d/dataset/cadc/__init__.py
@@ -0,0 +1 @@
+from .loader import CADCDLoader
diff --git a/d3d/dataset/cadc/loader.py b/d3d/dataset/cadc/loader.py
new file mode 100644
index 0000000..904c0d1
--- /dev/null
+++ b/d3d/dataset/cadc/loader.py
@@ -0,0 +1,327 @@
+from collections import defaultdict
+from itertools import chain
+from pathlib import Path
+from zipfile import ZipFile
+
+import numpy as np
+from d3d.abstraction import (ObjectTag, ObjectTarget3D, Target3DArray,
+                             TransformSet)
+from d3d.dataset.base import (TrackingDatasetBase, expand_idx, expand_idx_name,
+                              split_trainval_seq)
+from d3d.dataset.kitti import utils
+from d3d.dataset.kitti.utils import KittiObjectClass, OxtData
+from d3d.dataset.zip import PatchedZipFile
+from scipy.spatial.transform import Rotation
+from sortedcontainers import SortedDict
+
+
+class CADCDLoader(TrackingDatasetBase):
+    """
+    Load and parse the CADC dataset into a usable format. Please organize the files into the following structure:
+
+    * Zip Files::
+
+        - <base_path directory>
+            - 2018_03_06
+                - calib.zip
+                - 0001
+                    - labeled.zip
+                    - raw.zip
+                    - 3d_ann.json
+                - 0002
+                - ...
+            - ...
+
+    * Unzipped Structure::
+
+        - <base_path directory>
+            - 2018_03_06
+                - calib
+                    - ...
+                - 0001
+                    - labeled
+                        - image_00
+                        - image_01
+                        - ...
+                    - raw
+                        - image_00
+                        - image_01
+                        - ...
+                    - 3d_ann.json
+                - 0002
+                - ...
+
+    For the description of the remaining constructor parameters, please refer to :class:`d3d.dataset.base.TrackingDatasetBase`.
+
+    :param datatype: Type of data to load, either 'labeled' or 'raw'
+    :type datatype: str
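+
+    * Example usage (a minimal sketch; the dataset path below is a placeholder)::
+
+        loader = CADCDLoader("path/to/cadcd", inzip=True)
+        calib = loader.calibration_data(0)                # TransformSet for the first frame
+        cloud = loader.lidar_data(0, names="lidar")       # lidar point cloud
+        image = loader.camera_data(0, names="camera_F")   # front camera image
+        boxes = loader.annotation_3dobject(0)             # Target3DArray of 3D boxes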
+    """
+
+    VALID_CAM_NAMES = ["camera_F", "camera_FR", "camera_RF", "camera_RB", "camera_B", "camera_LB", "camera_LF", "camera_FL"]
+    VALID_LIDAR_NAMES = ["lidar"]
+    VALID_OBJ_CLASSES = KittiObjectClass
+    _frame2folder = {
+        "camera_F": "image_00", "camera_FR": "image_01", "camera_RF": "image_02", "camera_RB": "image_03",
+        "camera_B": "image_04", "camera_LB": "image_05", "camera_LF": "image_06", "camera_FL": "image_07",
+        "lidar": "lidar_points"
+    }
+
+    def __init__(self, base_path, datatype: str = 'labeled', inzip=True, phase="training",
+                 trainval_split=1, trainval_random=False, trainval_byseq=False, nframes=0):
+        super().__init__(base_path, inzip=inzip, phase=phase, nframes=nframes,
+                         trainval_split=trainval_split, trainval_random=trainval_random,
+                         trainval_byseq=trainval_byseq)
+        self.datatype = datatype
+
+        if phase == "testing":
+            raise ValueError("There is no testing split for the CADC dataset!")
+        if datatype != "labeled":
+            raise NotImplementedError("Currently only labeled data is supported!")
+
+        # count total number of frames
+        frame_count = dict()
+        _dates = ["2018_03_06", "2018_03_07", "2019_02_27"]
+        if self.inzip:
+            globs = [self.base_path.glob(f"{date}/00*/{datatype}.zip")
+                     for date in _dates]
+            for archive in chain(*globs):
+                with ZipFile(archive) as data:
+                    lidar_files = (name for name in data.namelist() if name.endswith(".bin"))
+
+                    seq = archive.stem
+                    frame_count[seq] = sum(1 for _ in lidar_files)
+        else:
+            for date in _dates:
+                if not (self.base_path / date).exists():
+                    continue
+
+                for archive in (self.base_path / date).iterdir():
+                    if not archive.is_dir():  # skip calibration files
+                        continue
+
+                    seq = archive.name
+                    frame_count[seq] = sum(1 for _ in (archive / "lidar_points" / "data").iterdir())
+
+        if not len(frame_count):
+            raise ValueError("Cannot parse the dataset or the dataset is empty, please check the path, the inzip option and the file structure")
+        self.frame_dict = SortedDict(frame_count)
+
+        self.frames = split_trainval_seq(phase, self.frame_dict, trainval_split, trainval_random, trainval_byseq)
+        self._label_cache = {}  # used to store parsed label data
+        self._calib_cache = {}  # used to store parsed calibration data
+        self._timestamp_cache = {}  # used to store parsed timestamps
+        self._pose_cache = {}  # used to store parsed pose data
+        self._tracklet_cache = {}  # used to store parsed tracklet data
+        self._tracklet_mapping = {}  # inverse mapping for tracklets
+
+    def __len__(self):
+        return len(self.frames)
+
+    @property
+    def sequence_ids(self):
+        return list(self.frame_dict.keys())
+
+    @property
+    def sequence_sizes(self):
+        return dict(self.frame_dict)
+
+    def _get_date(self, seq_id):
+        return seq_id[:10]
+
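+    # _locate_frame maps a flat dataset index (after mapping through the train/val
+    # split stored in self.frames) to a (sequence id, frame index) pair.
+    # Worked example with hypothetical counts: for frame_dict = {"0001": 10, "0002": 8}
+    # and nframes = 0, indices 0..9 map to ("0001", 0..9) and 10..17 to ("0002", 0..7);
+    # a nonzero nframes shrinks each sequence by nframes so that a window of nframes
+    # additional frames still fits inside the sequence.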
f"{date}/calib_velo_to_cam.txt") + } + else: + source = self.base_path / date + self._calib_cache[date] = { + "cam_to_cam": utils.load_calib_file(source, "calib_cam_to_cam.txt"), + "imu_to_velo": utils.load_calib_file(source, "calib_imu_to_velo.txt"), + "velo_to_cam": utils.load_calib_file(source, "calib_velo_to_cam.txt") + } + + def _load_calib(self, seq, raw=False): + # load the calibration file data + self._preload_calib(seq) + date = self._get_date(seq) + filedata = self._calib_cache[date] + if raw: + return filedata + + # load matrics + data = TransformSet("velo") + velo_to_cam = np.empty((3, 4)) + velo_to_cam[:3, :3] = filedata['velo_to_cam']['R'].reshape(3, 3) + velo_to_cam[:3, 3] = filedata['velo_to_cam']['T'] + for i in range(4): + S = filedata['cam_to_cam']['S_rect_%02d' % i].tolist() + # TODO: here we have different R_rect's, what's the difference of them against the one used in object detection + R = filedata['cam_to_cam']['R_rect_%02d' % i].reshape(3, 3) + P = filedata['cam_to_cam']['P_rect_%02d' % i].reshape(3, 4) + intri, offset = P[:, :3], P[:, 3] + projection = intri.dot(R) + offset_cartesian = np.linalg.inv(projection).dot(offset) + extri = np.vstack([velo_to_cam, np.array([0,0,0,1])]) + extri[:3, 3] += offset_cartesian + + frame = "cam%d" % i + data.set_intrinsic_camera(frame, projection, S, rotate=False) + data.set_extrinsic(extri, frame_to=frame) + + imu_to_velo = np.empty((3, 4)) + imu_to_velo[:3, :3] = filedata['imu_to_velo']['R'].reshape(3, 3) + imu_to_velo[:3, 3] = filedata['imu_to_velo']['T'] + data.set_intrinsic_general("imu") + data.set_extrinsic(imu_to_velo, frame_from="imu") + + # add position of vehicle bottom center and rear axis center + bc_rt = np.array([ + [1, 0, 0, -0.27], + [0, 1, 0, 0], + [0, 0, 1, 1.73] + ], dtype='f4') + data.set_intrinsic_general("bottom_center") + data.set_extrinsic(bc_rt, frame_to="bottom_center") + + rc_rt = np.array([ + [1, 0, 0, -0.805], + [0, 1, 0, 0], + [0, 0, 1, 0.30] + ]) + data.set_intrinsic_general("rear_center") + data.set_extrinsic(rc_rt, frame_from="bottom_center", frame_to="rear_center") + + return data + + def calibration_data(self, idx, raw=False): + assert not self._return_file_path, "The calibration is not stored in single file!" + if isinstance(idx, int): + seq_id, _ = self._locate_frame(idx) + else: + seq_id, _ = idx + + return self._load_calib(seq_id, raw=raw) + + def _preload_timestamp(self, seq_id): + date = self._get_date(seq_id) + if seq_id in self._timestamp_cache: + return + + tsdict = {} + for frame, folder in self._frame2folder.items(): + fname = Path(date, seq_id, folder, "timestamps.txt") + if self.inzip: + with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=fname) as data: + tsdict[frame] = utils.load_timestamps(data, fname, formatted=True).astype(int) // 1000 + else: + tsdict[frame] = utils.load_timestamps(self.base_path, fname, formatted=True).astype(int) // 1000 + self._timestamp_cache[seq_id] = tsdict + + @expand_idx_name(VALID_CAM_NAMES + VALID_LIDAR_NAMES) + def timestamp(self, idx, names="velo"): + assert not self._return_file_path, "The timestamp is not stored in single file!" 
+    def _load_calib(self, seq, raw=False):
+        # load the calibration file data
+        self._preload_calib(seq)
+        date = self._get_date(seq)
+        filedata = self._calib_cache[date]
+        if raw:
+            return filedata
+
+        # load matrices
+        data = TransformSet("velo")
+        velo_to_cam = np.empty((3, 4))
+        velo_to_cam[:3, :3] = filedata['velo_to_cam']['R'].reshape(3, 3)
+        velo_to_cam[:3, 3] = filedata['velo_to_cam']['T']
+        for i in range(4):
+            S = filedata['cam_to_cam']['S_rect_%02d' % i].tolist()
+            # TODO: there are several R_rect matrices here; clarify how they differ from the one used in object detection
+            R = filedata['cam_to_cam']['R_rect_%02d' % i].reshape(3, 3)
+            P = filedata['cam_to_cam']['P_rect_%02d' % i].reshape(3, 4)
+            intri, offset = P[:, :3], P[:, 3]
+            projection = intri.dot(R)
+            offset_cartesian = np.linalg.inv(projection).dot(offset)
+            extri = np.vstack([velo_to_cam, np.array([0, 0, 0, 1])])
+            extri[:3, 3] += offset_cartesian
+
+            frame = "cam%d" % i
+            data.set_intrinsic_camera(frame, projection, S, rotate=False)
+            data.set_extrinsic(extri, frame_to=frame)
+
+        imu_to_velo = np.empty((3, 4))
+        imu_to_velo[:3, :3] = filedata['imu_to_velo']['R'].reshape(3, 3)
+        imu_to_velo[:3, 3] = filedata['imu_to_velo']['T']
+        data.set_intrinsic_general("imu")
+        data.set_extrinsic(imu_to_velo, frame_from="imu")
+
+        # add positions of the vehicle bottom center and the rear axle center
+        bc_rt = np.array([
+            [1, 0, 0, -0.27],
+            [0, 1, 0, 0],
+            [0, 0, 1, 1.73]
+        ], dtype='f4')
+        data.set_intrinsic_general("bottom_center")
+        data.set_extrinsic(bc_rt, frame_to="bottom_center")
+
+        rc_rt = np.array([
+            [1, 0, 0, -0.805],
+            [0, 1, 0, 0],
+            [0, 0, 1, 0.30]
+        ])
+        data.set_intrinsic_general("rear_center")
+        data.set_extrinsic(rc_rt, frame_from="bottom_center", frame_to="rear_center")
+
+        return data
+
+    def calibration_data(self, idx, raw=False):
+        assert not self._return_file_path, "The calibration is not stored in a single file!"
+        if isinstance(idx, int):
+            seq_id, _ = self._locate_frame(idx)
+        else:
+            seq_id, _ = idx
+
+        return self._load_calib(seq_id, raw=raw)
+
+    def _preload_timestamp(self, seq_id):
+        date = self._get_date(seq_id)
+        if seq_id in self._timestamp_cache:
+            return
+
+        tsdict = {}
+        for frame, folder in self._frame2folder.items():
+            fname = Path(date, seq_id, folder, "timestamps.txt")
+            if self.inzip:
+                with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=fname) as data:
+                    tsdict[frame] = utils.load_timestamps(data, fname, formatted=True).astype(int) // 1000
+            else:
+                tsdict[frame] = utils.load_timestamps(self.base_path, fname, formatted=True).astype(int) // 1000
+        self._timestamp_cache[seq_id] = tsdict
+
+    @expand_idx_name(VALID_CAM_NAMES + VALID_LIDAR_NAMES)
+    def timestamp(self, idx, names="lidar"):
+        assert not self._return_file_path, "The timestamp is not stored in a single file!"
+        seq_id, frame_idx = idx
+        self._preload_timestamp(seq_id)
+        return self._timestamp_cache[seq_id][names][frame_idx]
+
+    def _preload_tracklets(self, seq_id):
+        if seq_id in self._tracklet_cache:
+            return
+
+        date = self._get_date(seq_id)
+        fname = Path(date, seq_id, "tracklet_labels.xml")
+        if self.inzip:
+            zname = seq_id[:-len(self.datatype)] + "tracklets"
+            with ZipFile(self.base_path / f"{zname}.zip") as data:
+                tracklets = utils.load_tracklets(data, fname)
+        else:
+            tracklets = utils.load_tracklets(self.base_path, fname)
+
+        # inverse mapping
+        objs = defaultdict(list)  # (frame -> list of objects)
+        for tid, tr in enumerate(tracklets):
+            dim = [tr.l, tr.w, tr.h]
+            tag = ObjectTag(tr.objectType, KittiObjectClass)
+            for pose_idx, pose in enumerate(tr.poses):
+                pos = [pose.tx, pose.ty, pose.tz]
+                pos[2] += dim[2] / 2
+                ori = Rotation.from_euler("ZYX", (pose.rz, pose.ry, pose.rx))
+                objs[pose_idx + tr.first_frame].append(ObjectTarget3D(pos, ori, dim, tag, tid=tid))
+
+        self._tracklet_cache[seq_id] = {k: Target3DArray(l, frame="velo") for k, l in objs.items()}
+
+    @expand_idx
+    def annotation_3dobject(self, idx):
+        assert not self._return_file_path, "The annotation is not stored in a single file!"
+        seq_id, frame_idx = idx
+        self._preload_tracklets(seq_id)
+        return self._tracklet_cache[seq_id][frame_idx]
+
+    @expand_idx
+    def pose(self, idx, raw=False):
+        seq_id, frame_idx = idx
+        date = self._get_date(seq_id)
+
+        file_name = Path(date, seq_id, "oxts", "data", "%010d.txt" % frame_idx)
+        if self._return_file_path:
+            return self.base_path / file_name
+
+        if self.inzip:
+            with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=file_name) as data:
+                oxt = utils.load_oxt_file(data, file_name)[0]
+        else:
+            oxt = utils.load_oxt_file(self.base_path, file_name)[0]
+        return utils.parse_pose_from_oxt(oxt)
+
+    @expand_idx_name(VALID_CAM_NAMES)
+    def camera_data(self, idx, names='camera_F'):
+        seq_id, frame_idx = idx
+        date = self._get_date(seq_id)
+
+        fname = Path(date, seq_id, self._frame2folder[names], 'data', '%010d.png' % frame_idx)
+        if self._return_file_path:
+            return self.base_path / fname
+
+        gray = False  # all CADC cameras are color cameras
+        if self.inzip:
+            with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=fname) as source:
+                return utils.load_image(source, fname, gray=gray)
+        else:
+            return utils.load_image(self.base_path, fname, gray=gray)
+
+    @expand_idx_name(VALID_LIDAR_NAMES)
+    def lidar_data(self, idx, names='lidar'):
+        seq_id, frame_idx = idx
+        date = self._get_date(seq_id)
+
+        fname = Path(date, seq_id, 'lidar_points', 'data', '%010d.bin' % frame_idx)
+        if self._return_file_path:
+            return self.base_path / fname
+
+        if self.inzip:
+            with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=fname) as source:
+                return utils.load_velo_scan(source, fname)
+        else:
+            return utils.load_velo_scan(self.base_path, fname)
+
+    @expand_idx
+    def identity(self, idx):
+        return idx
diff --git a/d3d/dataset/kitti/object.py b/d3d/dataset/kitti/object.py
index 8c68aa0..6e04d83 100644
--- a/d3d/dataset/kitti/object.py
+++ b/d3d/dataset/kitti/object.py
@@ -142,7 +142,7 @@ class KittiObjectLoader(DetectionDatasetBase):
 
     For description of constructor parameters, please refer to :class:`d3d.dataset.base.DetectionDatasetBase`
     """
-    VALID_CAM_NAMES = ["cam2", "cam3"]
+    VALID_CAM_NAMES = ["cam2", "cam3"]  # TODO: rename to camera_gray_left, etc.
     VALID_LIDAR_NAMES = ["velo"]
     VALID_OBJ_CLASSES = KittiObjectClass