
Commit 0f9b909
Committed May 30, 2024 · 1 parent b26dc96

add hypersim

20 files changed: +14271 −20 lines
 

README.md (+37)
@@ -28,6 +28,43 @@ You will need to update the data configs to point to the location of your
ScanNetv2 data. You can do this by setting `dataset_path: <YOUR_DATA_LOCATION>` in the six
`configs/data/scannet_*.yaml` files.

# Hypersim

To download the Hypersim dataset, please follow the instructions in the [Hypersim Dataset](https://github.com/apple/ml-hypersim) repo.

Once the dataset has been downloaded and extracted, please update the `dataset_path` argument of the Hypersim data configs in `configs/data/` to point to the extracted dataset.

Note that the depth maps provided as part of the dataset are not planar depths and need to be planarised. We provide helper functions to planarise the depth maps (see the `_get_prependicular_depths` method in `datasets/hypersim_dataset.py`). The planarised depth maps can be generated with the `data_scripts/generate_hypersim_planar_depths.py` script:

```bash
# train
python ./data_scripts/generate_hypersim_planar_depths.py \
    --data_config configs/data/hypersim_default_train.yaml \
    --num_workers 8

# val
python ./data_scripts/generate_hypersim_planar_depths.py \
    --data_config configs/data/hypersim_default_val.yaml \
    --num_workers 8
```
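
For reference, the conversion such a helper performs is simple geometry: Hypersim's `depth_meters` images store the distance from the camera centre along each pixel's ray, so dividing by the ray length at unit depth recovers planar (z-buffer) depth. Below is a minimal numpy sketch, assuming a pinhole camera with the principal point at the image centre; the function name and arguments are illustrative, not the repo's actual `_get_prependicular_depths` helper:

```python
import numpy as np

def distance_to_planar_depth(dist_hw, focal_px):
    """Convert distance-along-ray depth (H, W) to planar (z-buffer) depth."""
    h, w = dist_hw.shape
    # Pixel-centre offsets from the principal point, assumed at the image centre.
    u = np.linspace(-0.5 * w + 0.5, 0.5 * w - 0.5, w)
    v = np.linspace(-0.5 * h + 0.5, 0.5 * h - 0.5, h)
    uu, vv = np.meshgrid(u, v)
    # Length of each pixel's ray for a point at z = focal_px.
    ray_norm = np.sqrt(uu**2 + vv**2 + focal_px**2)
    return dist_hw * focal_px / ray_norm

if __name__ == "__main__":
    dist = np.full((768, 1024), 2.0)  # dummy 2 m distance image
    # Illustrative focal length in pixels; read the real value from scene metadata.
    planar = distance_to_planar_depth(dist, focal_px=886.81)
    print(planar.min(), planar.max())  # planar depth <= distance everywhere
```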

Next, we need to generate the frame tuples, similarly to the ScanNetv2 dataset:

```bash
# train
python ./data_scripts/generate_train_tuples.py \
    --data_config configs/data/hypersim_default_train.yaml \
    --num_workers 8

# val
python ./data_scripts/generate_val_tuples.py \
    --data_config configs/data/hypersim_default_val.yaml \
    --num_workers 8
```
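
The generated tuple files (e.g. `data_splits/hypersim/bd_split/train_eight_view_deepvmvs_bd.txt`) can be inspected directly. Here is a small sketch, assuming the DeepVideoMVS-style convention of one space-separated tuple per line, a scan name followed by `num_images_in_tuple` frame ids; verify against your generated files:

```python
def load_tuples(tuple_file_path):
    """Read (scan_name, [frame_id, ...]) pairs, one tuple per line."""
    tuples = []
    with open(tuple_file_path) as f:
        for line in f:
            parts = line.split()
            if parts:
                tuples.append((parts[0], parts[1:]))
    return tuples

tuples = load_tuples("data_splits/hypersim/bd_split/train_eight_view_deepvmvs_bd.txt")
print(f"{len(tuples)} tuples; first: {tuples[0]}")
```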

After the tuple generation, you should be ready to train on Hypersim using the provided configs!

We provide the train and val splits we used for our experiments (see `data_splits/hypersim/bd_split/train_files_bd.json` and `data_splits/hypersim/bd_split/val_files_bd.json`).

## 📦 Models

We provide the following pretrained models for you to try out - we suggest using the Hypersim-trained model to obtain the best qualitative results.
New file (+10): Hypersim test data config in `configs/data/`

```yaml
!!python/object:options.Options
dataset_path: /mnt/nas3/shared/datasets/hypersim
tuple_info_file_location: data_splits/hypersim/standard_split/
dataset_scan_split_file: data_splits/hypersim/standard_split/test_files_all.json
dataset: hypersim
mv_tuple_file_suffix: _eight_view_deepvmvs.txt
num_images_in_tuple: 8
frame_tuple_type: default
split: test
use_min_max_depth: True
```
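
These configs are tagged `!!python/object:options.Options`, so PyYAML instantiates the repo's `options.Options` class directly when loading; the repo's `options.OptionsHandler` presumably does this for you when you pass `--data_config`. If you want to peek at a config by hand, a minimal sketch (the filename is illustrative, since this page does not show the new config's path):

```python
import yaml  # PyYAML

import options  # must be importable so the !!python/object tag can resolve

# unsafe_load is required here: safe_load/full_load reject python/object tags.
with open("configs/data/hypersim_default_test.yaml") as f:
    opts = yaml.unsafe_load(f)

print(type(opts), opts.dataset, opts.split)
```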
New file (+9): `configs/data/hypersim_default_train.yaml`

```yaml
!!python/object:options.Options
dataset_path: /mnt/nas3/shared/datasets/hypersim/
tuple_info_file_location: data_splits/hypersim/bd_split/
dataset_scan_split_file: data_splits/hypersim/bd_split/train_files_bd.json
dataset: hypersim
mv_tuple_file_suffix: _eight_view_deepvmvs_bd.txt
num_images_in_tuple: 8
frame_tuple_type: default
split: train
```
New file (+9): `configs/data/hypersim_default_val.yaml`

```yaml
!!python/object:options.Options
dataset_path: /mnt/nas3/shared/datasets/hypersim/
tuple_info_file_location: data_splits/hypersim/bd_split/
dataset_scan_split_file: data_splits/hypersim/bd_split/val_files_bd.json
dataset: hypersim
mv_tuple_file_suffix: _eight_view_deepvmvs_bd.txt
num_images_in_tuple: 8
frame_tuple_type: default
split: val
```
New file (+27): training config for the `implicit_depth` model

```yaml
!!python/object:options.Options
feature_volume_type: mlp_feature_volume
batch_size: 12
cost_volume_aggregation: dot
cv_encoder_type: multi_scale_encoder
depth_decoder_name: unet_pp
gpus: 2
image_encoder_name: efficientnet
log_interval: 100
val_interval: 400
loss_type: log_l1
lr: 0.0001
wd: 0.0001
matching_encoder_type: resnet
name: implicit_depth
num_sanity_val_steps: 0
num_workers: 12
precision: 16
random_seed: 0
full_depth_supervision: true
lr_steps: [18000, 36000]
max_steps: 50000
lazy_load_weights_from_checkpoint: weights/regression.ckpt
near_surface_ratio: 0.25
bd_regularisation_weight: 0.5
binary_loss_positive_weight: 1.0
bd_edge_regularision: false
```
New file (+28): training config for the `hero_model_bd_temporal` model

```yaml
!!python/object:options.Options
feature_volume_type: mlp_feature_volume
batch_size: 12
cost_volume_aggregation: dot
cv_encoder_type: multi_scale_encoder
depth_decoder_name: unet_pp
gpus: 2
image_encoder_name: efficientnet
log_interval: 100
val_interval: 400
loss_type: log_l1
lr: 0.0001
wd: 0.0001
matching_encoder_type: resnet
name: hero_model_bd_temporal
num_sanity_val_steps: 0
num_workers: 12
precision: 16
random_seed: 0
full_depth_supervision: true
lr_steps: [18000, 36000]
max_steps: 14000
lazy_load_weights_from_checkpoint: sr_bd.ckpt
near_surface_ratio: 0.25
bd_regularisation_weight: 0.5
binary_loss_positive_weight: 1.0
bd_edge_regularision: false
use_prior: true
```
New file (+19): training config for the `regression` model

```yaml
!!python/object:options.Options
feature_volume_type: mlp_feature_volume
batch_size: 16
cost_volume_aggregation: dot
cv_encoder_type: multi_scale_encoder
depth_decoder_name: unet_pp
gpus: 2
image_encoder_name: efficientnet
log_interval: 100
val_interval: 300
loss_type: log_l1
lr: 0.0001
wd: 0.0001
matching_encoder_type: resnet
name: regression
num_sanity_val_steps: 0
num_workers: 12
precision: 16
random_seed: 0
```
New file (+127): `data_scripts/generate_hypersim_planar_depths.py`

```python
"""Script for generating planar depth maps for the Hypersim Dataset

Run like so for generating/saving depth maps to dataset_path defined in the yaml:

python ./data_scripts/generate_hypersim_planar_depths.py
    --data_config configs/data/hypersim_default_train.yaml
    --num_workers 8

where hypersim_default_train.yaml looks like:
!!python/object:options.Options
dataset_path: HYPERSIM_PATH/
tuple_info_file_location: $tuples_directory$
dataset_scan_split_file: $train_split_list_location$
dataset: hypersim
mv_tuple_file_suffix: _eight_view_deepvmvs.txt
num_images_in_tuple: 8
frame_tuple_type: default
split: train

For val, use configs/data/hypersim_default_val.yaml.

This script will save the planar depth maps in the following pattern:
{dataset_path}/"data"
    / {scene}
    / "images"
    / f"scene_{cam}_geometry_hdf5"
    / f"frame.{int(frame_id):04d}.depth_meters_planar.hdf5"

"""
import sys

sys.path.append("/".join(sys.path[0].split("/")[:-1]))

from functools import partial
from multiprocessing import Manager
from multiprocessing.pool import Pool

import options
from utils.dataset_utils import get_dataset


def crawl_subprocess(opts, scan, count, progress):
    """
    Generates and saves planar depth maps to disk for a given scene.

    Args:
        opts: options dataclass.
        scan: scan to operate on.
        count: total count of multiprocessed scans.
        progress: a Pool() progress value for tracking progress. For debugging
            you can pass
                multiprocessing.Manager().Value('i', 0)
            for this.

    """

    print(f"Generating planar depths for scene {scan}")

    # get dataset
    dataset_class, _ = get_dataset(
        opts.dataset, opts.dataset_scan_split_file, opts.single_debug_scan_id, verbose=False
    )

    ds = dataset_class(
        dataset_path=opts.dataset_path,
        mv_tuple_file_suffix=None,
        split=opts.split,
        tuple_info_file_location=opts.tuple_info_file_location,
        pass_frame_id=True,
        verbose_init=False,
    )

    frame_ids = ds._get_frame_ids(opts.split, scan)
    for frame_ind in frame_ids:
        ds._save_prependicular_depths_to_disk(scan, frame_ind)

    progress.value += 1
    print(f"Completed scan {scan}, {progress.value} of total {count}\r")


def crawl(opts, scans):
    """
    Multiprocessing helper for crawl_subprocess.

    Args:
        opts: options dataclass.
        scans: scans to multiprocess.

    """
    pool = Pool(opts.num_workers)
    manager = Manager()

    count = len(scans)
    progress = manager.Value("i", 0)

    crawler = crawl_subprocess

    pool.imap_unordered(
        partial(crawler, opts, count=count, progress=progress),
        scans,
    )
    pool.close()
    # wait for all issued tasks to complete
    pool.join()


if __name__ == "__main__":
    # load options file
    option_handler = options.OptionsHandler()
    option_handler.parse_and_merge_options(ignore_cl_args=False)
    option_handler.pretty_print_options()
    opts = option_handler.options

    # get dataset
    dataset_class, scan_names = get_dataset(
        opts.dataset, opts.dataset_scan_split_file, opts.single_debug_scan_id, verbose=False
    )

    if opts.single_debug_scan_id is not None:
        crawler = crawl_subprocess
        crawler(
            opts,
            opts.single_debug_scan_id,
            0,
            Manager().Value("i", 0),
        )
    else:
        crawl(opts, scan_names)
```
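
Per the docstring above, the script writes each planar depth map alongside the scene's original geometry HDF5 files. A small hypothetical helper that mirrors that documented pattern, useful for locating the outputs:

```python
from pathlib import Path

def planar_depth_path(dataset_path, scene, cam, frame_id):
    """Where generate_hypersim_planar_depths.py saves a frame's planar depth."""
    return (
        Path(dataset_path) / "data" / scene / "images"
        / f"scene_{cam}_geometry_hdf5"
        / f"frame.{int(frame_id):04d}.depth_meters_planar.hdf5"
    )

# Example with made-up scene/camera names in Hypersim's naming style:
print(planar_depth_path("/data/hypersim", "ai_001_001", "cam_00", 7))
```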

data_scripts/generate_train_tuples.py (+18 −7)

```diff
@@ -49,7 +49,8 @@
 import numpy as np
 
 import options
-from tools.keyframe_buffer import DVMVS_Config, is_valid_pair
+from datasets.hypersim_dataset import HypersimDataset
+from tools.keyframe_buffer import DVMVS_Config, DVMVS_Hypersim_Config, is_valid_pair
 from utils.dataset_utils import get_dataset
 
 
@@ -177,6 +178,11 @@ def crawl_subprocess_short(opts_temp_filepath, scan, count, progress):
         verbose_init=False,
     )
 
+    if opts.dataset == "hypersim":
+        keyframe_config = DVMVS_Hypersim_Config
+    else:
+        keyframe_config = DVMVS_Config
+
     valid_frames = ds.get_valid_frame_ids(opts.split, scan)
 
     frame_ind_to_frame_id = {}
@@ -197,8 +203,8 @@
             poses,
             used_pairs,
             is_backward=multiplier[1],
-            initial_pose_dist_min=(multiplier[0] * DVMVS_Config.train_minimum_pose_distance),
-            initial_pose_dist_max=(multiplier[0] * DVMVS_Config.train_maximum_pose_distance),
+            initial_pose_dist_min=(multiplier[0] * keyframe_config.train_minimum_pose_distance),
+            initial_pose_dist_max=(multiplier[0] * keyframe_config.train_maximum_pose_distance),
         )
 
         for pair in pairs:
@@ -267,6 +273,11 @@ def crawl_subprocess_long(opts_temp_filepath, scan, count, progress):
         verbose_init=False,
     )
 
+    if opts.dataset == "hypersim":
+        keyframe_config = DVMVS_Hypersim_Config
+    else:
+        keyframe_config = DVMVS_Config
+
     valid_frames = ds.get_valid_frame_ids(opts.split, scan)
 
     frame_ind_to_frame_id = {}
@@ -289,7 +300,7 @@
     for i in range(sequence_length):
         used_nodes[i] = 0
 
-    calculated_step = DVMVS_Config.train_crawl_step
+    calculated_step = keyframe_config.train_crawl_step
     samples = []
     for offset, multiplier, is_backward in [
         (0 % calculated_step, 1.0, False),
@@ -338,10 +349,10 @@
                 check3 = is_valid_pair(
                     poses[previous_index],
                     poses[current_index],
-                    (multiplier * DVMVS_Config.train_minimum_pose_distance),
-                    (multiplier * DVMVS_Config.train_maximum_pose_distance),
+                    (multiplier * keyframe_config.train_minimum_pose_distance),
+                    (multiplier * keyframe_config.train_maximum_pose_distance),
                     t_norm_threshold=(
-                        multiplier * DVMVS_Config.train_minimum_pose_distance * 0.5
+                        multiplier * keyframe_config.train_minimum_pose_distance * 0.5
                     ),
                 )
 
```
New data split files (large diffs not rendered):

- data_splits/hypersim/bd_split/train_eight_view_deepvmvs_bd.txt (+11,572)
- data_splits/hypersim/bd_split/train_files_bd.json (+1)
- data_splits/hypersim/bd_split/val_eight_view_deepvmvs_bd.txt (+1,543)
- data_splits/hypersim/bd_split/val_files_bd.json (+1)
- data_splits/hypersim/standard_split/test_files_all.json (+1)
- data_splits/hypersim/standard_split/train_files_all.json (+1)
- data_splits/hypersim/standard_split/val_files_all.json (+1)

New dataset implementation (large diff not rendered):

- datasets/hypersim_dataset.py (+828)

experiment_modules/depth_model.py (+22 −13)

```diff
@@ -173,8 +173,8 @@ def __init__(self, opts):
 
         # all the losses
         self.si_loss = ScaleInvariantLoss()
-        self.grad_loss = MSGradientLoss()
         self.abs_loss = nn.L1Loss()
+        self.grad_loss = MSGradientLoss()
         self.normals_loss = NormalsLoss()
         self.mv_depth_loss = MVDepthLoss(
             self.run_opts.image_height // 2,
@@ -495,25 +495,34 @@ def compute_losses(self, cur_data, src_data, outputs):
         if not found_scale:
             raise Exception("Could not find a valid scale to compute si loss!")
 
-        grad_loss = self.grad_loss(depth_gt, depth_pred)
+        if self.run_opts.dataset == "hypersim":
+            grad_loss = 0
+        else:
+            grad_loss = self.grad_loss(depth_gt, depth_pred)
         abs_loss = self.abs_loss(depth_gt[mask_b], depth_pred[mask_b])
         si_loss = self.si_loss(log_depth_gt[mask_b], log_depth_pred[mask_b])
 
         mask_b_limit = torch.logical_and(mask_b, depth_pred > 0.1)
         inv_abs_loss = self.abs_loss(1 / depth_gt[mask_b_limit], 1 / depth_pred[mask_b_limit])
 
         log_l1_loss = self.abs_loss(log_depth_gt[mask_b], log_depth_pred[mask_b])
-        normals_loss = self.normals_loss(normals_gt, normals_pred)
-
-        mv_loss = self.mv_depth_loss(
-            depth_pred_b1hw=depth_pred,
-            cur_depth_b1hw=depth_gt,
-            src_depth_bk1hw=src_data["depth_b1hw"],
-            cur_invK_b44=cur_data[f"invK_s0_b44"],
-            src_K_bk44=src_data[f"K_s0_b44"],
-            cur_world_T_cam_b44=cur_data["world_T_cam_b44"],
-            src_cam_T_world_bk44=src_data["cam_T_world_b44"],
-        )
+        if self.run_opts.dataset == "hypersim":
+            normals_loss = 0
+        else:
+            normals_loss = self.normals_loss(normals_gt, normals_pred)
+
+        if self.run_opts.dataset == "hypersim":
+            mv_loss = 0
+        else:
+            mv_loss = self.mv_depth_loss(
+                depth_pred_b1hw=depth_pred,
+                cur_depth_b1hw=depth_gt,
+                src_depth_bk1hw=src_data["depth_b1hw"],
+                cur_invK_b44=cur_data[f"invK_s0_b44"],
+                src_K_bk44=src_data[f"K_s0_b44"],
+                cur_world_T_cam_b44=cur_data["world_T_cam_b44"],
+                src_cam_T_world_bk44=src_data["cam_T_world_b44"],
+            )
 
         loss = ms_loss + 1.0 * grad_loss + 1.0 * normals_loss + 0.2 * mv_loss
 
```
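The three edits above share one pattern: for Hypersim, the gradient, normals, and multi-view depth terms are zeroed, leaving the remaining depth losses to supervise training. Condensed into a hypothetical helper to make the final weighting explicit:

```python
def combine_losses(ms_loss, grad_loss, normals_loss, mv_loss, is_hypersim):
    """Final weighting from compute_losses; zero the skipped terms for Hypersim."""
    if is_hypersim:
        grad_loss = normals_loss = mv_loss = 0.0
    return ms_loss + 1.0 * grad_loss + 1.0 * normals_loss + 0.2 * mv_loss

print(combine_losses(1.0, 0.2, 0.1, 0.3, is_hypersim=True))   # 1.0
print(combine_losses(1.0, 0.2, 0.1, 0.3, is_hypersim=False))  # 1.36
```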
tools/keyframe_buffer.py (+13)

```diff
@@ -22,6 +22,19 @@ class DVMVS_Config:
     test_optimal_R_measure = 0.0
 
 
+class DVMVS_Hypersim_Config:
+    # train tuple settings
+    train_minimum_pose_distance = 0.125
+    train_maximum_pose_distance = 2.5
+    train_crawl_step = 3
+
+    # test tuple settings
+    test_keyframe_buffer_size = 30
+    test_keyframe_pose_distance = 0.1
+    test_optimal_t_measure = 0.15
+    test_optimal_R_measure = 0.0
+
+
 def is_pose_available(pose):
     is_nan = np.isnan(pose).any()
     is_inf = np.isinf(pose).any()
```
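
These thresholds feed `is_valid_pair` in the tuple-generation scripts; its call signature appears in the `generate_train_tuples.py` diff above. A self-contained usage sketch with two made-up poses:

```python
import numpy as np

from tools.keyframe_buffer import DVMVS_Hypersim_Config, is_valid_pair

cfg = DVMVS_Hypersim_Config

# Two hypothetical 4x4 camera poses, 0.5 m apart along x.
pose_a = np.eye(4)
pose_b = np.eye(4)
pose_b[0, 3] = 0.5

ok = is_valid_pair(
    pose_a,
    pose_b,
    cfg.train_minimum_pose_distance,
    cfg.train_maximum_pose_distance,
    t_norm_threshold=cfg.train_minimum_pose_distance * 0.5,
)
print(ok)  # whether the relative pose falls inside the configured distance band
```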

utils/dataset_utils.py (+4)

```diff
@@ -6,6 +6,10 @@
 from datasets.scanniverse_dataset import ScanniverseDataset
 from datasets.seven_scenes_dataset import SevenScenesDataset
 from datasets.vdr_dataset import VDRDataset
+from datasets.scanniverse_dataset import ScanniverseDataset
+from datasets.hypersim_dataset import HypersimDataset
+
+import json
 
 
 def get_dataset(dataset_name, split_filepath, single_debug_scan_id=None, verbose=True):
```
