<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="description"
content="Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation">
<meta name="keywords" content="Omnidirectional Imaging, Depth Estimation, Deep Learning">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation</title>
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<link rel="stylesheet" href="./static/css/bulma.min.css">
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="./static/css/bulma-slider.min.css">
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="./static/css/index.css">
<link rel="icon" href="./static/images/favicon.svg">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script defer src="./static/js/fontawesome.all.min.js"></script>
<script src="./static/js/bulma-carousel.min.js"></script>
<script src="./static/js/bulma-slider.min.js"></script>
<script src="./static/js/index.js"></script>
</head>
<body>
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<!-- Title -->
<h1 class="title is-1 publication-title">
<span style="font-variant: small-caps;">Helvipad</span>: A Real-World Dataset for Omnidirectional Stereo Depth Estimation
</h1>
<!-- Authors -->
<div class="is-size-5 publication-authors">
<span class="author-block">
<a href="https://ch.linkedin.com/in/mehdi-zayene-191a64156">Mehdi Zayene</a><sup>1</sup>,</span>
<span class="author-block">
<a href="https://people.epfl.ch/jannik.endres">Jannik Endres</a><sup>1,2</sup>,</span>
<span class="author-block">
<a href="https://people.epfl.ch/albias.havolli">Albias Havolli</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://chcorbi.github.io">Charles Corbière</a><sup>1,*</sup>,
</span><br>
<span class="author-block">
<a href="https://people.epfl.ch/salim.cherkaoui">Salim Cherkaoui</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://people.epfl.ch/alexandre.benahmedkontouli">Alexandre Ben Ahmed Kontouli</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://people.epfl.ch/alexandre.alahi">Alexandre Alahi</a><sup>1</sup>
</span>
</div>
<!-- Affiliations -->
<div class="is-size-5 publication-authors" style="margin-top: 10px;">
<span class="author-block"><sup>1</sup>École Polytechnique Fédérale de Lausanne (EPFL), </span>
<span class="author-block"><sup> 2</sup>TU Darmstadt</span>
<span class="project-lead"><small><br><sup>*</sup>Project Lead</small></span>
</div>
<!-- Logo -->
<div style="margin-top: 20px;">
<a href="https://www.epfl.ch/labs/vita/">
<img src="static/images/vita-epfl.png" width="300px" alt="VITA EPFL Logo" />
</a>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- PDF Link. -->
<span class="link-block">
<a href="https://arxiv.org/abs/2411.18335"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>
</span>
<!-- Code Link. -->
<span class="link-block">
<a href="https://github.com/vita-epfl/Helvipad"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>
</span>
<!-- Dataset Link. -->
<span class="link-block">
<a href="https://huggingface.co/datasets/chcorbi/helvipad"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-pirate.svg"
alt="Hugging Face Logo"
style="width: 20px; height: 20px; vertical-align: middle;">
</span>
<span>Dataset</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<section class="hero teaser">
<div class="container is-max-desktop">
<div class="hero-body">
<video id="teaser" autoplay muted loop playsinline height="100%">
<source src="./static/videos/helvipad.mov"
type="video/mp4">
</video>
</div>
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<!-- Abstract. -->
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
Despite considerable progress in stereo depth estimation, omnidirectional imaging remains underexplored,
mainly due to the lack of appropriate data.
</p>
<p>
We introduce <span style="font-variant: small-caps;">Helvipad</span>,
a real-world dataset for omnidirectional stereo depth estimation, consisting of 40K frames from video sequences
across diverse environments, including crowded indoor and outdoor scenes with varied lighting conditions.
Collected using two 360° cameras in a top-bottom setup and a LiDAR sensor, the dataset includes accurate
depth and disparity labels by projecting 3D point clouds onto equirectangular images. Additionally, we
provide an augmented training set with a significantly increased label density by using depth completion.
</p>
<p>
We benchmark leading stereo depth estimation models for both standard and omnidirectional images.
The results show that while recent stereo methods perform reasonably well, accurately estimating depth
in omnidirectional imaging remains a significant challenge. To address this, we introduce necessary adaptations to stereo models,
achieving improved performance.
</p>
</div>
</div>
</div>
</div>
</section>
<!-- Dataset Statistics Section -->
<section class="section" id="dataset">
<div class="container is-max-desktop">
<h2 class="title is-3">Dataset</h2>
<div class="content has-text-justified">
<p>
The <span style="font-variant: small-caps;">Helvipad</span> dataset comprises 39,553 labeled frames from indoor and outdoor scenes under various lighting conditions.
</p>
<figure>
<img src="static/images/front_page.png" alt="Dataset visualisations">
</figure>
<p>
Our data acquisition setup includes the following equipment:
</p>
<ul>
<li>
<strong>2 Ricoh Theta V cameras</strong>, capturing images in 4K/UHD equirectangular format with an initial size of 3840 × 1920 pixels at 30 fps, mounted in a top-bottom arrangement with a 19.1 cm baseline between them.
</li>
<li>
<strong>Ouster OS1-64 LiDAR Sensor</strong>, providing 64 beams, a vertical field of view of 45°, and capable of measuring depths from 0 to 120 meters at 10 fps, mounted 45.0 cm below the bottom camera.
</li>
<li>
<strong>Nvidia Jetson Xavier</strong>, serving as the central processor to manage data capture and ensure synchronization across all devices during data collection.
</li>
</ul>
<figure>
<img src="static/images/lidar_mapping.png" alt="LiDAR to 360° Mapping Illustration" style="width: 80%; height: auto;">
</figure>
<p>
Data was extracted from video sequences captured between December 2023 and February 2024. Each sequence is synchronized with its corresponding
LiDAR point clouds, which are projected onto the frames to obtain depth and disparity maps.
</p>
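<p>
To make the projection step concrete, the sketch below shows how a 3D LiDAR point could be mapped to equirectangular pixel coordinates, and how an angular disparity arises between the top and bottom views. It is a simplified illustration: the coordinate conventions (x right, y up, z forward) and axis orientations are assumptions, not the dataset's actual calibration pipeline.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>import numpy as np

# Minimal sketch of equirectangular projection and angular disparity for a
# top-bottom stereo pair. Coordinate conventions are assumed for illustration.

WIDTH, HEIGHT = 3840, 1920   # native equirectangular resolution
BASELINE = 0.191             # meters between the two cameras (see setup above)

def spherical_angles(point):
    """Return (azimuth phi, polar angle theta, range r) of a 3D point."""
    x, y, z = point
    r = np.linalg.norm(point)
    phi = np.arctan2(x, z)            # azimuth in [-pi, pi]
    theta = np.arccos(y / r)          # polar angle from zenith, in [0, pi]
    return phi, theta, r

def to_pixels(phi, theta):
    """Map spherical angles to equirectangular pixel coordinates."""
    u = (phi / (2 * np.pi) + 0.5) * WIDTH
    v = (theta / np.pi) * HEIGHT      # zenith maps to the top image row
    return u, v

# A LiDAR point expressed in the bottom camera frame, and the same point
# expressed in the top camera frame (the top camera sits BASELINE above).
p_bottom = np.array([1.0, -0.3, 4.0])
p_top = p_bottom - np.array([0.0, BASELINE, 0.0])

phi_b, theta_b, depth = spherical_angles(p_bottom)
_, theta_t, _ = spherical_angles(p_top)
u, v = to_pixels(phi_b, theta_b)      # where the point lands in the bottom image

# Angular disparity in degrees: the vertical angular shift between the views.
disparity_deg = np.degrees(theta_t - theta_b)
print(f"pixel ({u:.0f}, {v:.0f}), depth {depth:.2f} m, disparity {disparity_deg:.4f} deg")
</code></pre>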
<div class="columns is-centered">
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_all.png" alt="Histogram of Depth Values - All Scenes">
<figcaption>Depth Distribution - All</figcaption>
</figure>
</div>
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_indoor.png" alt="Histogram of Depth Values - Indoor Scenes">
<figcaption>Depth Distribution - Indoor</figcaption>
</figure>
</div>
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_outdoor.png" alt="Histogram of Depth Values - Outdoor Scenes">
<figcaption>Depth Distribution - Outdoor</figcaption>
</figure>
</div>
</div>
<p>
Depth values range from 0.5 to 225 meters, with averages of 8.1 meters overall, 5.4 meters for
indoor scenes, and 9.2 meters for outdoor scenes (day and night combined).
</p>
</div>
</div>
</section>
<!-- Benchmark Results Section -->
<section class="section" id="benchmark-results">
<div class="container is-max-desktop">
<h2 class="title is-3">Benchmark Results</h2>
<div class="content has-text-justified">
<p>
We evaluate multiple state-of-the-art and popular stereo matching methods, for both standard and 360° images. All models are trained on a single NVIDIA A100 GPU with
the largest possible batch size to ensure comparable use of computational resources.
</p>
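<p>
For reference, the three reported metrics are the mean absolute error (MAE), root mean squared error (RMSE), and mean absolute relative error (MARE). A minimal implementation could look as follows; masking out pixels with a non-positive ground-truth value (i.e., no LiDAR return) is an assumed convention for illustration.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>import numpy as np

def depth_metrics(pred, target):
    """MAE, RMSE and mean absolute relative error (MARE) over valid pixels."""
    mask = target > 0                      # assumed: zero marks missing labels
    err = pred[mask] - target[mask]
    mae = np.mean(np.abs(err))
    rmse = np.sqrt(np.mean(err**2))
    mare = np.mean(np.abs(err) / target[mask])
    return mae, rmse, mare
</code></pre>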
<table class="table is-striped is-bordered is-hoverable is-fullwidth">
<thead>
<tr>
<th rowspan="2">Method</th>
<th rowspan="2">Type</th>
<th colspan="3" class="has-text-centered">Disparity (°)</th>
<th colspan="3" class="has-text-centered">Depth (m)</th>
</tr>
<tr>
<th class="has-text-centered">MAE</th>
<th class="has-text-centered">RMSE</th>
<th class="has-text-centered">MARE</th>
<th class="has-text-centered">MAE</th>
<th class="has-text-centered">RMSE</th>
<th class="has-text-centered">MARE</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<a href="https://arxiv.org/abs/1803.08669" target="_blank">PSMNet</a>
</td>
<td>stereo</td>
<td class="has-text-centered">0.33</td>
<td class="has-text-centered">0.54</td>
<td class="has-text-centered">0.20</td>
<td class="has-text-centered">2.79</td>
<td class="has-text-centered">6.17</td>
<td class="has-text-centered">0.29</td>
</tr>
<tr>
<td>
<a href="https://arxiv.org/abs/1911.04460" target="_blank">360SD-Net</a>
</td>
<td>360° stereo</td>
<td class="has-text-centered">0.21</td>
<td class="has-text-centered">0.42</td>
<td class="has-text-centered">0.18</td>
<td class="has-text-centered">2.14</td>
<td class="has-text-centered">5.12</td>
<td class="has-text-centered">0.15</td>
</tr>
<tr>
<td>
<a href="https://arxiv.org/abs/2303.06615" target="_blank">IGEV-Stereo</a>
</td>
<td>stereo</td>
<td class="has-text-centered">0.22</td>
<td class="has-text-centered">0.41</td>
<td class="has-text-centered">0.17</td>
<td class="has-text-centered">1.85</td>
<td class="has-text-centered">4.44</td>
<td class="has-text-centered">0.15</td>
</tr>
<tr>
<td>360-IGEV-Stereo</td>
<td>360° stereo</td>
<td class="has-text-centered"><b>0.18</b></td>
<td class="has-text-centered"><b>0.39</b></td>
<td class="has-text-centered"><b>0.15</b></td>
<td class="has-text-centered"><b>1.77</b></td>
<td class="has-text-centered"><b>4.36</b></td>
<td class="has-text-centered"><b>0.14</b></td>
</tr>
</tbody>
</table>
<p>
The dataset also serves as an ideal testbed for assessing the robustness of depth estimation methods to diverse lighting conditions and depth ranges,
by training and evaluating models on different subsets of the dataset (e.g., indoor vs. outdoor scenes).
</p>
<figure>
<img src="static/images/cross_scene_generalization.png" alt="Cross-Scene Generalization Performance">
<figcaption>Cross-Scene Generalization Performance</figcaption>
</figure>
</div>
</div>
</section>
<section class="section" id="dataset-structure">
<div class="container is-max-desktop content">
<h2 class="title is-3">Download</h2>
<p>
Use the link below to access the dataset on the Hugging Face Hub.
</p>
<div class="has-text-centered">
<a href="https://huggingface.co/datasets/chcorbi/helvipad" target="_blank" class="button is-primary is-rounded is-large">
Download Dataset
</a>
</div>
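<p>
Alternatively, the full dataset can be fetched programmatically with the <code>huggingface_hub</code> Python package; the local directory name below is just an example.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>from huggingface_hub import snapshot_download

# Downloads the full dataset repository into ./helvipad
snapshot_download(
    repo_id="chcorbi/helvipad",
    repo_type="dataset",
    local_dir="helvipad",
)
</code></pre>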
<div style="margin-top: 30px;">
<p>
The dataset is organized into training and testing subsets, whose structure is outlined below:
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>helvipad/
├── train/
│ ├── depth_maps # Depth maps generated from LiDAR data
│ ├── depth_maps_augmented # Augmented depth maps using depth completion
│ ├── disparity_maps # Disparity maps computed from depth maps
│ ├── disparity_maps_augmented # Augmented disparity maps using depth completion
│ ├── images_top # Top-camera RGB images
│ ├── images_bottom # Bottom-camera RGB images
│ ├── LiDAR_pcd # Original LiDAR point cloud data
├── test/
│ ├── depth_maps # Depth maps generated from LiDAR data
│ ├── disparity_maps # Disparity maps computed from depth maps
│ ├── images_top # Top-camera RGB images
│ ├── images_bottom # Bottom-camera RGB images
│ ├── LiDAR_pcd # Original LiDAR point cloud data
</code></pre>
</div>
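<p>
Once downloaded, a training sample can be assembled from the matching files in each folder. The sketch below is illustrative only: the frame naming scheme and the 16-bit PNG depth encoding with a centimeter scale factor are assumptions, so please consult the dataset card for the actual format.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>from pathlib import Path

import numpy as np
from PIL import Image

ROOT = Path("helvipad/train")
FRAME = "0000"  # hypothetical frame identifier

# Stereo pair: top and bottom equirectangular RGB images.
img_top = np.asarray(Image.open(ROOT / "images_top" / f"{FRAME}.png"))
img_bottom = np.asarray(Image.open(ROOT / "images_bottom" / f"{FRAME}.png"))

# Ground-truth depth, assumed stored as a 16-bit PNG in centimeters.
depth_raw = np.asarray(Image.open(ROOT / "depth_maps" / f"{FRAME}.png"))
depth_m = depth_raw.astype(np.float32) / 100.0  # assumed scale factor

valid = depth_raw > 0  # pixels that carry a LiDAR-derived label
</code></pre>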
</div>
</section>
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<p>
If you use the <span style="font-variant: small-caps;">Helvipad</span> dataset in your research, please cite it using the following BibTeX entry:
</p>
<pre><code>@misc{zayene2024helvipad,
author = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi\`{e}re, Charles and Cherkaoui, Salim and Ben Ahmed Kontouli, Alexandre and Alahi, Alexandre},
title = {Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation},
year = {2024},
eprint = {2411.18335},
archivePrefix = {arXiv},
primaryClass = {cs.CV}
}</code></pre>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="columns is-centered">
<div class="column is-8">
<div class="content">
<p>
This website is licensed under a <a rel="license"
href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
Commons Attribution-ShareAlike 4.0 International License</a>.
</p>
<p>
This page was built using the
<a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a>
which was adapted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
</p>
</div>
</div>
</div>
</div>
</footer>
</body>
</html>