<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="description"
content="Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation">
<meta name="keywords" content="Omnidirectional Imaging, Depth Estimation, Deep Learning">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation</title>
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<link rel="stylesheet" href="./static/css/bulma.min.css">
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="./static/css/bulma-slider.min.css">
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="./static/css/index.css">
<link rel="icon" href="./static/images/favicon.svg">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script defer src="./static/js/fontawesome.all.min.js"></script>
<script src="./static/js/bulma-carousel.min.js"></script>
<script src="./static/js/bulma-slider.min.js"></script>
<script src="./static/js/index.js"></script>
</head>
<body>
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<!-- Title -->
<h1 class="title is-1 publication-title">
<span style="font-variant: small-caps;">Helvipad</span>: A Real-World Dataset for Omnidirectional Stereo Depth Estimation
</h1>
<!-- Authors -->
<div class="is-size-5 publication-authors">
<span class="author-block">
<a href="https://ch.linkedin.com/in/mehdi-zayene-191a64156">Mehdi Zayene</a><sup>1</sup>,</span>
<span class="author-block">
<a href="https://people.epfl.ch/jannik.endres">Jannik Endres</a><sup>1,2</sup>,</span>
<span class="author-block">
<a href="https://people.epfl.ch/albias.havolli">Albias Havolli</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://chcorbi.github.io">Charles Corbière</a><sup>1,*</sup>,
</span><br>
<span class="author-block">
<a href="https://people.epfl.ch/salim.cherkaoui">Salim Cherkaoui</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://people.epfl.ch/alexandre.benahmedkontouli">Alexandre Ben Ahmed Kontouli</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://people.epfl.ch/alexandre.alahi">Alexandre Alahi</a><sup>1</sup>
</span>
</div>
<!-- Affiliations -->
<div class="is-size-5 publication-authors" style="margin-top: 10px;">
<span class="author-block"><sup>1</sup>École Polytechnique Fédérale de Lausanne (EPFL), </span>
<span class="author-block"><sup> 2</sup>TU Darmstadt</span>
<span class="project-lead"><small><br><sup>*</sup>Project Lead</small></span>
</div>
<!-- Logo -->
<div style="margin-top: 20px;">
<a href="https://www.epfl.ch/labs/vita/">
<img src="static/images/vita-epfl.png" width="300px" alt="VITA EPFL Logo" />
</a>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- PDF Link. -->
<span class="link-block">
<a href="https://arxiv.org/abs/2411.18335"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>
</span>
<!-- Code Link. -->
<span class="link-block">
<a href="https://github.com/vita-epfl/Helvipad"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>
</span>
<!-- Dataset Link. -->
<span class="link-block">
<a href="https://huggingface.co/datasets/chcorbi/helvipad"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-pirate.svg"
alt="Hugging Face Logo"
style="width: 20px; height: 20px; vertical-align: middle;">
</span>
<span>Dataset</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<section class="hero teaser">
<div class="container is-max-desktop">
<div class="hero-body">
<video id="teaser" autoplay muted loop playsinline height="100%">
<source src="./static/videos/helvipad.mov"
type="video/mp4">
</video>
</div>
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<!-- Abstract. -->
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
Despite considerable progress in stereo depth estimation, omnidirectional imaging remains underexplored,
mainly due to the lack of appropriate data.
</p>
<p>
We introduce <span style="font-variant: small-caps;">Helvipad</span>,
a real-world dataset for omnidirectional stereo depth estimation, consisting of 40K frames from video sequences
across diverse environments, including crowded indoor and outdoor scenes with varied lighting conditions.
Collected using two 360° cameras in a top-bottom setup and a LiDAR sensor, the dataset includes accurate
depth and disparity labels by projecting 3D point clouds onto equirectangular images. Additionally, we
provide an augmented training set with a significantly increased label density by using depth completion.
</p>
<p>
We benchmark leading stereo depth estimation models for both standard and omnidirectional images.
The results show that while recent stereo methods perform reasonably well, accurately estimating depth
in omnidirectional imaging remains a significant challenge. To address this, we introduce necessary adaptations to stereo models,
achieving improved performance.
</p>
</div>
</div>
</div>
</div>
</section>
<!-- Dataset Statistics Section -->
<section class="section" id="dataset">
<div class="container is-max-desktop">
<h2 class="title is-3">Dataset</h2>
<div class="content has-text-justified">
<p>
The <span style="font-variant: small-caps;">Helvipad</span> dataset comprises 39,553 labeled frames from indoor and outdoor scenes under various lighting conditions.
</p>
<figure>
<img src="static/images/front_page.png" alt="Dataset visualisations">
</figure>
<p>
Our data acquisition setup includes the following equipment:
</p>
<ul>
<li>
<strong>2 Ricoh Theta V cameras</strong>, capturing images in 4K/UHD equirectangular format with an initial size of 3840 × 1920 pixels at 30 fps, mounted in a top-bottom arrangement with a 19.1 cm baseline between them.
</li>
<li>
<strong>Ouster OS1-64 LiDAR Sensor</strong>, providing 64 beams, a vertical field of view of 45°, and capable of measuring depths from 0 to 120 meters at 10 fps, mounted 45.0 cm below the bottom camera.
</li>
<li>
<strong>Nvidia Jetson Xavier</strong>, serving as the central processor to manage data capture and ensure synchronization across all devices during data collection.
</li>
</ul>
<figure>
<img src="static/images/lidar_mapping.png" alt="LiDAR to 360° Mapping Illustration" style="width: 80%; height: auto;">
</figure>
<p>
Data was extracted from video sequences captured between December 2023 and February 2024. Each sequence is synchronized with its corresponding
LiDAR point clouds, which are projected onto the frames to obtain depth and disparity maps.
</p>
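<p>
To make the projection step concrete, the sketch below shows how a 3D LiDAR point could be mapped to equirectangular pixel coordinates, and how an angular disparity arises between the top and bottom views. It is a simplified illustration: the coordinate conventions (x right, y up, z forward) and axis orientations are assumptions, not the dataset's actual calibration pipeline.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>import numpy as np

# Minimal sketch of equirectangular projection and angular disparity for a
# top-bottom stereo pair. Coordinate conventions are assumed for illustration.

WIDTH, HEIGHT = 3840, 1920   # native equirectangular resolution
BASELINE = 0.191             # meters between the two cameras (see setup above)

def spherical_angles(point):
    """Return (azimuth phi, polar angle theta, range r) of a 3D point."""
    x, y, z = point
    r = np.linalg.norm(point)
    phi = np.arctan2(x, z)            # azimuth in [-pi, pi]
    theta = np.arccos(y / r)          # polar angle from zenith, in [0, pi]
    return phi, theta, r

def to_pixels(phi, theta):
    """Map spherical angles to equirectangular pixel coordinates."""
    u = (phi / (2 * np.pi) + 0.5) * WIDTH
    v = (theta / np.pi) * HEIGHT      # zenith maps to the top image row
    return u, v

# A LiDAR point expressed in the bottom camera frame, and the same point
# expressed in the top camera frame (the top camera sits BASELINE above).
p_bottom = np.array([1.0, -0.3, 4.0])
p_top = p_bottom - np.array([0.0, BASELINE, 0.0])

phi_b, theta_b, depth = spherical_angles(p_bottom)
_, theta_t, _ = spherical_angles(p_top)
u, v = to_pixels(phi_b, theta_b)      # where the point lands in the bottom image

# Angular disparity in degrees: the vertical angular shift between the views.
disparity_deg = np.degrees(theta_t - theta_b)
print(f"pixel ({u:.0f}, {v:.0f}), depth {depth:.2f} m, disparity {disparity_deg:.4f} deg")
</code></pre>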
<div class="columns is-centered">
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_all.png" alt="Histogram of Depth Values - All Scenes">
<figcaption>Depth Distribution - All</figcaption>
</figure>
</div>
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_indoor.png" alt="Histogram of Depth Values - Indoor Scenes">
<figcaption>Depth Distribution - Indoor</figcaption>
</figure>
</div>
<div class="column is-one-third">
<figure>
<img src="static/images/depth_histograms_outdoor.png" alt="Histogram of Depth Values - Outdoor Scenes">
<figcaption>Depth Distribution - Outdoor</figcaption>
</figure>
</div>
</div>
<p>
Depth values range from 0.5 to 225 meters, with averages of 8.1 meters overall, 5.4 meters for
indoor scenes, and 9.2 meters for outdoor scenes (day and night combined).
</p>
</div>
</div>
</section>
<!-- Benchmark Results Section -->
<section class="section" id="benchmark-results">
<div class="container is-max-desktop">
<h2 class="title is-3">Benchmark Results</h2>
<div class="content has-text-justified">
<p>
We evaluate multiple state-of-the-art and popular stereo matching methods, for both standard and 360° images. All models are trained on a single NVIDIA A100 GPU with
the largest possible batch size to ensure comparable use of computational resources.
</p>
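<p>
For reference, the three reported metrics are the mean absolute error (MAE), root mean squared error (RMSE), and mean absolute relative error (MARE). A minimal implementation could look as follows; masking out pixels with a non-positive ground-truth value (i.e., no LiDAR return) is an assumed convention for illustration.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>import numpy as np

def depth_metrics(pred, target):
    """MAE, RMSE and mean absolute relative error (MARE) over valid pixels."""
    mask = target > 0                      # assumed: zero marks missing labels
    err = pred[mask] - target[mask]
    mae = np.mean(np.abs(err))
    rmse = np.sqrt(np.mean(err**2))
    mare = np.mean(np.abs(err) / target[mask])
    return mae, rmse, mare
</code></pre>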
<table class="table is-striped is-bordered is-hoverable is-fullwidth">
<thead>
<tr>
<th rowspan="2">Method</th>
<th rowspan="2">Type</th>
<th colspan="3" class="has-text-centered">Disparity (°)</th>
<th colspan="3" class="has-text-centered">Depth (m)</th>
</tr>
<tr>
<th class="has-text-centered">MAE</th>
<th class="has-text-centered">RMSE</th>
<th class="has-text-centered">MARE</th>
<th class="has-text-centered">MAE</th>
<th class="has-text-centered">RMSE</th>
<th class="has-text-centered">MARE</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<a href="https://arxiv.org/abs/1803.08669" target="_blank">PSMNet</a>
</td>
<td>stereo</td>
<td class="has-text-centered">0.33</td>
<td class="has-text-centered">0.54</td>
<td class="has-text-centered">0.20</td>
<td class="has-text-centered">2.79</td>
<td class="has-text-centered">6.17</td>
<td class="has-text-centered">0.29</td>
</tr>
<tr>
<td>
<a href="https://arxiv.org/abs/1911.04460" target="_blank">360SD-Net</a>
</td>
<td>360° stereo</td>
<td class="has-text-centered">0.21</td>
<td class="has-text-centered">0.42</td>
<td class="has-text-centered">0.18</td>
<td class="has-text-centered">2.14</td>
<td class="has-text-centered">5.12</td>
<td class="has-text-centered">0.15</td>
</tr>
<tr>
<td>
<a href="https://arxiv.org/abs/2303.06615" target="_blank">IGEV-Stereo</a>
</td>
<td>stereo</td>
<td class="has-text-centered">0.22</td>
<td class="has-text-centered">0.41</td>
<td class="has-text-centered">0.17</td>
<td class="has-text-centered">1.85</td>
<td class="has-text-centered">4.44</td>
<td class="has-text-centered">0.15</td>
</tr>
<tr>
<td>360-IGEV-Stereo</td>
<td>360° stereo</td>
<td class="has-text-centered"><b>0.18</b></td>
<td class="has-text-centered"><b>0.39</b></td>
<td class="has-text-centered"><b>0.15</b></td>
<td class="has-text-centered"><b>1.77</b></td>
<td class="has-text-centered"><b>4.36</b></td>
<td class="has-text-centered"><b>0.14</b></td>
</tr>
</tbody>
</table>
<p>
The dataset also serves as an ideal testbed for assessing the robustness of depth estimation methods to diverse lighting conditions and depth ranges,
by training and evaluating models on different subsets of the dataset (e.g., indoor vs. outdoor scenes).
</p>
<figure>
<img src="static/images/cross_scene_generalization.png" alt="Cross-Scene Generalization Performance">
<figcaption>Cross-Scene Generalization Performance</figcaption>
</figure>
</div>
</div>
</section>
<section class="section" id="dataset-structure">
<div class="container is-max-desktop content">
<h2 class="title is-3">Download</h2>
<p>
Use the link below to access the dataset on the Hugging Face Hub.
</p>
<div class="has-text-centered">
<a href="https://huggingface.co/datasets/chcorbi/helvipad" target="_blank" class="button is-primary is-rounded is-large">
Download Dataset
</a>
</div>
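<p>
Alternatively, the full dataset can be fetched programmatically with the <code>huggingface_hub</code> Python package; the local directory name below is just an example.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>from huggingface_hub import snapshot_download

# Downloads the full dataset repository into ./helvipad
snapshot_download(
    repo_id="chcorbi/helvipad",
    repo_type="dataset",
    local_dir="helvipad",
)
</code></pre>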
<div style="margin-top: 30px;">
<p>
The dataset is organized into training and testing subsets, whose structure is outlined below:
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>helvipad/
├── train/
│ ├── depth_maps # Depth maps generated from LiDAR data
│ ├── depth_maps_augmented # Augmented depth maps using depth completion
│ ├── disparity_maps # Disparity maps computed from depth maps
│ ├── disparity_maps_augmented # Augmented disparity maps using depth completion
│ ├── images_top # Top-camera RGB images
│ ├── images_bottom # Bottom-camera RGB images
│ ├── LiDAR_pcd # Original LiDAR point cloud data
├── test/
│ ├── depth_maps # Depth maps generated from LiDAR data
│ ├── disparity_maps # Disparity maps computed from depth maps
│ ├── images_top # Top-camera RGB images
│ ├── images_bottom # Bottom-camera RGB images
│ ├── LiDAR_pcd # Original LiDAR point cloud data
</code></pre>
</div>
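<p>
Once downloaded, a training sample can be assembled from the matching files in each folder. The sketch below is illustrative only: the frame naming scheme and the 16-bit PNG depth encoding with a centimeter scale factor are assumptions, so please consult the dataset card for the actual format.
</p>
<pre style="background: #f5f5f5; padding: 20px; border-radius: 5px; overflow: auto;">
<code>from pathlib import Path

import numpy as np
from PIL import Image

ROOT = Path("helvipad/train")
FRAME = "0000"  # hypothetical frame identifier

# Stereo pair: top and bottom equirectangular RGB images.
img_top = np.asarray(Image.open(ROOT / "images_top" / f"{FRAME}.png"))
img_bottom = np.asarray(Image.open(ROOT / "images_bottom" / f"{FRAME}.png"))

# Ground-truth depth, assumed stored as a 16-bit PNG in centimeters.
depth_raw = np.asarray(Image.open(ROOT / "depth_maps" / f"{FRAME}.png"))
depth_m = depth_raw.astype(np.float32) / 100.0  # assumed scale factor

valid = depth_raw > 0  # pixels that carry a LiDAR-derived label
</code></pre>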
</div>
</section>
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<p>
If you use the <span style="font-variant: small-caps;">Helvipad</span> dataset in your research, please cite it using the following BibTeX entry:
</p>
<pre><code>@misc{zayene2024helvipad,
author = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi\`{e}re, Charles and Cherkaoui, Salim and Ben Ahmed Kontouli, Alexandre and Alahi, Alexandre},
title = {Helvipad: A Real-World Dataset for Omnidirectional Stereo Depth Estimation},
year = {2024},
eprint = {2411.18335},
archivePrefix = {arXiv},
primaryClass = {cs.CV}
}</code></pre>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="columns is-centered">
<div class="column is-8">
<div class="content">
<p>
This website is licensed under a <a rel="license"
href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
Commons Attribution-ShareAlike 4.0 International License</a>.
</p>
<p>
This page was built using the
<a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a>
which was adapted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
</p>
</div>
</div>
</div>
</div>
</footer>
</body>
</html>