<!-- index.html -->

<!DOCTYPE html>
<!-- lang tells screen readers, translators and search engines that the page content is English -->
<html lang="en">

<head>
  <meta charset="utf-8" />
  <!-- Meta tags for social media banners; these should be filled in appropriately as they are your "business card" -->
  <!-- Replace the content attribute with appropriate information -->
  <meta name="description"
    content="PreAfford: Universal Affordance-Based Pre-Grasping for Diverse Objects and Environments. Robotic manipulation with two-finger grippers is challenged by objects lacking distinct graspable features. Traditional pre-grasping methods, which typically involve repositioning objects or utilizing external aids like table edges, are limited in their adaptability across different object categories and environments. To overcome these limitations, we introduce PreAfford, a novel pre-grasping planning framework that incorporates a point-level affordance representation and a relay training approach. Our method significantly improves adaptability, allowing effective manipulation across a wide range of environments and object types. When evaluated on the ShapeNet-v2 dataset, PreAfford not only enhances grasping success rates by 69% but also demonstrates its practicality through successful real-world experiments. These improvements highlight PreAfford's potential to redefine standards for robotic handling of complex manipulation tasks in diverse settings." />
  <meta name="keywords"
    content="PreAfford: Universal Affordance-Based Pre-Grasping for Diverse Objects and Environments" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />

  <title>
    PreAfford | Project Page
  </title>
  <link rel="icon" type="image/x-icon" href="static/images/favicon.ico" />

  <!-- Web fonts -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet" />

  <!-- Stylesheets: locally hosted Bulma / Font Awesome, then CDN-hosted Academicons and BeerSlider -->
  <link rel="stylesheet" href="static/css/bulma.min.css" />
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css" />
  <link rel="stylesheet" href="static/css/bulma-slider.min.css" />
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" />
  <link rel="stylesheet" href="static/css/index.css" />
  <link rel="stylesheet" href="https://unpkg.com/beerslider/dist/BeerSlider.css" />

  <!-- Scripts: order is preserved because static/js/index.js may rely on the libraries loaded before it -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <!-- NOTE(review): nothing in this page's markup appears to use the Adobe View SDK; confirm index.js does not need it before removing -->
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>

<body>
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <!-- Paper title: the page's single top-level heading (size still controlled by the Bulma "is-2" class) -->
            <h1 class="title is-2 publication-title">
              PreAfford: Universal Affordance-Based Pre-Grasping for Diverse Objects and Environments
            </h1>
            <div class="is-size-5 publication-authors">
              <!-- Paper authors; superscripts refer to the affiliation list below -->

              <span class="author-block">
                <a href="https://robot-k.github.io/" target="_blank" rel="noopener">Kairui Ding</a>
                <sup>1</sup>
              ,</span>

              <span class="author-block">
                Boyuan Chen
                <sup>1</sup>
              ,</span>

              <span class="author-block">
                <a href="https://warshallrho.github.io/" target="_blank" rel="noopener">Ruihai Wu</a>
                <sup>2</sup>
              ,</span>

              <span class="author-block">
                <a href="https://yuyangli.com/" target="_blank" rel="noopener">Yuyang Li</a>
                <sup>3</sup>
              ,</span>

              <span class="author-block">
                Zongzheng Zhang
                <sup>1</sup>
              ,</span>

              <span class="author-block">
                <a href="https://c7w.tech/about/" target="_blank" rel="noopener">Huan-ang Gao</a>
                <sup>1</sup>
              ,</span>

              <span class="author-block">
                Siqi Li
                <sup>1</sup>
              ,</span>

              <span class="author-block">
                <a href="https://yzhu.io/" target="_blank" rel="noopener">Yixin Zhu</a>
                <sup>3</sup>
              ,</span>

              <span class="author-block">
                Guyue Zhou
                <sup>1,4</sup>
              ,</span>

              <span class="author-block">
                <a href="https://zsdonghao.github.io/" target="_blank" rel="noopener">Hao Dong</a>
                <sup>2</sup>
              ,</span>

              <span class="author-block">
                <a href="https://sites.google.com/view/fromandto" target="_blank" rel="noopener">Hao Zhao</a>
                <sup>&dagger;1</sup>
              </span>
            </div>
            <!-- a margin of 0.5em -->
            <div style="margin: 0.5em;"></div>
            <!-- Affiliations and the corresponding-author legend -->
            <div class="is-size-5 publication-authors">
              <span class="author-block is-size-6">
                <sup>1</sup> Institute for AI Industry Research (AIR), Tsinghua University
                <br>
                <sup>2</sup> CFCS, School of Computer Science, Peking University &nbsp;&nbsp;&nbsp;
                <sup>3</sup> Institute for Artificial Intelligence, Peking University  <br/>
                <sup>4</sup> School of Vehicle and Mobility, Tsinghua University <br/>
                <!-- Disabled equal-contribution legend: <span class="eql-cntrb"><small><br /><sup>*</sup>Indicates Equal Contribution</small></span> -->
                <span class="eql-cntrb"><small><sup>&dagger;</sup>Indicates Corresponding Author</small></span>
              </span>
            </div>

            <div class="column has-text-centered">
              <div class="publication-links">
                <!-- arXiv PDF link -->
                <span class="link-block">
                  <a href="https://arxiv.org/pdf/2404.03634" target="_blank" rel="noopener"
                    class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span>Paper</span>
                  </a>
                </span>

                <!-- arXiv abstract link -->
                <span class="link-block">
                  <a href="https://arxiv.org/abs/2404.03634" target="_blank" rel="noopener"
                    class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="ai ai-arxiv"></i>
                    </span>
                    <span>arXiv</span>
                  </a>
                </span>

                <!-- GitHub link -->
                <span class="link-block">
                  <a href="https://github.com/Robot-K/PreAfford" target="_blank" rel="noopener"
                    class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-github"></i>
                    </span>
                    <span>Code</span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>

  <!-- Teaser video -->
  <section class="hero teaser">
    <div class="container is-max-desktop">
      <div class="hero-body">
        <!-- muted + playsinline lets mobile browsers honour autoplay inline; controls stay available to the user -->
        <video id="tree" autoplay controls muted loop playsinline height="100%"
          aria-label="Demonstration video of PreAfford">
          <source src="static/video/output.mp4" type="video/mp4" />
        </video>
        <!-- Disabled static teaser image: static/images/Teaser_cs1.jpg -->
        <h2 class="has-text-centered is-size-6">
         Demonstration Video of <b>PreAfford</b>.
        </h2>
      </div>
    </div>
  </section>
  <!-- End teaser video -->

  <!-- Abstract: light-background section holding the paper summary -->
  <section class="section hero is-light">
    <div class="container is-max-desktop">
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          <h2 class="title is-3">Abstract</h2>
          <!-- Heading is centred by the enclosing columns; the body text is justified -->
          <div class="content has-text-justified">
            <p>Robotic manipulation with two-finger grippers is challenged by objects lacking distinct graspable features. Traditional pre-grasping methods, which typically involve repositioning objects or utilizing external aids like table edges, are limited in their adaptability across different object categories and environments. To overcome these limitations, we introduce PreAfford, a novel pre-grasping planning framework that incorporates a point-level affordance representation and a relay training approach. Our method significantly improves adaptability, allowing effective manipulation across a wide range of environments and object types. When evaluated on the ShapeNet-v2 dataset, PreAfford not only enhances grasping success rates by 69% but also demonstrates its practicality through successful real-world experiments. These improvements highlight PreAfford's potential to redefine standards for robotic handling of complex manipulation tasks in diverse settings.</p>
          </div>
        </div>
      </div>
    </div>
  </section>
  <!-- End abstract -->

  <!-- Introduction & Method: two figures, each followed by its caption -->
  <section class="hero is-small">
    <div class="hero-body">
      <div class="container">

        <h2 class="title is-3">Introduction &amp; Method</h2>

        <!-- Figure 1: overview illustration -->
        <div class="columns is-centered has-text-centered"
          style="width: 100%; display: flex; justify-content: center; align-items: center; flex-direction: column">
          <div style="width: 70%; display: flex; justify-content: center; align-items: stretch; flex-direction: row">
            <div style="width: 100%;">
              <img src="static/images/teaser.png"
                alt="Illustration of PreAfford's relay training paradigm with cooperating pre-grasping and grasping modules">
            </div>
          </div>
          <div style="width: 70%;">
            <b>Illustration of <i>PreAfford</i>, demonstrating the application of a <i>relay training</i> paradigm where two synergistic modules cooperate to facilitate the manipulation of objects typically considered ungraspable.</b> The <i>pre-grasping</i> module assesses environmental features such as edges, slopes, slots, and walls to propose strategic pre-grasping actions that enhance the likelihood of a successful grasp. Simultaneously, the <i>grasping</i> module evaluates these actions and provides feedback in the form of rewards, which are used to refine and optimize the pre-grasping strategies. Two color bars represent the pre-grasping and grasping phases, respectively, with the color intensity reflecting the calculated affordance values; higher values denote more optimal interaction conditions.
          </div>
        </div>
        <br /><br />

        <!-- Figure 2: framework diagram -->
        <div class="columns is-centered has-text-centered"
          style="width: 100%; display: flex; justify-content: center; align-items: center; flex-direction: column">
          <div style="width: 70%; display: flex; justify-content: center; align-items: stretch; flex-direction: row">
            <div style="width: 100%;">
              <img src="static/images/framework.png"
                alt="Diagram of the PreAfford framework: pre-grasping and grasping modules, each with affordance, proposal, and critic networks">
            </div>
          </div>
          <div style="width: 70%;">
            <b>The framework of PreAfford.</b> The framework consists of two main modules, each incorporating three networks: an affordance network, a proposal network, and a critic network. These networks respectively handle tasks of choosing the contact point, generating a proposal, and evaluating the proposal. PointNet++ (PN++) and MLP are employed to process point clouds and facilitate decision-making. During the inference phase, both modules collaborate to develop strategies for pre-grasping and grasping. In contrast, during the training phase, the grasping module generates rewards that are used to train the pre-grasping module, a process we refer to as <i>relay</i>.
          </div>
        </div>

      </div>
    </div>
  </section>
  <!-- End Introduction & Method -->

  <!-- Results: qualitative figures, simulation comparison table, real-world figure and table -->
  <section class="hero is-small is-light">
    <div class="hero-body">
      <div class="container">
        <h2 class="title is-3">Results</h2>

        <div class="columns is-centered has-text-centered"
          style="width: 100%; display: flex; justify-content: center; align-items: center; flex-direction: column">

          <div style="width: 70%; display: flex; justify-content: center; align-items: stretch; flex-direction: row">
            <div style="width: 100%;">
              <img src="static/images/main.png"
                alt="Qualitative pre-grasping results with affordance maps in edge, slot, slope, and wall scenarios">
            </div>
          </div>

          <div style="width: 70%;">
            <b>Qualitative Results.</b> Here we demonstrate pre-grasping manipulation on training and testing categories in four scenarios—edge, slot, slope, and wall. Affordance maps highlight effective interaction areas, showing PreAfford’s capability to devise suitable pre-grasping and grasping strategies for various object categories and scenes, including both seen and unseen objects.
          </div>

          <div style="margin: 1em;"></div>

          <div style="width: 50%; display: flex; justify-content: center; align-items: stretch; flex-direction: row">
            <div style="width: 100%;">
              <img src="static/images/multiple.png"
                alt="Multi-feature scenario: (a) a complex environment and (b) its affordance heatmap">
            </div>
          </div>

          <div style="width: 70%;">
            <b>Multi-feature scenario:</b> PreAfford effectively addresses scenarios where multiple environmental features are present simultaneously. (a) A complex environment, (b) Affordance heatmap.
          </div>

          <div style="margin: 1em;"></div>

          <div style="width: 85%;">
            <b>Comparison with baselines.</b> Pre-grasping increases grasping success rates by 52.9%. A closed-loop strategy further enhances this improvement by 16.4% across all categories.
          </div>

          <!-- Table with training and testing object categories; both header rows sit in <thead> so assistive technology announces them as headers -->
          <table border="1" aria-label="Comparison with baselines">
            <thead>
              <tr>
                <th rowspan="2"><strong>Setting</strong></th>
                <th colspan="6"><strong>Train object categories</strong></th>
                <th colspan="6"><strong>Test object categories</strong></th>
              </tr>
              <tr>
                <th scope="col"><strong>Edge</strong></th>
                <th scope="col"><strong>Wall</strong></th>
                <th scope="col"><strong>Slope</strong></th>
                <th scope="col"><strong>Slot</strong></th>
                <th scope="col"><strong>Multi</strong></th>
                <th scope="col"><strong>Avg.</strong></th>
                <th scope="col"><strong>Edge</strong></th>
                <th scope="col"><strong>Wall</strong></th>
                <th scope="col"><strong>Slope</strong></th>
                <th scope="col"><strong>Slot</strong></th>
                <th scope="col"><strong>Multi</strong></th>
                <th scope="col"><strong>Avg.</strong></th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>W/o pre-grasping</td>
                <td>2.3</td>
                <td>3.8</td>
                <td>4.3</td>
                <td>3.4</td>
                <td>4.0</td>
                <td>3.6</td>
                <td>6.1</td>
                <td>2.3</td>
                <td>2.9</td>
                <td>5.7</td>
                <td>6.0</td>
                <td>4.6</td>
              </tr>
              <tr>
                <td>Random-direction Push</td>
                <td>21.6</td>
                <td>10.3</td>
                <td>6.4</td>
                <td>16.8</td>
                <td>18.1</td>
                <td>14.6</td>
                <td>24.9</td>
                <td>17.2</td>
                <td>12.1</td>
                <td>18.4</td>
                <td>23.0</td>
                <td>19.1</td>
              </tr>
              <tr>
                <td>Center-point Push</td>
                <td>32.5</td>
                <td>23.7</td>
                <td>40.5</td>
                <td>39.2</td>
                <td>39.0</td>
                <td>35.0</td>
                <td>25.1</td>
                <td>17.4</td>
                <td>28.0</td>
                <td>30.2</td>
                <td>21.5</td>
                <td>24.4</td>
              </tr>
              <tr>
                <td>Ours w/o closed-loop</td>
                <td>67.2</td>
                <td>41.5</td>
                <td>58.3</td>
                <td>76.9</td>
                <td>63.6</td>
                <td>61.5</td>
                <td>56.4</td>
                <td>37.3</td>
                <td>62.6</td>
                <td>75.8</td>
                <td>55.4</td>
                <td>57.5</td>
              </tr>
              <tr>
                <td><strong>Ours</strong></td>
                <td><strong>81.4</strong></td>
                <td><strong>43.4</strong></td>
                <td><strong>73.1</strong></td>
                <td><strong>83.5</strong></td>
                <td><strong>74.1</strong></td>
                <td><strong>71.1</strong></td>
                <td><strong>83.7</strong></td>
                <td><strong>47.6</strong></td>
                <td><strong>80.5</strong></td>
                <td><strong>83.0</strong></td>
                <td><strong>74.6</strong></td>
                <td><strong>73.9</strong></td>
              </tr>
            </tbody>
          </table>

          <div style="margin: 1em;"></div>

          <div style="width: 70%; display: flex; justify-content: center; align-items: stretch; flex-direction: row">
            <div style="width: 100%;">
              <img src="static/images/experiment.png"
                alt="Real-world pre-grasping manipulations with affordance maps for edge, wall, slope, and slot scenes">
            </div>
          </div>

          <div style="width: 70%;">
            <b>Real world pre-grasping manipulations with affordance maps.</b> Red areas in the maps indicate optimal pushing locations. Point clouds are captured by Femto Bolt. (a) move a tablet to table edge, (b) push a plate towards a wall, (c) push a keyboard up a slope, and (d) slide a tablet into a slot.
          </div>

          <div style="margin: 1em;"></div>

          <div style="width: 85%;">
            <b>Real-world experiment results.</b> Experiments were conducted twice for each object in every scene, comparing direct grasping (without pre-grasping) to grasping after pre-grasping. Success rates are presented as percentages.
          </div>

          <!-- Table with seen and unseen categories; same header structure as the table above -->
          <table border="1" aria-label="Real-world experiment results">
            <thead>
              <tr>
                <th rowspan="2"><strong>Setting</strong></th>
                <th colspan="6"><strong>Seen categories</strong></th>
                <th colspan="6"><strong>Unseen categories</strong></th>
              </tr>
              <tr>
                <th scope="col"><strong>Edge</strong></th>
                <th scope="col"><strong>Wall</strong></th>
                <th scope="col"><strong>Slope</strong></th>
                <th scope="col"><strong>Slot</strong></th>
                <th scope="col"><strong>Multi</strong></th>
                <th scope="col"><strong>Avg.</strong></th>
                <th scope="col"><strong>Edge</strong></th>
                <th scope="col"><strong>Wall</strong></th>
                <th scope="col"><strong>Slope</strong></th>
                <th scope="col"><strong>Slot</strong></th>
                <th scope="col"><strong>Multi</strong></th>
                <th scope="col"><strong>Avg.</strong></th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>W/o pre-grasping</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>10</td>
                <td>0</td>
                <td>5</td>
                <td>0</td>
                <td>0</td>
                <td>3</td>
              </tr>
              <tr>
                <td>With pre-grasping</td>
                <td>70</td>
                <td>45</td>
                <td>80</td>
                <td>90</td>
                <td>85</td>
                <td>74</td>
                <td>80</td>
                <td>30</td>
                <td>75</td>
                <td>90</td>
                <td>85</td>
                <td>72</td>
              </tr>
            </tbody>
          </table>

        </div>
      </div>
    </div>
  </section>
  <!-- End Results -->

  <!-- BibTeX citation section (anchor id "BibTeX") -->
  <!-- The entry lives inside pre/code, so every space and line break below is rendered verbatim; edit its whitespace with care -->
  <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX</h2>
      If you find our work useful in your research, please consider citing:
      <div style="margin: 0.5em;"></div>
      <pre><code>@misc{ding2024preafford,
      title={PreAfford: Universal Affordance-Based Pre-Grasping for Diverse Objects and Environments}, 
      author={Kairui Ding and Boyuan Chen and Ruihai Wu and Yuyang Li and Zongzheng Zhang and Huan-ang Gao and Siqi Li and Yixin Zhu and Guyue Zhou and Hao Dong and Hao Zhao},
      year={2024},
      eprint={2404.03634},
      archivePrefix={arXiv},
      primaryClass={cs.RO}
}</code></pre>
    </div>
  </section>
  <!-- End BibTeX citation section -->

  <!-- Statcounter tracking code -->

  <!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

  <!-- End of Statcounter Code -->
  <script src="https://unpkg.com/beerslider/dist/BeerSlider.js"></script>
  <script>
    // Initialise a BeerSlider on each of #slider1 … #slider10 that actually exists in the page.
    // Missing ids are skipped, and nothing runs if the CDN script failed to define BeerSlider,
    // so the page never throws when sliders are absent.
    if (typeof BeerSlider === 'function') {
      for (var sliderIndex = 1; sliderIndex <= 10; sliderIndex++) {
        var sliderElement = document.getElementById('slider' + sliderIndex);
        if (sliderElement) {
          new BeerSlider(sliderElement, { start: '40' });
        }
      }
    }
  </script>
</body>

</html>