From 1f2cd14f76a631120812850b2de8e508e76eaa91 Mon Sep 17 00:00:00 2001 From: StoneT2000 Date: Mon, 22 Jan 2024 20:52:03 -0800 Subject: [PATCH] fix a few envs, mark pick clutter as to be fixed in the future. --- .../agents/controllers/base_controller.py | 2 +- mani_skill2/agents/controllers/pd_ee_pose.py | 6 +- .../agents/controllers/pd_joint_vel.py | 3 +- mani_skill2/agents/robots/xmate3/xmate3.py | 101 ++++-- .../envs/assembly/peg_insertion_side.py | 16 +- mani_skill2/envs/pick_and_place/__init__.py | 1 - mani_skill2/envs/pick_and_place/base_env.py | 6 +- .../envs/pick_and_place/pick_clutter.py | 290 ------------------ mani_skill2/envs/pick_and_place/pick_cube.py | 4 +- .../envs/pick_and_place/pick_single.py | 7 +- mani_skill2/envs/pick_and_place/stack_cube.py | 173 ++++++----- mani_skill2/envs/sapien_env.py | 40 +-- mani_skill2/envs/scene.py | 18 +- mani_skill2/utils/download_asset.py | 2 + mani_skill2/utils/registration.py | 1 + mani_skill2/utils/sapien_utils.py | 19 +- .../table/table_scene_builder.py | 11 + mani_skill2/utils/structs/actor.py | 2 + mani_skill2/utils/structs/articulation.py | 30 +- mani_skill2/utils/structs/joint.py | 4 + mani_skill2/utils/wrappers/common.py | 26 -- mani_skill2/utils/wrappers/observation.py | 23 +- tests/test_envs.py | 6 +- tests/test_venv.py | 4 +- tests/utils.py | 40 ++- 25 files changed, 303 insertions(+), 532 deletions(-) delete mode 100644 mani_skill2/envs/pick_and_place/pick_clutter.py diff --git a/mani_skill2/agents/controllers/base_controller.py b/mani_skill2/agents/controllers/base_controller.py index 71c20738f..0ca1840ae 100644 --- a/mani_skill2/agents/controllers/base_controller.py +++ b/mani_skill2/agents/controllers/base_controller.py @@ -284,7 +284,7 @@ def set_action(self, action: np.ndarray): for uid, controller in self.controllers.items(): start, end = self.action_mapping[uid] - controller.set_action(to_tensor(action[..., start:end])) + controller.set_action(action[..., start:end]) def to_action_dict(self, action: np.ndarray): """Convert a flat action to a dict of actions.""" diff --git a/mani_skill2/agents/controllers/pd_ee_pose.py b/mani_skill2/agents/controllers/pd_ee_pose.py index 1aaeeffb8..b56fdc9f2 100644 --- a/mani_skill2/agents/controllers/pd_ee_pose.py +++ b/mani_skill2/agents/controllers/pd_ee_pose.py @@ -4,6 +4,7 @@ import numpy as np import sapien import sapien.physx as physx +import torch from gymnasium import spaces from scipy.spatial.transform import Rotation @@ -167,12 +168,13 @@ def _initialize_action_space(self): self.action_space = spaces.Box(low, high, dtype=np.float32) def _clip_and_scale_action(self, action): + # TODO (stao): support batched actions # NOTE(xiqiang): rotation should be clipped by norm. 
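For context on the lines below: clip_and_scale_action comes from mani_skill2.utils.common and is not shown in this patch. Roughly, it maps a normalized action in [-1, 1] into the controller's [low, high] range, while the rotation part is clipped by its Euclidean norm so the axis-angle direction is preserved before scaling by rot_bound. A minimal NumPy sketch of both steps (illustrative only; the actual helper may differ, and the patched code applies the same idea with torch):

```python
import numpy as np

def clip_and_scale_action(action, low, high):
    """Rough sketch: map a normalized action in [-1, 1] to [low, high]."""
    action = np.clip(action, -1.0, 1.0)
    return 0.5 * (high + low) + 0.5 * (high - low) * action

def clip_rotation_by_norm(rot_action, rot_bound):
    """Clip an axis-angle delta by its norm (keeps direction), then scale by the bound."""
    rot_norm = np.linalg.norm(rot_action)
    if rot_norm > 1:
        rot_action = rot_action / rot_norm
    return rot_action * rot_bound

# hypothetical 3D position delta + 3D axis-angle delta
pos = clip_and_scale_action(np.array([0.5, -2.0, 0.1]), np.full(3, -0.1), np.full(3, 0.1))
rot = clip_rotation_by_norm(np.array([2.0, 0.0, 0.0]), rot_bound=np.pi / 2)
```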
pos_action = clip_and_scale_action( - action[:3], self._action_space.low[:3], self._action_space.high[:3] + action[:3], self.action_space_low[:3], self.action_space_high[:3] ) rot_action = action[3:] - rot_norm = np.linalg.norm(rot_action) + rot_norm = torch.linalg.norm(rot_action) if rot_norm > 1: rot_action = rot_action / rot_norm rot_action = rot_action * self.config.rot_bound diff --git a/mani_skill2/agents/controllers/pd_joint_vel.py b/mani_skill2/agents/controllers/pd_joint_vel.py index 0f7eaf19c..d96e53a33 100644 --- a/mani_skill2/agents/controllers/pd_joint_vel.py +++ b/mani_skill2/agents/controllers/pd_joint_vel.py @@ -29,8 +29,7 @@ def set_drive_property(self): def set_action(self, action: np.ndarray): action = self._preprocess_action(action) - for i, joint in enumerate(self.joints): - joint.set_drive_velocity_target(action[i]) + self.articulation.set_joint_drive_velocity_targets(action, self.joints) @dataclass diff --git a/mani_skill2/agents/robots/xmate3/xmate3.py b/mani_skill2/agents/robots/xmate3/xmate3.py index b56ef30fc..8828e361f 100644 --- a/mani_skill2/agents/robots/xmate3/xmate3.py +++ b/mani_skill2/agents/robots/xmate3/xmate3.py @@ -1,18 +1,20 @@ import numpy as np import sapien import sapien.physx as physx +import torch from mani_skill2 import ASSET_DIR from mani_skill2.agents.base_agent import BaseAgent from mani_skill2.agents.controllers import * from mani_skill2.sensors.camera import CameraConfig -from mani_skill2.utils.common import np_compute_angle_between +from mani_skill2.utils.common import compute_angle_between, np_compute_angle_between from mani_skill2.utils.sapien_utils import ( compute_total_impulse, get_actor_contacts, get_obj_by_name, get_pairwise_contact_impulse, ) +from mani_skill2.utils.structs.actor import Actor class Xmate3Robotiq(BaseAgent): @@ -172,38 +174,83 @@ def controller_configs(self): ), ] - def is_grasping(self, object: sapien.Entity = None, min_impulse=1e-6, max_angle=85): - contacts = self.scene.get_contacts() - if object is None: - finger1_contacts = get_actor_contacts(contacts, self.finger1_link) - finger2_contacts = get_actor_contacts(contacts, self.finger2_link) - return ( - np.linalg.norm(compute_total_impulse(finger1_contacts)) >= min_impulse - and np.linalg.norm(compute_total_impulse(finger2_contacts)) - >= min_impulse + def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85): + # TODO (stao): is_grasping code needs to be updated for new GPU sim + if physx.is_gpu_enabled(): + if object.name not in self.queries: + body_pairs = list(zip(self.finger1_link._bodies, object._bodies)) + body_pairs += list(zip(self.finger2_link._bodies, object._bodies)) + self.queries[object.name] = ( + self.scene.px.gpu_create_contact_query(body_pairs), + (len(object._bodies), 3), + ) + print(f"Create query for Panda grasp({object.name})") + query, contacts_shape = self.queries[object.name] + self.scene.px.gpu_query_contacts(query) + # query.cuda_contacts # (num_unique_pairs * num_envs, 3) + contacts = query.cuda_contacts.clone().reshape((-1, *contacts_shape)) + lforce = torch.linalg.norm(contacts[0], axis=1) + rforce = torch.linalg.norm(contacts[1], axis=1) + + # NOTE (stao): 0.5 * time_step is a decent value when tested on a pick cube task. 
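The GPU grasp check here boils down to a batched heuristic: an object counts as grasped when both fingers report a contact force above a small threshold (tied to the simulation timestep just below) and the contact force points within max_angle of each finger's opening direction. A self-contained torch sketch of that check, assuming you already have per-environment (N, 3) contact force vectors and finger opening directions (hypothetical inputs; the real code obtains them from a PhysX GPU contact query and the finger link poses):

```python
import torch

def batched_is_grasping(l_contact_force, r_contact_force,
                        l_open_dir, r_open_dir,
                        min_force=1e-3, max_angle_deg=85.0):
    """All inputs are (N, 3) tensors for N parallel environments."""
    def angle_between_deg(a, b):
        # angle between batched vectors, guarded against zero-length forces
        cos = torch.sum(a * b, dim=1) / (
            torch.linalg.norm(a, dim=1) * torch.linalg.norm(b, dim=1) + 1e-8
        )
        return torch.rad2deg(torch.arccos(cos.clamp(-1.0, 1.0)))

    lforce = torch.linalg.norm(l_contact_force, dim=1)
    rforce = torch.linalg.norm(r_contact_force, dim=1)
    lflag = (lforce >= min_force) & (angle_between_deg(l_open_dir, l_contact_force) <= max_angle_deg)
    rflag = (rforce >= min_force) & (angle_between_deg(r_open_dir, r_contact_force) <= max_angle_deg)
    return lflag & rflag  # (N,) boolean grasp mask
```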
+ min_force = 0.5 * self.scene.px.timestep + + # direction to open the gripper + ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1] + rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1] + langle = compute_angle_between(ldirection, contacts[0]) + rangle = compute_angle_between(rdirection, contacts[1]) + lflag = torch.logical_and( + lforce >= min_force, torch.rad2deg(langle) <= max_angle ) + rflag = torch.logical_and( + rforce >= min_force, torch.rad2deg(rangle) <= max_angle + ) + + return torch.logical_and(lflag, rflag) else: - limpulse = get_pairwise_contact_impulse(contacts, self.finger1_link, object) - rimpulse = get_pairwise_contact_impulse(contacts, self.finger2_link, object) + contacts = self.scene.get_contacts() - # direction to open the gripper - ldirection = self.finger1_link.pose.to_transformation_matrix()[:3, 1] - rdirection = -self.finger2_link.pose.to_transformation_matrix()[:3, 1] + if object is None: + finger1_contacts = get_actor_contacts(contacts, self.finger1_link) + finger2_contacts = get_actor_contacts(contacts, self.finger2_link) + return ( + np.linalg.norm(compute_total_impulse(finger1_contacts)) + >= min_impulse + and np.linalg.norm(compute_total_impulse(finger2_contacts)) + >= min_impulse + ) + else: + limpulse = get_pairwise_contact_impulse( + contacts, self.finger1_link, object + ) + rimpulse = get_pairwise_contact_impulse( + contacts, self.finger2_link, object + ) - # angle between impulse and open direction - langle = np_compute_angle_between(ldirection, limpulse) - rangle = np_compute_angle_between(rdirection, rimpulse) + # direction to open the gripper + ldirection = self.finger1_link.pose.to_transformation_matrix()[ + ..., :3, 1 + ] + rdirection = -self.finger2_link.pose.to_transformation_matrix()[ + ..., :3, 1 + ] - lflag = ( - np.linalg.norm(limpulse) >= min_impulse - and np.rad2deg(langle) <= max_angle - ) - rflag = ( - np.linalg.norm(rimpulse) >= min_impulse - and np.rad2deg(rangle) <= max_angle - ) + # TODO Convert this to batched code + # angle between impulse and open direction + langle = np_compute_angle_between(ldirection[0], limpulse) + rangle = np_compute_angle_between(rdirection[0], rimpulse) + + lflag = ( + np.linalg.norm(limpulse) >= min_impulse + and np.rad2deg(langle) <= max_angle + ) + rflag = ( + np.linalg.norm(rimpulse) >= min_impulse + and np.rad2deg(rangle) <= max_angle + ) - return all([lflag, rflag]) + return all([lflag, rflag]) @staticmethod def build_grasp_pose(approaching, closing, center): diff --git a/mani_skill2/envs/assembly/peg_insertion_side.py b/mani_skill2/envs/assembly/peg_insertion_side.py index 582d5ba18..f84a5766f 100644 --- a/mani_skill2/envs/assembly/peg_insertion_side.py +++ b/mani_skill2/envs/assembly/peg_insertion_side.py @@ -2,6 +2,7 @@ import numpy as np import sapien +import torch from sapien import Pose from transforms3d.euler import euler2quat @@ -58,7 +59,8 @@ def _build_box_with_hole( return builder.build_static(name) def _load_actors(self): - TableSceneBuilder().build(self._scene) + self.table_scene = TableSceneBuilder(env=self) + self.table_scene.build() # peg # length, radius = 0.1, 0.02 @@ -105,6 +107,7 @@ def _load_actors(self): self.box_hole_radius = inner_radius def _initialize_actors(self): + self.table_scene.initialize() xy = self._episode_rng.uniform([-0.1, -0.3], [0.1, 0]) pos = np.hstack([xy, self.peg_half_size[2]]) ori = np.pi / 2 + self._episode_rng.uniform(-np.pi / 3, np.pi / 3) @@ -167,14 +170,17 @@ def has_peg_inserted(self): # Only head position 
is used in fact peg_head_pos_at_hole = (self.box_hole_pose.inv() * self.peg_head_pose).p # x-axis is hole direction - x_flag = -0.015 <= peg_head_pos_at_hole[0] + x_flag = -0.015 <= peg_head_pos_at_hole[:, 0] y_flag = ( - -self.box_hole_radius <= peg_head_pos_at_hole[1] <= self.box_hole_radius + -self.box_hole_radius <= peg_head_pos_at_hole[:, 1] <= self.box_hole_radius ) z_flag = ( - -self.box_hole_radius <= peg_head_pos_at_hole[2] <= self.box_hole_radius + -self.box_hole_radius <= peg_head_pos_at_hole[:, 2] <= self.box_hole_radius + ) + return ( + torch.logical_and(torch.logical_and(x_flag, y_flag), z_flag), + peg_head_pos_at_hole, ) - return (x_flag and y_flag and z_flag), peg_head_pos_at_hole def evaluate(self, **kwargs) -> dict: success, peg_head_pos_at_hole = self.has_peg_inserted() diff --git a/mani_skill2/envs/pick_and_place/__init__.py b/mani_skill2/envs/pick_and_place/__init__.py index 8ecfa771a..d7c325c3a 100644 --- a/mani_skill2/envs/pick_and_place/__init__.py +++ b/mani_skill2/envs/pick_and_place/__init__.py @@ -2,4 +2,3 @@ from . import pick_cube from . import stack_cube from . import pick_single -from . import pick_clutter diff --git a/mani_skill2/envs/pick_and_place/base_env.py b/mani_skill2/envs/pick_and_place/base_env.py index 0f4a8e667..80ef53b2d 100644 --- a/mani_skill2/envs/pick_and_place/base_env.py +++ b/mani_skill2/envs/pick_and_place/base_env.py @@ -12,7 +12,7 @@ from mani_skill2.agents.robots.xmate3 import Xmate3Robotiq from mani_skill2.envs.sapien_env import BaseEnv from mani_skill2.sensors.camera import CameraConfig -from mani_skill2.utils.sapien_utils import hide_entity, look_at +from mani_skill2.utils.sapien_utils import hide_entity, look_at, to_numpy from mani_skill2.utils.structs.pose import vectorize_pose @@ -34,7 +34,7 @@ def _build_cube( if render_material is None: render_material = self._renderer.create_material() render_material.base_color = (*color, 1.0) - + half_size = to_numpy(half_size) builder = self._scene.create_actor_builder() builder.add_box_collision(half_size=half_size) builder.add_box_visual(half_size=half_size, material=render_material) @@ -123,7 +123,7 @@ def _initialize_agent_v1(self): raise NotImplementedError(self.robot_uid) def _register_sensors(self): - pose = look_at([0.3, 0, 0.6], [-0.1, 0, 0.1]) + pose = look_at([0.3, 0, 0.6], [-0.1, 0, 0.9]) return CameraConfig( "base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10 ) diff --git a/mani_skill2/envs/pick_and_place/pick_clutter.py b/mani_skill2/envs/pick_and_place/pick_clutter.py deleted file mode 100644 index b95d8e2ea..000000000 --- a/mani_skill2/envs/pick_and_place/pick_clutter.py +++ /dev/null @@ -1,290 +0,0 @@ -from collections import OrderedDict -from pathlib import Path -from typing import Dict, List - -import numpy as np -import sapien -import sapien.physx as physx -from sapien import Pose - -from mani_skill2 import format_path -from mani_skill2.utils.common import random_choice -from mani_skill2.utils.io_utils import load_json -from mani_skill2.utils.registration import register_env -from mani_skill2.utils.sapien_utils import look_at, set_entity_visibility -from mani_skill2.utils.scene_builder import TableSceneBuilder -from mani_skill2.utils.structs.pose import vectorize_pose - -from .base_env import StationaryManipulationEnv -from .pick_single import PickSingleYCBEnv, build_actor_ycb - - -class PickClutterEnv(StationaryManipulationEnv): - DEFAULT_EPISODE_JSON: str - DEFAULT_ASSET_ROOT: str - DEFAULT_MODEL_JSON: str - - obj: sapien.Entity # target object - - def 
__init__( - self, - episode_json: str = None, - asset_root: str = None, - model_json: str = None, - **kwargs, - ): - # Load episode configurations - if episode_json is None: - episode_json = self.DEFAULT_EPISODE_JSON - episode_json = format_path(episode_json) - if not Path(episode_json).exists(): - raise FileNotFoundError( - f"Episode json ({episode_json}) is not found." - "To download default json:" - "`python -m mani_skill2.utils.download_asset pick_clutter_ycb`." - ) - self.episodes: List[Dict] = load_json(episode_json) - - # Root directory of object models - if asset_root is None: - asset_root = self.DEFAULT_ASSET_ROOT - self.asset_root = Path(format_path(asset_root)) - - # Information of object models - if model_json is None: - model_json = self.DEFAULT_MODEL_JSON - model_json = self.asset_root / format_path(model_json) - self.model_db: Dict[str, Dict] = load_json(model_json) - - self.episode_idx = -1 - - self.goal_thresh = 0.025 - - super().__init__(**kwargs) - - def _load_actors(self): - TableSceneBuilder().build(self._scene) - - self.objs: List[sapien.Entity] = [] - self.bbox_sizes = [] - for actor_cfg in self.episode["actors"]: - model_id = actor_cfg["model_id"] - model_scale = actor_cfg["scale"] - obj = self._load_model(model_id, model_scale=model_scale) - self.objs.append(obj) - - bbox = self.model_db[model_id]["bbox"] - bbox_size = np.array(bbox["max"]) - np.array(bbox["min"]) - self.bbox_sizes.append(bbox_size * model_scale) - - self.target_site = self._build_sphere_site( - 0.01, color=(1, 1, 0), name="_target_site" - ) - self.goal_site = self._build_sphere_site( - 0.01, color=(0, 1, 0), name="_goal_site" - ) - - def _load_model(self, model_id, model_scale=1.0) -> sapien.Entity: - raise NotImplementedError - - def reset(self, seed=None, options=None): - if options is None: - options = dict() - self._set_episode_rng(seed) - episode_idx = options.pop("episode_idx", None) - reconfigure = options.pop("reconfigure", False) - _reconfigure = self._set_episode(episode_idx) - reconfigure = _reconfigure or reconfigure - options["reconfigure"] = reconfigure - return super().reset(seed=self._episode_seed, options=options) - - def _set_episode(self, episode_idx=None): - reconfigure = False - - if episode_idx is None: - episode_idx = self._episode_rng.randint(len(self.episodes)) - # episode_idx = (self.episode_idx + 1) % len(self.episodes) - - # TODO(jigu): check whether assets (and scales) are the same instead - if self.episode_idx != episode_idx: - reconfigure = True - - self.episode_idx = episode_idx - self.episode = self.episodes[self.episode_idx] - - return reconfigure - - def _initialize_actors(self): - for i, actor_cfg in enumerate(self.episode["actors"]): - pose = np.float32(actor_cfg["pose"]) - # Add a small offset to avoid numerical issues for simulation - self.objs[i].set_pose(Pose(pose[:3] + [0, 0, 1e-3], pose[3:])) - - # Settle - self.agent.robot.set_pose(Pose([10, 0, 0])) - for _ in range(self.control_freq): - self._scene.step() - - def _initialize_agent(self): - if self.robot_uid == "panda": - # fmt: off - qpos = np.array( - [0.0, 0, 0, -np.pi * 2 / 3, 0, np.pi * 2 / 3, np.pi / 4, 0.04, 0.04] - ) - # fmt: on - qpos[:-2] += self._episode_rng.normal( - 0, self.robot_init_qpos_noise, len(qpos) - 2 - ) - self.agent.reset(qpos) - self.agent.robot.set_pose(Pose([-0.544, 0, 0])) - else: - raise NotImplementedError(self.robot_uid) - - @property - def obj_pose(self): - """Get the center of mass (COM) pose.""" - return ( - self.obj.pose - * self.obj.find_component_by_type( - 
physx.PhysxRigidDynamicComponent - ).cmass_local_pose - ) - - def _initialize_task(self): - self._set_target() - self._set_goal() - - def _set_target(self): - visible_inds = [] - for i, actor_cfg in enumerate(self.episode["actors"]): - if actor_cfg["rep_pts"] is not None: - visible_inds.append(i) - assert len(visible_inds) > 0, self.episode_idx - - actor_idx = random_choice(visible_inds, self._episode_rng) - self.obj = self.objs[actor_idx] - obj_rep_pts = self.episode["actors"][actor_idx]["rep_pts"] - self.obj_start_pos = np.float32(random_choice(obj_rep_pts, self._episode_rng)) - self.bbox_size = self.bbox_sizes[actor_idx] - - self.target_site.set_pose(Pose(self.obj_start_pos)) - - def _set_goal(self): - goal_pos = self._episode_rng.uniform([-0.15, -0.25, 0.35], [0.15, 0.25, 0.45]) - self.goal_pos = goal_pos - self.goal_site.set_pose(Pose(self.goal_pos)) - - def _get_obs_extra(self) -> OrderedDict: - obs = OrderedDict( - tcp_pose=vectorize_pose(self.agent.tcp.pose), - goal_pos=self.goal_pos, - obj_start_pos=self.obj_start_pos, - ) - if self._obs_mode in ["state", "state_dict"]: - obs.update( - tcp_to_goal_pos=self.goal_pos - self.agent.tcp.pose.p, - obj_pose=vectorize_pose(self.obj_pose), - tcp_to_obj_pos=self.obj_pose.p - self.agent.tcp.pose.p, - obj_to_goal_pos=self.goal_pos - self.obj_pose.p, - ) - return obs - - def check_robot_static(self, thresh=0.2): - # Assume that the last two DoF is gripper - qvel = self.agent.robot.get_qvel()[:-2] - return np.max(np.abs(qvel)) <= thresh - - def evaluate(self, **kwargs): - obj_to_goal_pos = self.goal_pos - self.obj_pose.p - is_obj_placed = np.linalg.norm(obj_to_goal_pos) <= self.goal_thresh - is_robot_static = self.check_robot_static() - return dict( - obj_to_goal_pos=obj_to_goal_pos, - is_obj_placed=is_obj_placed, - is_robot_static=is_robot_static, - success=is_obj_placed and is_robot_static, - ) - - def compute_dense_reward(self, info, **kwargs): - reward = 0.0 - - if info["success"]: - reward = 10.0 - else: - obj_pose = self.obj_pose - - # reaching reward - tcp_wrt_obj_pose = obj_pose.inv() * self.agent.tcp.pose - tcp_to_obj_dist = np.linalg.norm(tcp_wrt_obj_pose.p) - reaching_reward = 1 - np.tanh( - 3.0 * np.maximum(tcp_to_obj_dist - np.linalg.norm(self.bbox_size), 0.0) - ) - reward = reward + reaching_reward - - # grasp reward - is_grasped = self.agent.is_grasping(self.obj, max_angle=60) - reward += 3.0 if is_grasped else 0.0 - - # reaching-goal reward - if is_grasped: - obj_to_goal_pos = self.goal_pos - obj_pose.p - obj_to_goal_dist = np.linalg.norm(obj_to_goal_pos) - reaching_goal_reward = 3 * (1 - np.tanh(3.0 * obj_to_goal_dist)) - reward += reaching_goal_reward - - return reward - - def compute_normalized_dense_reward(self, **kwargs): - return self.compute_dense_reward(**kwargs) / 10.0 - - def _register_render_cameras(self): - cam_cfg = super()._register_render_cameras() - cam_cfg.pose = look_at([0.3, 0, 1.0], [0.0, 0.0, 0.5]) - return cam_cfg - - def render_human(self): - set_entity_visibility(self.target_site, 0.8) - set_entity_visibility(self.goal_site, 0.5) - ret = super().render_human() - set_entity_visibility(self.target_site, 0) - set_entity_visibility(self.goal_site, 0) - return ret - - def render_rgb_array(self): - set_entity_visibility(self.target_site, 0.8) - set_entity_visibility(self.goal_site, 0.5) - ret = super().render_rgb_array() - set_entity_visibility(self.target_site, 0) - set_entity_visibility(self.goal_site, 0) - return ret - - def get_state(self) -> np.ndarray: - state = super().get_state() - # TODO(jigu): whether 
to add obj_start_pos - return np.hstack([state, self.goal_pos]) - - def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) - - -@register_env("PickClutterYCB-v0", max_episode_steps=200) -class PickClutterYCBEnv(PickClutterEnv): - DEFAULT_EPISODE_JSON = "{ASSET_DIR}/pick_clutter/ycb_train_5k.json.gz" - DEFAULT_ASSET_ROOT = PickSingleYCBEnv.DEFAULT_ASSET_ROOT - DEFAULT_MODEL_JSON = PickSingleYCBEnv.DEFAULT_MODEL_JSON - - def _load_model(self, model_id, model_scale=1.0): - density = self.model_db[model_id].get("density", 1000) - obj = build_actor_ycb( - model_id, - self._scene, - scale=model_scale, - density=density, - root_dir=self.asset_root, - ) - obj.name = model_id - obj_comp = obj.find_component_by_type(physx.PhysxRigidDynamicComponent) - obj_comp.set_linear_damping(0.1) - obj_comp.set_angular_damping(0.1) - return obj diff --git a/mani_skill2/envs/pick_and_place/pick_cube.py b/mani_skill2/envs/pick_and_place/pick_cube.py index 77bed1c70..d09b560cf 100644 --- a/mani_skill2/envs/pick_and_place/pick_cube.py +++ b/mani_skill2/envs/pick_and_place/pick_cube.py @@ -163,8 +163,8 @@ def get_state(self): return torch.hstack([state, self.goal_pos]) def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) + self.goal_pos = state[:, -3:] + super().set_state(state[:, :-3]) @register_env("LiftCube-v0", max_episode_steps=200) diff --git a/mani_skill2/envs/pick_and_place/pick_single.py b/mani_skill2/envs/pick_and_place/pick_single.py index 75b383a05..07647a074 100644 --- a/mani_skill2/envs/pick_and_place/pick_single.py +++ b/mani_skill2/envs/pick_and_place/pick_single.py @@ -11,6 +11,7 @@ from transforms3d.quaternions import axangle2quat, qmult from mani_skill2 import ASSET_DIR, format_path +from mani_skill2.envs.scene import ManiSkillScene from mani_skill2.utils.common import random_choice from mani_skill2.utils.io_utils import load_json from mani_skill2.utils.registration import register_env @@ -292,8 +293,8 @@ def get_state(self): return torch.hstack([state, self.goal_pos]) def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) + self.goal_pos = state[:, -3:] + super().set_state(state[:, :-3]) # ---------------------------------------------------------------------------- # @@ -301,7 +302,7 @@ def set_state(self, state): # ---------------------------------------------------------------------------- # def build_actor_ycb( model_id: str, - scene: sapien.Scene, + scene: ManiSkillScene, name: str, scale: float = 1.0, physical_material: physx.PhysxMaterial = None, diff --git a/mani_skill2/envs/pick_and_place/stack_cube.py b/mani_skill2/envs/pick_and_place/stack_cube.py index 2ce5e7dd2..731dca162 100644 --- a/mani_skill2/envs/pick_and_place/stack_cube.py +++ b/mani_skill2/envs/pick_and_place/stack_cube.py @@ -66,15 +66,18 @@ def sample(self, radius, max_trials, append=True, verbose=False): @register_env("StackCube-v0", max_episode_steps=200) class StackCubeEnv(StationaryManipulationEnv): def _load_actors(self): - TableSceneBuilder().build(self._scene) - - self.box_half_size = np.float32([0.02] * 3) + self.table_scene = TableSceneBuilder( + env=self, robot_init_qpos_noise=self.robot_init_qpos_noise + ) + self.table_scene.build() + self.box_half_size = torch.Tensor([0.02] * 3, device=self.device) self.cubeA = self._build_cube(self.box_half_size, color=(1, 0, 0), name="cubeA") self.cubeB = self._build_cube( self.box_half_size, color=(0, 1, 0), name="cubeB", kinematic=False ) def _initialize_actors(self): + 
self.table_scene.initialize() xy = self._episode_rng.uniform(-0.1, 0.1, [2]) region = [[-0.1, -0.2], [0.1, 0.2]] sampler = UniformSampler(region, self._episode_rng) @@ -136,90 +139,86 @@ def compute_dense_reward(self, info, **kwargs): gripper_width = ( self.agent.robot.get_qlimit()[-1, 1] * 2 ) # NOTE: hard-coded with panda - reward = 0.0 - - if info["success"]: - reward = 15.0 - else: - # grasp pose rotation reward - grasp_rot_loss_fxn = lambda A: np.tanh( - 1 / 8 * np.trace(A.T @ A) - ) # trace(A.T @ A) has range [0,8] for A being difference of rotation matrices - tcp_pose_wrt_cubeA = self.cubeA.pose.inv() * self.agent.tcp.pose - tcp_rot_wrt_cubeA = tcp_pose_wrt_cubeA.to_transformation_matrix()[:3, :3] - gt_rots = [ - np.array([[0, 1, 0], [1, 0, 0], [0, 0, -1]]), - np.array([[0, -1, 0], [-1, 0, 0], [0, 0, -1]]), - np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]), - np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]]), - ] - grasp_rot_loss = min( - [grasp_rot_loss_fxn(x - tcp_rot_wrt_cubeA) for x in gt_rots] - ) - reward += 1 - grasp_rot_loss - - cubeB_comp = self.cubeB.find_component_by_type( - physx.PhysxRigidDynamicComponent - ) - cubeB_vel_penalty = np.linalg.norm( - cubeB_comp.linear_velocity - ) + np.linalg.norm(cubeB_comp.angular_velocity) - reward -= cubeB_vel_penalty - - # reaching object reward - tcp_pose = self.agent.tcp.pose.p - cubeA_pos = self.cubeA.pose.p - cubeA_to_tcp_dist = np.linalg.norm(tcp_pose - cubeA_pos) - reaching_reward = 1 - np.tanh(3.0 * cubeA_to_tcp_dist) - reward += reaching_reward - - # check if cubeA is on cubeB - cubeA_pos = self.cubeA.pose.p - cubeB_pos = self.cubeB.pose.p - goal_xyz = np.hstack( - [cubeB_pos[0:2], cubeB_pos[2] + self.box_half_size[2] * 2] - ) - cubeA_on_cubeB = ( - np.linalg.norm(goal_xyz[:2] - cubeA_pos[:2]) - < self.box_half_size[0] * 0.8 - ) - cubeA_on_cubeB = cubeA_on_cubeB and ( - np.abs(goal_xyz[2] - cubeA_pos[2]) <= 0.005 - ) - if cubeA_on_cubeB: - reward = 10.0 - # ungrasp reward - is_cubeA_grasped = self.agent.is_grasping(self.cubeA) - if not is_cubeA_grasped: - reward += 2.0 - else: - reward = ( - reward - + 2.0 * np.sum(self.agent.robot.get_qpos()[-2:]) / gripper_width - ) - else: - # grasping reward - is_cubeA_grasped = self.agent.is_grasping(self.cubeA) - if is_cubeA_grasped: - reward += 1.0 - - # reaching goal reward, ensuring that cubeA has appropriate height during this process - if is_cubeA_grasped: - cubeA_to_goal = goal_xyz - cubeA_pos - # cubeA_to_goal_xy_dist = np.linalg.norm(cubeA_to_goal[:2]) - cubeA_to_goal_dist = np.linalg.norm(cubeA_to_goal) - appropriate_height_penalty = np.maximum( - np.maximum(2 * cubeA_to_goal[2], 0.0), - np.maximum(2 * (-0.02 - cubeA_to_goal[2]), 0.0), - ) - reaching_reward2 = 2 * ( - 1 - np.tanh(5.0 * appropriate_height_penalty) - ) - # qvel_penalty = np.sum(np.abs(self.agent.robot.get_qvel())) # prevent the robot arm from moving too fast - # reaching_reward2 -= 0.0003 * qvel_penalty - # if appropriate_height_penalty < 0.01: - reaching_reward2 += 4 * (1 - np.tanh(5.0 * cubeA_to_goal_dist)) - reward += np.maximum(reaching_reward2, 0.0) + # TODO (stao): rewrite dense reward for this task. TBH it should just be nearly the same as pick cube. 
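As a rough illustration of what that TODO points at, a batched dense reward in the style of the pick-cube task can be built directly from the TCP and cube positions, with the stacking goal taken as cubeB's center raised by one cube height. A hypothetical torch sketch (not the task's actual reward; inputs are assumed (N, 3) tensors for N parallel environments):

```python
import torch

def stack_reaching_reward(tcp_pos, cubeA_pos, cubeB_pos, half_size=0.02):
    """tcp_pos, cubeA_pos, cubeB_pos: (N, 3) tensors for N parallel envs."""
    # reach cubeA with the tool center point
    reach = 1 - torch.tanh(3.0 * torch.linalg.norm(tcp_pos - cubeA_pos, dim=1))
    # place cubeA on top of cubeB (goal = cubeB center raised by one cube height)
    goal = cubeB_pos.clone()
    goal[:, 2] += 2 * half_size
    place = 1 - torch.tanh(3.0 * torch.linalg.norm(goal - cubeA_pos, dim=1))
    return reach + place  # (N,) per-env reward
```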
+ # # grasp pose rotation reward + # grasp_rot_loss_fxn = lambda A: np.tanh( + # 1 / 8 * np.trace(A.T @ A) + # ) # trace(A.T @ A) has range [0,8] for A being difference of rotation matrices + # tcp_pose_wrt_cubeA = self.cubeA.pose.inv() * self.agent.tcp.pose + # tcp_rot_wrt_cubeA = tcp_pose_wrt_cubeA.to_transformation_matrix()[:3, :3] + # gt_rots = [ + # np.array([[0, 1, 0], [1, 0, 0], [0, 0, -1]]), + # np.array([[0, -1, 0], [-1, 0, 0], [0, 0, -1]]), + # np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]), + # np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]]), + # ] + # grasp_rot_loss = min( + # [grasp_rot_loss_fxn(x - tcp_rot_wrt_cubeA) for x in gt_rots] + # ) + # reward += 1 - grasp_rot_loss + + # cubeB_comp = self.cubeB.find_component_by_type( + # physx.PhysxRigidDynamicComponent + # ) + # cubeB_vel_penalty = np.linalg.norm( + # cubeB_comp.linear_velocity + # ) + np.linalg.norm(cubeB_comp.angular_velocity) + # reward -= cubeB_vel_penalty + + # reaching object reward + tcp_pose = self.agent.tcp.pose.p + cubeA_pos = self.cubeA.pose.p + cubeA_to_tcp_dist = np.linalg.norm(tcp_pose - cubeA_pos, axis=1) + reaching_reward = 1 - np.tanh(3.0 * cubeA_to_tcp_dist) + reward = reaching_reward + + # # check if cubeA is on cubeB + # cubeA_pos = self.cubeA.pose.p + # cubeB_pos = self.cubeB.pose.p + # goal_xyz = np.hstack( + # [cubeB_pos[0:2], cubeB_pos[2] + self.box_half_size[2] * 2] + # ) + # cubeA_on_cubeB = ( + # np.linalg.norm(goal_xyz[:2] - cubeA_pos[:2]) + # < self.box_half_size[0] * 0.8 + # ) + # cubeA_on_cubeB = cubeA_on_cubeB and ( + # np.abs(goal_xyz[2] - cubeA_pos[2]) <= 0.005 + # ) + # if cubeA_on_cubeB: + # reward = 10.0 + # # ungrasp reward + # is_cubeA_grasped = self.agent.is_grasping(self.cubeA) + # if not is_cubeA_grasped: + # reward += 2.0 + # else: + # reward = ( + # reward + # + 2.0 * np.sum(self.agent.robot.get_qpos()[-2:]) / gripper_width + # ) + # else: + # # grasping reward + # is_cubeA_grasped = self.agent.is_grasping(self.cubeA) + # if is_cubeA_grasped: + # reward += 1.0 + + # # reaching goal reward, ensuring that cubeA has appropriate height during this process + # if is_cubeA_grasped: + # cubeA_to_goal = goal_xyz - cubeA_pos + # # cubeA_to_goal_xy_dist = np.linalg.norm(cubeA_to_goal[:2]) + # cubeA_to_goal_dist = np.linalg.norm(cubeA_to_goal) + # appropriate_height_penalty = np.maximum( + # np.maximum(2 * cubeA_to_goal[2], 0.0), + # np.maximum(2 * (-0.02 - cubeA_to_goal[2]), 0.0), + # ) + # reaching_reward2 = 2 * ( + # 1 - np.tanh(5.0 * appropriate_height_penalty) + # ) + # # qvel_penalty = np.sum(np.abs(self.agent.robot.get_qvel())) # prevent the robot arm from moving too fast + # # reaching_reward2 -= 0.0003 * qvel_penalty + # # if appropriate_height_penalty < 0.01: + # reaching_reward2 += 4 * (1 - np.tanh(5.0 * cubeA_to_goal_dist)) + # reward += np.maximum(reaching_reward2, 0.0) return reward diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py index a1a7f6e57..f29377bfb 100644 --- a/mani_skill2/envs/sapien_env.py +++ b/mani_skill2/envs/sapien_env.py @@ -124,7 +124,11 @@ def __init__( sapien.physx.enable_gpu() sapien.set_cuda_tensor_backend("torch") self.gpu_sim_cfgs = gpu_sim_cfgs - + self.device = torch.device( + "cuda" + ) # TODO (stao): fix this for multi gpu support? + else: + self.device = torch.device("cpu") # TODO(jigu): Change to `warning` after lighting in VecEnv is fixed. # TODO Ms2 set log level. What to do now? 
# self._engine.set_log_level(os.getenv("MS2_SIM_LOG_LEVEL", "error")) @@ -324,13 +328,17 @@ def get_obs(self): return OrderedDict() elif self._obs_mode == "state": state_dict = self._get_obs_state_dict() - return flatten_state_dict(state_dict, squeeze_dims=squeeze_dims) + obs = flatten_state_dict(state_dict, squeeze_dims=squeeze_dims) elif self._obs_mode == "state_dict": - return self._get_obs_state_dict() + obs = self._get_obs_state_dict() elif self._obs_mode == "image": - return self._get_obs_images() + obs = self._get_obs_images() else: raise NotImplementedError(self._obs_mode) + if physx.is_gpu_enabled(): + return obs + else: + return unbatch(to_numpy(obs)) def _get_obs_state_dict(self): """Get (ground-truth) state-based observations.""" @@ -383,15 +391,8 @@ def get_camera_params(self) -> Dict[str, Dict[str, np.ndarray]]: return params def _get_obs_images(self) -> OrderedDict: - # TODO (stao): do we still need this part? - if self._renderer_type == "client": - # NOTE: not compatible with StereoDepthCamera - cameras = [x.camera for x in self._sensors.values()] - # TODO: upgrade - self._scene._update_render_and_take_pictures(cameras) - else: - self.update_render() - self.capture_sensor_data() + self.update_render() + self.capture_sensor_data() return OrderedDict( agent=self._get_obs_agent(), extra=self._get_obs_extra(), @@ -415,9 +416,11 @@ def get_reward(self, obs: Any, action: Array, info: Dict): if self._reward_mode == "sparse": return to_tensor(info["success"]) elif self._reward_mode == "dense": - return self.compute_dense_reward(obs, action, info) + return self.compute_dense_reward(obs=obs, action=action, info=info) elif self._reward_mode == "normalized_dense": - return self.compute_normalized_dense_reward(obs, action, info) + return self.compute_normalized_dense_reward( + obs=obs, action=action, info=info + ) else: raise NotImplementedError(self._reward_mode) @@ -454,7 +457,6 @@ def reconfigure(self): # Cache entites and articulations self._actors = self.get_actors() self._articulations = self.get_articulations() - self.device = torch.device("cpu") if sapien.physx.is_gpu_enabled(): self._scene._setup_gpu() self.device = self.physx_system.cuda_rigid_body_data.device @@ -638,14 +640,14 @@ def step_action(self, action): if action is None: # simulation without action pass elif isinstance(action, np.ndarray): - self.agent.set_action(action) + self.agent.set_action(to_tensor(action)) set_action = True elif isinstance(action, dict): if action["control_mode"] != self.agent.control_mode: self.agent.set_control_mode(action["control_mode"]) - self.agent.set_action(action["action"]) + self.agent.set_action(to_tensor(action["action"])) set_action = True - elif torch is not None and isinstance(action, torch.Tensor): + elif isinstance(action, torch.Tensor): self.agent.set_action(action) set_action = True else: diff --git a/mani_skill2/envs/scene.py b/mani_skill2/envs/scene.py index dfbc87322..edc97e751 100644 --- a/mani_skill2/envs/scene.py +++ b/mani_skill2/envs/scene.py @@ -457,6 +457,8 @@ def num_envs(self): def get_sim_state(self) -> torch.Tensor: """Get simulation state. Returns a tensor of shape (N, D) for N parallel environments and D dimensions of padded state per environment""" state = [] + # TODO (stao): Should we store state as a dictionary? What shape to store for parallel settings to make + # it loadable between parallel and non parallel settings? 
for actor in self.actors.values(): # TODO (stao) (in parallelized environment situation we may need to pad as some of these actors do not exist in other parallel envs) state.append(actor.get_state()) @@ -465,16 +467,14 @@ def get_sim_state(self) -> torch.Tensor: return torch.hstack(state) def set_sim_state(self, state: Array): - return - """Set simulation state.""" - KINEMANTIC_DIM = 13 # [pos, quat, lin_vel, ang_vel] + KINEMATIC_DIM = 13 # [pos, quat, lin_vel, ang_vel] start = 0 - for actor in self._actors: - set_actor_state(actor, state[start : start + KINEMANTIC_DIM]) - start += KINEMANTIC_DIM - for articulation in self._articulations: - ndim = KINEMANTIC_DIM + 2 * articulation.dof - set_articulation_state(articulation, state[start : start + ndim]) + for actor in self.actors.values(): + actor.set_state(state[:, start : start + KINEMATIC_DIM]) + start += KINEMATIC_DIM + for articulation in self.articulations.values(): + ndim = KINEMATIC_DIM + 2 * articulation.dof + articulation.set_state(state[:, start : start + ndim]) start += ndim # ---------------------------------------------------------------------------- # diff --git a/mani_skill2/utils/download_asset.py b/mani_skill2/utils/download_asset.py index dabefb13e..b7b5300fc 100644 --- a/mani_skill2/utils/download_asset.py +++ b/mani_skill2/utils/download_asset.py @@ -7,6 +7,7 @@ import argparse import hashlib +import os.path as osp import shutil import urllib.request import zipfile @@ -167,6 +168,7 @@ def download( if target_path is None: target_path = url.split("/")[-1] output_path = output_dir / target_path + output_dir = osp.dirname(output_path) # Clean up existing files if output_path.exists(): diff --git a/mani_skill2/utils/registration.py b/mani_skill2/utils/registration.py index a4b55a837..c40fe612e 100644 --- a/mani_skill2/utils/registration.py +++ b/mani_skill2/utils/registration.py @@ -83,6 +83,7 @@ def make(env_id, as_gym=True, enable_segmentation=False, **kwargs): kwargs["obs_mode"] = "image" # Add segmentation texture + # TODO (stao): we need to rework how segementation is done now if "robot_seg" in obs_mode: enable_segmentation = True if enable_segmentation: diff --git a/mani_skill2/utils/sapien_utils.py b/mani_skill2/utils/sapien_utils.py index e1107e7ca..fcb8141b0 100644 --- a/mani_skill2/utils/sapien_utils.py +++ b/mani_skill2/utils/sapien_utils.py @@ -70,9 +70,8 @@ def _unbatch(array: Union[Array, Sequence]): return {k: _unbatch(v) for k, v in array.items()} if isinstance(array, str): return array - if torch is not None: - if isinstance(array, torch.Tensor): - return array.squeeze(0) + if isinstance(array, torch.Tensor): + return array.squeeze(0) if isinstance(array, np.ndarray): if np.iterable(array) and array.shape[0] == 1: return array.squeeze(0) @@ -83,7 +82,10 @@ def _unbatch(array: Union[Array, Sequence]): def unbatch(*args: Tuple[Union[Array, Sequence]]): - return tuple([_unbatch(x) for x in args]) + x = [_unbatch(x) for x in args] + if len(args) == 1: + return x[0] + return tuple(x) def clone_tensor(array: Array): @@ -291,15 +293,6 @@ def get_articulation_state(articulation: physx.PhysxArticulation): return np.hstack([pose.p, pose.q, vel, ang_vel, qpos, qvel]) -def set_articulation_state(articulation: physx.PhysxArticulation, state: np.ndarray): - articulation.set_root_pose(sapien.Pose(state[0:3], state[3:7])) - articulation.set_root_velocity(state[7:10]) - articulation.set_root_angular_velocity(state[10:13]) - qpos, qvel = np.split(state[13:], 2) - articulation.set_qpos(qpos) - articulation.set_qvel(qvel) - - 
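The flat simulation state handled in this patch packs, per environment, 13 kinematic values for every actor (position 3, quaternion 4, linear velocity 3, angular velocity 3) followed by 13 + 2 * dof values for every articulation (the same root kinematics plus qpos and qvel). A minimal sketch of slicing such an (N, D) state tensor back apart, assuming a fixed actor/articulation ordering:

```python
import torch

KINEMATIC_DIM = 13  # [pos(3), quat(4), lin_vel(3), ang_vel(3)]

def split_sim_state(state: torch.Tensor, num_actors: int, articulation_dofs: list):
    """state: (N, D) tensor. Returns per-actor and per-articulation slices."""
    start = 0
    actor_states = []
    for _ in range(num_actors):
        actor_states.append(state[:, start:start + KINEMATIC_DIM])
        start += KINEMATIC_DIM
    articulation_states = []
    for dof in articulation_dofs:
        ndim = KINEMATIC_DIM + 2 * dof  # root kinematics + qpos + qvel
        articulation_states.append(state[:, start:start + ndim])
        start += ndim
    return actor_states, articulation_states

# hypothetical usage: 2 envs, 3 actors, one 9-DoF articulation
state = torch.zeros((2, 3 * KINEMATIC_DIM + KINEMATIC_DIM + 2 * 9))
actors, arts = split_sim_state(state, num_actors=3, articulation_dofs=[9])
```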
def get_articulation_padded_state(articulation: physx.PhysxArticulation, max_dof: int): state = get_articulation_state(articulation) qpos, qvel = np.split(state[13:], 2) diff --git a/mani_skill2/utils/scene_builder/table/table_scene_builder.py b/mani_skill2/utils/scene_builder/table/table_scene_builder.py index 82e45c285..c88da9e8f 100644 --- a/mani_skill2/utils/scene_builder/table/table_scene_builder.py +++ b/mani_skill2/utils/scene_builder/table/table_scene_builder.py @@ -69,6 +69,17 @@ def initialize(self): ) self.env.agent.reset(qpos) self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0])) + elif self.env.robot_uid == "panda_realsensed435": + # fmt: off + qpos = np.array( + [0.0, np.pi / 8, 0, -np.pi * 5 / 8, 0, np.pi * 3 / 4, -np.pi / 4, 0.04, 0.04] + ) + # fmt: on + qpos[:-2] += self.env._episode_rng.normal( + 0, self.robot_init_qpos_noise, len(qpos) - 2 + ) + self.env.agent.reset(qpos) + self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0])) elif self.env.robot_uid == "xmate3_robotiq": qpos = np.array( [0, np.pi / 6, 0, np.pi / 3, 0, np.pi / 2, -np.pi / 2, 0, 0] diff --git a/mani_skill2/utils/structs/actor.py b/mani_skill2/utils/structs/actor.py index 31c6923a9..27eb671e0 100644 --- a/mani_skill2/utils/structs/actor.py +++ b/mani_skill2/utils/structs/actor.py @@ -7,6 +7,7 @@ import sapien.render import torch +from mani_skill2.utils.sapien_utils import to_numpy from mani_skill2.utils.structs.base import BaseStruct, PhysxRigidDynamicComponentStruct from mani_skill2.utils.structs.pose import Pose, to_sapien_pose, vectorize_pose from mani_skill2.utils.structs.types import Array @@ -80,6 +81,7 @@ def set_state(self, state: Array): "You should not set state on a GPU enabled actor." ) else: + state = to_numpy(state[0]) self.set_pose(sapien.Pose(state[0:3], state[3:7])) if self.px_body_type == "dynamic": self.set_linear_velocity(state[7:10]) diff --git a/mani_skill2/utils/structs/articulation.py b/mani_skill2/utils/structs/articulation.py index 94744965c..1fd8d5a3c 100644 --- a/mani_skill2/utils/structs/articulation.py +++ b/mani_skill2/utils/structs/articulation.py @@ -7,7 +7,7 @@ import sapien.physx as physx import torch -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils.sapien_utils import to_numpy, to_tensor from mani_skill2.utils.structs.base import BaseStruct from mani_skill2.utils.structs.joint import Joint from mani_skill2.utils.structs.link import Link @@ -119,6 +119,20 @@ def get_state(self): qvel = self.get_qvel() return torch.hstack([pose.p, pose.q, vel, ang_vel, qpos, qvel]) + def set_state(self, state: Array): + if physx.is_gpu_enabled(): + raise NotImplementedError( + "You should not set state on a GPU enabled actor." 
+ ) + else: + state = to_numpy(state[0]) + self.set_root_pose(sapien.Pose(state[0:3], state[3:7])) + # self.set_root_linear_velocity(state[7:10]) + # self.set_root_angular_velocity(state[10:13]) + qpos, qvel = np.split(state[13:], 2) + self.set_qpos(qpos) + self.set_qvel(qvel) + # -------------------------------------------------------------------------- # # Functions from physx.PhysxArticulation # -------------------------------------------------------------------------- # @@ -371,3 +385,17 @@ def set_joint_drive_targets( else: for i, joint in enumerate(joints): joint.set_drive_target(targets[0, i]) + + def set_joint_drive_velocity_targets( + self, + targets: Array, + joints: List[Joint] = None, + ): + if physx.is_gpu_enabled(): + targets = to_tensor(targets) + raise NotImplementedError( + "Setting joint velocity targets is currently not supported on the GPU" + ) + else: + for i, joint in enumerate(joints): + joint.set_drive_velocity_target(targets[i]) diff --git a/mani_skill2/utils/structs/joint.py b/mani_skill2/utils/structs/joint.py index 8f4476129..d49d3e56f 100644 --- a/mani_skill2/utils/structs/joint.py +++ b/mani_skill2/utils/structs/joint.py @@ -220,6 +220,10 @@ def drive_velocity_target(self, arg1: Array) -> None: "Cannot set drive velocity targets at the moment in GPU simulation" ) else: + if arg1.shape == (): + arg1 = arg1.reshape( + 1, + ) self._objs[0].drive_velocity_target = arg1 @property diff --git a/mani_skill2/utils/wrappers/common.py b/mani_skill2/utils/wrappers/common.py index 9157fd341..de1885d36 100644 --- a/mani_skill2/utils/wrappers/common.py +++ b/mani_skill2/utils/wrappers/common.py @@ -1,30 +1,4 @@ import gymnasium as gym -from gymnasium import spaces - -from ..common import ( - clip_and_scale_action, - inv_clip_and_scale_action, - normalize_action_space, -) - - -class NormalizeBoxActionWrapper(gym.ActionWrapper): - """Normalize box action space to [-1, 1].""" - - def __init__(self, env): - super().__init__(env) - assert isinstance(self.env.action_space, spaces.Box), self.env.action_space - self.action_space = normalize_action_space(self.env.action_space) - - def action(self, action): - return clip_and_scale_action( - action, self.env.action_space.low, self.env.action_space.high - ) - - def reverse_action(self, action): - return inv_clip_and_scale_action( - action, self.env.action_space.low, self.env.action_space.high - ) class ResetSeedWrapper(gym.Wrapper): diff --git a/mani_skill2/utils/wrappers/observation.py b/mani_skill2/utils/wrappers/observation.py index bd7dc616b..476dbdfb3 100644 --- a/mani_skill2/utils/wrappers/observation.py +++ b/mani_skill2/utils/wrappers/observation.py @@ -50,7 +50,7 @@ def update_observation_space(space: spaces.Dict): new_cam_space["rgb"] = spaces.Box( low=0, high=255, shape=(height, width, 3), dtype=np.uint8 ) - elif key == "Position": + elif key == "PositionSegmentation": height, width = ori_cam_space[key].shape[:2] new_cam_space["depth"] = spaces.Box( low=0, high=np.inf, shape=(height, width, 1), dtype=np.float32 @@ -66,9 +66,7 @@ def observation(self, observation: dict): for key in ori_images: if key == "Color": rgb = ori_images[key][..., :3] # [H, W, 4] - if isinstance(rgb, np.ndarray): - rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8) - else: + if isinstance(rgb, torch.Tensor): rgb = rgb.clone() new_images["rgb"] = rgb # [H, W, 4] elif key == "PositionSegmentation": @@ -95,6 +93,7 @@ def merge_dict_spaces(dict_spaces: Sequence[spaces.Dict]): return spaces.Dict(OrderedDict(reverse_spaces)) +# TODO (stao): fix this wrapper 
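The wrapper below consumes the camera's PositionSegmentation texture, an (H, W, 4) map whose pixels are (x, y, z, actor_id) in OpenGL camera space with actor_id == 0 for the background, and lifts it into a world-space point cloud via the cam2world_gl transform. A simplified NumPy sketch of that conversion for a single camera (it keeps the segmentation channel separate instead of overloading the homogeneous w component, so it is illustrative rather than the wrapper's exact behavior):

```python
import numpy as np

def camera_points_to_world(position_segmentation: np.ndarray, cam2world_gl: np.ndarray):
    """position_segmentation: (H, W, 4) array of (x, y, z, actor_id) in OpenGL camera space.
    cam2world_gl: (4, 4) camera-to-world transform (OpenGL convention).
    Returns (H*W, 3) world-space points and (H*W,) actor ids."""
    h, w, _ = position_segmentation.shape
    xyz = position_segmentation[..., :3].reshape(-1, 3).astype(np.float32)
    actor_ids = position_segmentation[..., 3].reshape(-1)
    # homogeneous coordinates, then apply the camera-to-world transform
    xyzw = np.concatenate([xyz, np.ones((xyz.shape[0], 1), dtype=np.float32)], axis=1)
    world = xyzw @ cam2world_gl.T
    return world[:, :3], actor_ids
```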
class PointCloudObservationWrapper(BaseGymObservationWrapper): """Convert Position textures to world-space point cloud.""" @@ -115,21 +114,15 @@ def update_observation_space(space: spaces.Dict): cam_image_space = image_space[cam_uid] cam_pcd_space = OrderedDict() - h, w = cam_image_space["Position"].shape[:2] + h, w = cam_image_space["PositionSegmentation"].shape[:2] cam_pcd_space["xyzw"] = spaces.Box( low=-np.inf, high=np.inf, shape=(h * w, 4), dtype=np.float32 ) - # Extra keys if "Color" in cam_image_space.spaces: cam_pcd_space["rgb"] = spaces.Box( low=0, high=255, shape=(h * w, 3), dtype=np.uint8 ) - if "Segmentation" in cam_image_space.spaces: - cam_pcd_space["Segmentation"] = spaces.Box( - low=0, high=(2**32 - 1), shape=(h * w, 4), dtype=np.uint32 - ) - pcd_space[cam_uid] = spaces.Dict(cam_pcd_space) pcd_space = merge_dict_spaces(pcd_space.values()) @@ -143,10 +136,10 @@ def observation(self, observation: dict): for cam_uid, images in image_obs.items(): cam_pcd = {} - # Each pixel is (x, y, z, z_buffer_depth) in OpenGL camera space - position = images["Position"] - # position[..., 3] = position[..., 3] < 1 - position[..., 3] = position[..., 2] < 0 + # Each pixel is (x, y, z, actor_id) in OpenGL camera space + # actor_id = 0 for the background + position = images["PositionSegmentation"] + position[..., 3] = position[..., 2] == 0 # Convert to world space cam2world = camera_params[cam_uid]["cam2world_gl"] diff --git a/tests/test_envs.py b/tests/test_envs.py index adb9ba17d..786df156a 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -97,8 +97,8 @@ def test_states(env_id): @pytest.mark.parametrize("env_id", ENV_IDS) -@pytest.mark.parametrize("robot", ROBOTS) -def test_robots(env_id, robot): +@pytest.mark.parametrize("robot_uid", ROBOTS) +def test_robots(env_id, robot_uid): if env_id in [ "PandaAvoidObstacles-v0", "PegInsertionSide-v0", @@ -110,7 +110,7 @@ def test_robots(env_id, robot): "MoveBucket-v1", ]: pytest.skip(reason=f"Env {env_id} does not support robots other than panda") - env = gym.make(env_id, robot=robot) + env = gym.make(env_id, robot_uid=robot_uid) env.reset() action_space = env.action_space for _ in range(5): diff --git a/tests/test_venv.py b/tests/test_venv.py index b2aa3ea3f..48de2e0b7 100644 --- a/tests/test_venv.py +++ b/tests/test_venv.py @@ -21,7 +21,7 @@ def make_env(env_id, obs_mode): @pytest.mark.parametrize("env_id", ["PickCube-v0", "TurnFaucet-v0"]) @pytest.mark.parametrize("obs_mode", VENV_OBS_MODES) -def test_vecenv_obs_mode(env_id, obs_mode): +def test_cpu_vecenv_obs_mode(env_id, obs_mode): n_envs = 2 env_fns = [partial(make_env, env_id, obs_mode=obs_mode) for _ in range(n_envs)] @@ -63,7 +63,7 @@ def test_vecenv_obs_mode(env_id, obs_mode): @pytest.mark.parametrize("env_id", ["PickCube-v0", "TurnFaucet-v0"]) @pytest.mark.parametrize("obs_mode", VENV_OBS_MODES) -def test_gymnasium_vecenv(env_id, obs_mode): +def test_gymnasium_cpu_vecenv(env_id, obs_mode): n_envs = 2 gym_env = gym.make_vec( diff --git a/tests/utils.py b/tests/utils.py index e2568e805..e08990947 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -2,23 +2,22 @@ import torch from mani_skill2.utils.common import flatten_dict_keys - +# TODO (stao): reactivate old tasks once fixed ENV_IDS = [ "LiftCube-v0", "PickCube-v0", "StackCube-v0", "PickSingleYCB-v0", - "PickSingleEGAD-v0", - "PickClutterYCB-v0", - "AssemblingKits-v0", - "PegInsertionSide-v0", - "PlugCharger-v0", - "PandaAvoidObstacles-v0", - "TurnFaucet-v0", - "OpenCabinetDoor-v1", - "OpenCabinetDrawer-v1", - "PushChair-v1", - 
"MoveBucket-v1", + # "PickClutterYCB-v0", + # "AssemblingKits-v0", + # "PegInsertionSide-v0", + # "PlugCharger-v0", + # "PandaAvoidObstacles-v0", + # "TurnFaucet-v0", + # "OpenCabinetDoor-v1", + # "OpenCabinetDrawer-v1", + # "PushChair-v1", + # "MoveBucket-v1", ] STATIONARY_ENV_IDS = [ @@ -26,13 +25,12 @@ "PickCube-v0", "StackCube-v0", "PickSingleYCB-v0", - "PickSingleEGAD-v0", - "PickClutterYCB-v0", - "AssemblingKits-v0", - "PegInsertionSide-v0", - "PlugCharger-v0", - "PandaAvoidObstacles-v0", - "TurnFaucet-v0", + # "PickClutterYCB-v0", + # "AssemblingKits-v0", + # "PegInsertionSide-v0", + # "PlugCharger-v0", + # "PandaAvoidObstacles-v0", + # "TurnFaucet-v0", ] REWARD_MODES = ["dense", "normalized_dense", "sparse"] @@ -49,8 +47,8 @@ "state", "rgbd", "pointcloud", - "rgbd_robot_seg", - "pointcloud_robot_seg", + # "rgbd_robot_seg", + # "pointcloud_robot_seg", ] VENV_OBS_MODES = [ "image",