From 1f2cd14f76a631120812850b2de8e508e76eaa91 Mon Sep 17 00:00:00 2001 From: StoneT2000 Date: Mon, 22 Jan 2024 20:52:03 -0800 Subject: [PATCH] fix a few envs, mark pick clutter as to be fixed in the future. --- .../agents/controllers/base_controller.py | 2 +- mani_skill2/agents/controllers/pd_ee_pose.py | 6 +- .../agents/controllers/pd_joint_vel.py | 3 +- mani_skill2/agents/robots/xmate3/xmate3.py | 101 ++++-- .../envs/assembly/peg_insertion_side.py | 16 +- mani_skill2/envs/pick_and_place/__init__.py | 1 - mani_skill2/envs/pick_and_place/base_env.py | 6 +- .../envs/pick_and_place/pick_clutter.py | 290 ------------------ mani_skill2/envs/pick_and_place/pick_cube.py | 4 +- .../envs/pick_and_place/pick_single.py | 7 +- mani_skill2/envs/pick_and_place/stack_cube.py | 173 ++++++----- mani_skill2/envs/sapien_env.py | 40 +-- mani_skill2/envs/scene.py | 18 +- mani_skill2/utils/download_asset.py | 2 + mani_skill2/utils/registration.py | 1 + mani_skill2/utils/sapien_utils.py | 19 +- .../table/table_scene_builder.py | 11 + mani_skill2/utils/structs/actor.py | 2 + mani_skill2/utils/structs/articulation.py | 30 +- mani_skill2/utils/structs/joint.py | 4 + mani_skill2/utils/wrappers/common.py | 26 -- mani_skill2/utils/wrappers/observation.py | 23 +- tests/test_envs.py | 6 +- tests/test_venv.py | 4 +- tests/utils.py | 40 ++- 25 files changed, 303 insertions(+), 532 deletions(-) delete mode 100644 mani_skill2/envs/pick_and_place/pick_clutter.py diff --git a/mani_skill2/agents/controllers/base_controller.py b/mani_skill2/agents/controllers/base_controller.py index 71c20738f..0ca1840ae 100644 --- a/mani_skill2/agents/controllers/base_controller.py +++ b/mani_skill2/agents/controllers/base_controller.py @@ -284,7 +284,7 @@ def set_action(self, action: np.ndarray): for uid, controller in self.controllers.items(): start, end = self.action_mapping[uid] - controller.set_action(to_tensor(action[..., start:end])) + controller.set_action(action[..., start:end]) def to_action_dict(self, action: np.ndarray): """Convert a flat action to a dict of actions.""" diff --git a/mani_skill2/agents/controllers/pd_ee_pose.py b/mani_skill2/agents/controllers/pd_ee_pose.py index 1aaeeffb8..b56fdc9f2 100644 --- a/mani_skill2/agents/controllers/pd_ee_pose.py +++ b/mani_skill2/agents/controllers/pd_ee_pose.py @@ -4,6 +4,7 @@ import numpy as np import sapien import sapien.physx as physx +import torch from gymnasium import spaces from scipy.spatial.transform import Rotation @@ -167,12 +168,13 @@ def _initialize_action_space(self): self.action_space = spaces.Box(low, high, dtype=np.float32) def _clip_and_scale_action(self, action): + # TODO (stao): support batched actions # NOTE(xiqiang): rotation should be clipped by norm. 
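For context on the lines below: clip_and_scale_action comes from mani_skill2.utils.common and is not shown in this patch. Roughly, it maps a normalized action in [-1, 1] into the controller's [low, high] range, while the rotation part is clipped by its Euclidean norm so the axis-angle direction is preserved before scaling by rot_bound. A minimal NumPy sketch of both steps (illustrative only; the actual helper may differ, and the patched code applies the same idea with torch):

```python
import numpy as np

def clip_and_scale_action(action, low, high):
    """Rough sketch: map a normalized action in [-1, 1] to [low, high]."""
    action = np.clip(action, -1.0, 1.0)
    return 0.5 * (high + low) + 0.5 * (high - low) * action

def clip_rotation_by_norm(rot_action, rot_bound):
    """Clip an axis-angle delta by its norm (keeps direction), then scale by the bound."""
    rot_norm = np.linalg.norm(rot_action)
    if rot_norm > 1:
        rot_action = rot_action / rot_norm
    return rot_action * rot_bound

# hypothetical 3D position delta + 3D axis-angle delta
pos = clip_and_scale_action(np.array([0.5, -2.0, 0.1]), np.full(3, -0.1), np.full(3, 0.1))
rot = clip_rotation_by_norm(np.array([2.0, 0.0, 0.0]), rot_bound=np.pi / 2)
```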
pos_action = clip_and_scale_action( - action[:3], self._action_space.low[:3], self._action_space.high[:3] + action[:3], self.action_space_low[:3], self.action_space_high[:3] ) rot_action = action[3:] - rot_norm = np.linalg.norm(rot_action) + rot_norm = torch.linalg.norm(rot_action) if rot_norm > 1: rot_action = rot_action / rot_norm rot_action = rot_action * self.config.rot_bound diff --git a/mani_skill2/agents/controllers/pd_joint_vel.py b/mani_skill2/agents/controllers/pd_joint_vel.py index 0f7eaf19c..d96e53a33 100644 --- a/mani_skill2/agents/controllers/pd_joint_vel.py +++ b/mani_skill2/agents/controllers/pd_joint_vel.py @@ -29,8 +29,7 @@ def set_drive_property(self): def set_action(self, action: np.ndarray): action = self._preprocess_action(action) - for i, joint in enumerate(self.joints): - joint.set_drive_velocity_target(action[i]) + self.articulation.set_joint_drive_velocity_targets(action, self.joints) @dataclass diff --git a/mani_skill2/agents/robots/xmate3/xmate3.py b/mani_skill2/agents/robots/xmate3/xmate3.py index b56ef30fc..8828e361f 100644 --- a/mani_skill2/agents/robots/xmate3/xmate3.py +++ b/mani_skill2/agents/robots/xmate3/xmate3.py @@ -1,18 +1,20 @@ import numpy as np import sapien import sapien.physx as physx +import torch from mani_skill2 import ASSET_DIR from mani_skill2.agents.base_agent import BaseAgent from mani_skill2.agents.controllers import * from mani_skill2.sensors.camera import CameraConfig -from mani_skill2.utils.common import np_compute_angle_between +from mani_skill2.utils.common import compute_angle_between, np_compute_angle_between from mani_skill2.utils.sapien_utils import ( compute_total_impulse, get_actor_contacts, get_obj_by_name, get_pairwise_contact_impulse, ) +from mani_skill2.utils.structs.actor import Actor class Xmate3Robotiq(BaseAgent): @@ -172,38 +174,83 @@ def controller_configs(self): ), ] - def is_grasping(self, object: sapien.Entity = None, min_impulse=1e-6, max_angle=85): - contacts = self.scene.get_contacts() - if object is None: - finger1_contacts = get_actor_contacts(contacts, self.finger1_link) - finger2_contacts = get_actor_contacts(contacts, self.finger2_link) - return ( - np.linalg.norm(compute_total_impulse(finger1_contacts)) >= min_impulse - and np.linalg.norm(compute_total_impulse(finger2_contacts)) - >= min_impulse + def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85): + # TODO (stao): is_grasping code needs to be updated for new GPU sim + if physx.is_gpu_enabled(): + if object.name not in self.queries: + body_pairs = list(zip(self.finger1_link._bodies, object._bodies)) + body_pairs += list(zip(self.finger2_link._bodies, object._bodies)) + self.queries[object.name] = ( + self.scene.px.gpu_create_contact_query(body_pairs), + (len(object._bodies), 3), + ) + print(f"Create query for Panda grasp({object.name})") + query, contacts_shape = self.queries[object.name] + self.scene.px.gpu_query_contacts(query) + # query.cuda_contacts # (num_unique_pairs * num_envs, 3) + contacts = query.cuda_contacts.clone().reshape((-1, *contacts_shape)) + lforce = torch.linalg.norm(contacts[0], axis=1) + rforce = torch.linalg.norm(contacts[1], axis=1) + + # NOTE (stao): 0.5 * time_step is a decent value when tested on a pick cube task. 
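The GPU grasp check here boils down to a batched heuristic: an object counts as grasped when both fingers report a contact force above a small threshold (tied to the simulation timestep just below) and the contact force points within max_angle of each finger's opening direction. A self-contained torch sketch of that check, assuming you already have per-environment (N, 3) contact force vectors and finger opening directions (hypothetical inputs; the real code obtains them from a PhysX GPU contact query and the finger link poses):

```python
import torch

def batched_is_grasping(l_contact_force, r_contact_force,
                        l_open_dir, r_open_dir,
                        min_force=1e-3, max_angle_deg=85.0):
    """All inputs are (N, 3) tensors for N parallel environments."""
    def angle_between_deg(a, b):
        # angle between batched vectors, guarded against zero-length forces
        cos = torch.sum(a * b, dim=1) / (
            torch.linalg.norm(a, dim=1) * torch.linalg.norm(b, dim=1) + 1e-8
        )
        return torch.rad2deg(torch.arccos(cos.clamp(-1.0, 1.0)))

    lforce = torch.linalg.norm(l_contact_force, dim=1)
    rforce = torch.linalg.norm(r_contact_force, dim=1)
    lflag = (lforce >= min_force) & (angle_between_deg(l_open_dir, l_contact_force) <= max_angle_deg)
    rflag = (rforce >= min_force) & (angle_between_deg(r_open_dir, r_contact_force) <= max_angle_deg)
    return lflag & rflag  # (N,) boolean grasp mask
```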
+ min_force = 0.5 * self.scene.px.timestep + + # direction to open the gripper + ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1] + rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1] + langle = compute_angle_between(ldirection, contacts[0]) + rangle = compute_angle_between(rdirection, contacts[1]) + lflag = torch.logical_and( + lforce >= min_force, torch.rad2deg(langle) <= max_angle ) + rflag = torch.logical_and( + rforce >= min_force, torch.rad2deg(rangle) <= max_angle + ) + + return torch.logical_and(lflag, rflag) else: - limpulse = get_pairwise_contact_impulse(contacts, self.finger1_link, object) - rimpulse = get_pairwise_contact_impulse(contacts, self.finger2_link, object) + contacts = self.scene.get_contacts() - # direction to open the gripper - ldirection = self.finger1_link.pose.to_transformation_matrix()[:3, 1] - rdirection = -self.finger2_link.pose.to_transformation_matrix()[:3, 1] + if object is None: + finger1_contacts = get_actor_contacts(contacts, self.finger1_link) + finger2_contacts = get_actor_contacts(contacts, self.finger2_link) + return ( + np.linalg.norm(compute_total_impulse(finger1_contacts)) + >= min_impulse + and np.linalg.norm(compute_total_impulse(finger2_contacts)) + >= min_impulse + ) + else: + limpulse = get_pairwise_contact_impulse( + contacts, self.finger1_link, object + ) + rimpulse = get_pairwise_contact_impulse( + contacts, self.finger2_link, object + ) - # angle between impulse and open direction - langle = np_compute_angle_between(ldirection, limpulse) - rangle = np_compute_angle_between(rdirection, rimpulse) + # direction to open the gripper + ldirection = self.finger1_link.pose.to_transformation_matrix()[ + ..., :3, 1 + ] + rdirection = -self.finger2_link.pose.to_transformation_matrix()[ + ..., :3, 1 + ] - lflag = ( - np.linalg.norm(limpulse) >= min_impulse - and np.rad2deg(langle) <= max_angle - ) - rflag = ( - np.linalg.norm(rimpulse) >= min_impulse - and np.rad2deg(rangle) <= max_angle - ) + # TODO Convert this to batched code + # angle between impulse and open direction + langle = np_compute_angle_between(ldirection[0], limpulse) + rangle = np_compute_angle_between(rdirection[0], rimpulse) + + lflag = ( + np.linalg.norm(limpulse) >= min_impulse + and np.rad2deg(langle) <= max_angle + ) + rflag = ( + np.linalg.norm(rimpulse) >= min_impulse + and np.rad2deg(rangle) <= max_angle + ) - return all([lflag, rflag]) + return all([lflag, rflag]) @staticmethod def build_grasp_pose(approaching, closing, center): diff --git a/mani_skill2/envs/assembly/peg_insertion_side.py b/mani_skill2/envs/assembly/peg_insertion_side.py index 582d5ba18..f84a5766f 100644 --- a/mani_skill2/envs/assembly/peg_insertion_side.py +++ b/mani_skill2/envs/assembly/peg_insertion_side.py @@ -2,6 +2,7 @@ import numpy as np import sapien +import torch from sapien import Pose from transforms3d.euler import euler2quat @@ -58,7 +59,8 @@ def _build_box_with_hole( return builder.build_static(name) def _load_actors(self): - TableSceneBuilder().build(self._scene) + self.table_scene = TableSceneBuilder(env=self) + self.table_scene.build() # peg # length, radius = 0.1, 0.02 @@ -105,6 +107,7 @@ def _load_actors(self): self.box_hole_radius = inner_radius def _initialize_actors(self): + self.table_scene.initialize() xy = self._episode_rng.uniform([-0.1, -0.3], [0.1, 0]) pos = np.hstack([xy, self.peg_half_size[2]]) ori = np.pi / 2 + self._episode_rng.uniform(-np.pi / 3, np.pi / 3) @@ -167,14 +170,17 @@ def has_peg_inserted(self): # Only head position 
is used in fact peg_head_pos_at_hole = (self.box_hole_pose.inv() * self.peg_head_pose).p # x-axis is hole direction - x_flag = -0.015 <= peg_head_pos_at_hole[0] + x_flag = -0.015 <= peg_head_pos_at_hole[:, 0] y_flag = ( - -self.box_hole_radius <= peg_head_pos_at_hole[1] <= self.box_hole_radius + -self.box_hole_radius <= peg_head_pos_at_hole[:, 1] <= self.box_hole_radius ) z_flag = ( - -self.box_hole_radius <= peg_head_pos_at_hole[2] <= self.box_hole_radius + -self.box_hole_radius <= peg_head_pos_at_hole[:, 2] <= self.box_hole_radius + ) + return ( + torch.logical_and(torch.logical_and(x_flag, y_flag), z_flag), + peg_head_pos_at_hole, ) - return (x_flag and y_flag and z_flag), peg_head_pos_at_hole def evaluate(self, **kwargs) -> dict: success, peg_head_pos_at_hole = self.has_peg_inserted() diff --git a/mani_skill2/envs/pick_and_place/__init__.py b/mani_skill2/envs/pick_and_place/__init__.py index 8ecfa771a..d7c325c3a 100644 --- a/mani_skill2/envs/pick_and_place/__init__.py +++ b/mani_skill2/envs/pick_and_place/__init__.py @@ -2,4 +2,3 @@ from . import pick_cube from . import stack_cube from . import pick_single -from . import pick_clutter diff --git a/mani_skill2/envs/pick_and_place/base_env.py b/mani_skill2/envs/pick_and_place/base_env.py index 0f4a8e667..80ef53b2d 100644 --- a/mani_skill2/envs/pick_and_place/base_env.py +++ b/mani_skill2/envs/pick_and_place/base_env.py @@ -12,7 +12,7 @@ from mani_skill2.agents.robots.xmate3 import Xmate3Robotiq from mani_skill2.envs.sapien_env import BaseEnv from mani_skill2.sensors.camera import CameraConfig -from mani_skill2.utils.sapien_utils import hide_entity, look_at +from mani_skill2.utils.sapien_utils import hide_entity, look_at, to_numpy from mani_skill2.utils.structs.pose import vectorize_pose @@ -34,7 +34,7 @@ def _build_cube( if render_material is None: render_material = self._renderer.create_material() render_material.base_color = (*color, 1.0) - + half_size = to_numpy(half_size) builder = self._scene.create_actor_builder() builder.add_box_collision(half_size=half_size) builder.add_box_visual(half_size=half_size, material=render_material) @@ -123,7 +123,7 @@ def _initialize_agent_v1(self): raise NotImplementedError(self.robot_uid) def _register_sensors(self): - pose = look_at([0.3, 0, 0.6], [-0.1, 0, 0.1]) + pose = look_at([0.3, 0, 0.6], [-0.1, 0, 0.9]) return CameraConfig( "base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10 ) diff --git a/mani_skill2/envs/pick_and_place/pick_clutter.py b/mani_skill2/envs/pick_and_place/pick_clutter.py deleted file mode 100644 index b95d8e2ea..000000000 --- a/mani_skill2/envs/pick_and_place/pick_clutter.py +++ /dev/null @@ -1,290 +0,0 @@ -from collections import OrderedDict -from pathlib import Path -from typing import Dict, List - -import numpy as np -import sapien -import sapien.physx as physx -from sapien import Pose - -from mani_skill2 import format_path -from mani_skill2.utils.common import random_choice -from mani_skill2.utils.io_utils import load_json -from mani_skill2.utils.registration import register_env -from mani_skill2.utils.sapien_utils import look_at, set_entity_visibility -from mani_skill2.utils.scene_builder import TableSceneBuilder -from mani_skill2.utils.structs.pose import vectorize_pose - -from .base_env import StationaryManipulationEnv -from .pick_single import PickSingleYCBEnv, build_actor_ycb - - -class PickClutterEnv(StationaryManipulationEnv): - DEFAULT_EPISODE_JSON: str - DEFAULT_ASSET_ROOT: str - DEFAULT_MODEL_JSON: str - - obj: sapien.Entity # target object - - def 
__init__( - self, - episode_json: str = None, - asset_root: str = None, - model_json: str = None, - **kwargs, - ): - # Load episode configurations - if episode_json is None: - episode_json = self.DEFAULT_EPISODE_JSON - episode_json = format_path(episode_json) - if not Path(episode_json).exists(): - raise FileNotFoundError( - f"Episode json ({episode_json}) is not found." - "To download default json:" - "`python -m mani_skill2.utils.download_asset pick_clutter_ycb`." - ) - self.episodes: List[Dict] = load_json(episode_json) - - # Root directory of object models - if asset_root is None: - asset_root = self.DEFAULT_ASSET_ROOT - self.asset_root = Path(format_path(asset_root)) - - # Information of object models - if model_json is None: - model_json = self.DEFAULT_MODEL_JSON - model_json = self.asset_root / format_path(model_json) - self.model_db: Dict[str, Dict] = load_json(model_json) - - self.episode_idx = -1 - - self.goal_thresh = 0.025 - - super().__init__(**kwargs) - - def _load_actors(self): - TableSceneBuilder().build(self._scene) - - self.objs: List[sapien.Entity] = [] - self.bbox_sizes = [] - for actor_cfg in self.episode["actors"]: - model_id = actor_cfg["model_id"] - model_scale = actor_cfg["scale"] - obj = self._load_model(model_id, model_scale=model_scale) - self.objs.append(obj) - - bbox = self.model_db[model_id]["bbox"] - bbox_size = np.array(bbox["max"]) - np.array(bbox["min"]) - self.bbox_sizes.append(bbox_size * model_scale) - - self.target_site = self._build_sphere_site( - 0.01, color=(1, 1, 0), name="_target_site" - ) - self.goal_site = self._build_sphere_site( - 0.01, color=(0, 1, 0), name="_goal_site" - ) - - def _load_model(self, model_id, model_scale=1.0) -> sapien.Entity: - raise NotImplementedError - - def reset(self, seed=None, options=None): - if options is None: - options = dict() - self._set_episode_rng(seed) - episode_idx = options.pop("episode_idx", None) - reconfigure = options.pop("reconfigure", False) - _reconfigure = self._set_episode(episode_idx) - reconfigure = _reconfigure or reconfigure - options["reconfigure"] = reconfigure - return super().reset(seed=self._episode_seed, options=options) - - def _set_episode(self, episode_idx=None): - reconfigure = False - - if episode_idx is None: - episode_idx = self._episode_rng.randint(len(self.episodes)) - # episode_idx = (self.episode_idx + 1) % len(self.episodes) - - # TODO(jigu): check whether assets (and scales) are the same instead - if self.episode_idx != episode_idx: - reconfigure = True - - self.episode_idx = episode_idx - self.episode = self.episodes[self.episode_idx] - - return reconfigure - - def _initialize_actors(self): - for i, actor_cfg in enumerate(self.episode["actors"]): - pose = np.float32(actor_cfg["pose"]) - # Add a small offset to avoid numerical issues for simulation - self.objs[i].set_pose(Pose(pose[:3] + [0, 0, 1e-3], pose[3:])) - - # Settle - self.agent.robot.set_pose(Pose([10, 0, 0])) - for _ in range(self.control_freq): - self._scene.step() - - def _initialize_agent(self): - if self.robot_uid == "panda": - # fmt: off - qpos = np.array( - [0.0, 0, 0, -np.pi * 2 / 3, 0, np.pi * 2 / 3, np.pi / 4, 0.04, 0.04] - ) - # fmt: on - qpos[:-2] += self._episode_rng.normal( - 0, self.robot_init_qpos_noise, len(qpos) - 2 - ) - self.agent.reset(qpos) - self.agent.robot.set_pose(Pose([-0.544, 0, 0])) - else: - raise NotImplementedError(self.robot_uid) - - @property - def obj_pose(self): - """Get the center of mass (COM) pose.""" - return ( - self.obj.pose - * self.obj.find_component_by_type( - 
physx.PhysxRigidDynamicComponent - ).cmass_local_pose - ) - - def _initialize_task(self): - self._set_target() - self._set_goal() - - def _set_target(self): - visible_inds = [] - for i, actor_cfg in enumerate(self.episode["actors"]): - if actor_cfg["rep_pts"] is not None: - visible_inds.append(i) - assert len(visible_inds) > 0, self.episode_idx - - actor_idx = random_choice(visible_inds, self._episode_rng) - self.obj = self.objs[actor_idx] - obj_rep_pts = self.episode["actors"][actor_idx]["rep_pts"] - self.obj_start_pos = np.float32(random_choice(obj_rep_pts, self._episode_rng)) - self.bbox_size = self.bbox_sizes[actor_idx] - - self.target_site.set_pose(Pose(self.obj_start_pos)) - - def _set_goal(self): - goal_pos = self._episode_rng.uniform([-0.15, -0.25, 0.35], [0.15, 0.25, 0.45]) - self.goal_pos = goal_pos - self.goal_site.set_pose(Pose(self.goal_pos)) - - def _get_obs_extra(self) -> OrderedDict: - obs = OrderedDict( - tcp_pose=vectorize_pose(self.agent.tcp.pose), - goal_pos=self.goal_pos, - obj_start_pos=self.obj_start_pos, - ) - if self._obs_mode in ["state", "state_dict"]: - obs.update( - tcp_to_goal_pos=self.goal_pos - self.agent.tcp.pose.p, - obj_pose=vectorize_pose(self.obj_pose), - tcp_to_obj_pos=self.obj_pose.p - self.agent.tcp.pose.p, - obj_to_goal_pos=self.goal_pos - self.obj_pose.p, - ) - return obs - - def check_robot_static(self, thresh=0.2): - # Assume that the last two DoF is gripper - qvel = self.agent.robot.get_qvel()[:-2] - return np.max(np.abs(qvel)) <= thresh - - def evaluate(self, **kwargs): - obj_to_goal_pos = self.goal_pos - self.obj_pose.p - is_obj_placed = np.linalg.norm(obj_to_goal_pos) <= self.goal_thresh - is_robot_static = self.check_robot_static() - return dict( - obj_to_goal_pos=obj_to_goal_pos, - is_obj_placed=is_obj_placed, - is_robot_static=is_robot_static, - success=is_obj_placed and is_robot_static, - ) - - def compute_dense_reward(self, info, **kwargs): - reward = 0.0 - - if info["success"]: - reward = 10.0 - else: - obj_pose = self.obj_pose - - # reaching reward - tcp_wrt_obj_pose = obj_pose.inv() * self.agent.tcp.pose - tcp_to_obj_dist = np.linalg.norm(tcp_wrt_obj_pose.p) - reaching_reward = 1 - np.tanh( - 3.0 * np.maximum(tcp_to_obj_dist - np.linalg.norm(self.bbox_size), 0.0) - ) - reward = reward + reaching_reward - - # grasp reward - is_grasped = self.agent.is_grasping(self.obj, max_angle=60) - reward += 3.0 if is_grasped else 0.0 - - # reaching-goal reward - if is_grasped: - obj_to_goal_pos = self.goal_pos - obj_pose.p - obj_to_goal_dist = np.linalg.norm(obj_to_goal_pos) - reaching_goal_reward = 3 * (1 - np.tanh(3.0 * obj_to_goal_dist)) - reward += reaching_goal_reward - - return reward - - def compute_normalized_dense_reward(self, **kwargs): - return self.compute_dense_reward(**kwargs) / 10.0 - - def _register_render_cameras(self): - cam_cfg = super()._register_render_cameras() - cam_cfg.pose = look_at([0.3, 0, 1.0], [0.0, 0.0, 0.5]) - return cam_cfg - - def render_human(self): - set_entity_visibility(self.target_site, 0.8) - set_entity_visibility(self.goal_site, 0.5) - ret = super().render_human() - set_entity_visibility(self.target_site, 0) - set_entity_visibility(self.goal_site, 0) - return ret - - def render_rgb_array(self): - set_entity_visibility(self.target_site, 0.8) - set_entity_visibility(self.goal_site, 0.5) - ret = super().render_rgb_array() - set_entity_visibility(self.target_site, 0) - set_entity_visibility(self.goal_site, 0) - return ret - - def get_state(self) -> np.ndarray: - state = super().get_state() - # TODO(jigu): whether 
to add obj_start_pos - return np.hstack([state, self.goal_pos]) - - def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) - - -@register_env("PickClutterYCB-v0", max_episode_steps=200) -class PickClutterYCBEnv(PickClutterEnv): - DEFAULT_EPISODE_JSON = "{ASSET_DIR}/pick_clutter/ycb_train_5k.json.gz" - DEFAULT_ASSET_ROOT = PickSingleYCBEnv.DEFAULT_ASSET_ROOT - DEFAULT_MODEL_JSON = PickSingleYCBEnv.DEFAULT_MODEL_JSON - - def _load_model(self, model_id, model_scale=1.0): - density = self.model_db[model_id].get("density", 1000) - obj = build_actor_ycb( - model_id, - self._scene, - scale=model_scale, - density=density, - root_dir=self.asset_root, - ) - obj.name = model_id - obj_comp = obj.find_component_by_type(physx.PhysxRigidDynamicComponent) - obj_comp.set_linear_damping(0.1) - obj_comp.set_angular_damping(0.1) - return obj diff --git a/mani_skill2/envs/pick_and_place/pick_cube.py b/mani_skill2/envs/pick_and_place/pick_cube.py index 77bed1c70..d09b560cf 100644 --- a/mani_skill2/envs/pick_and_place/pick_cube.py +++ b/mani_skill2/envs/pick_and_place/pick_cube.py @@ -163,8 +163,8 @@ def get_state(self): return torch.hstack([state, self.goal_pos]) def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) + self.goal_pos = state[:, -3:] + super().set_state(state[:, :-3]) @register_env("LiftCube-v0", max_episode_steps=200) diff --git a/mani_skill2/envs/pick_and_place/pick_single.py b/mani_skill2/envs/pick_and_place/pick_single.py index 75b383a05..07647a074 100644 --- a/mani_skill2/envs/pick_and_place/pick_single.py +++ b/mani_skill2/envs/pick_and_place/pick_single.py @@ -11,6 +11,7 @@ from transforms3d.quaternions import axangle2quat, qmult from mani_skill2 import ASSET_DIR, format_path +from mani_skill2.envs.scene import ManiSkillScene from mani_skill2.utils.common import random_choice from mani_skill2.utils.io_utils import load_json from mani_skill2.utils.registration import register_env @@ -292,8 +293,8 @@ def get_state(self): return torch.hstack([state, self.goal_pos]) def set_state(self, state): - self.goal_pos = state[-3:] - super().set_state(state[:-3]) + self.goal_pos = state[:, -3:] + super().set_state(state[:, :-3]) # ---------------------------------------------------------------------------- # @@ -301,7 +302,7 @@ def set_state(self, state): # ---------------------------------------------------------------------------- # def build_actor_ycb( model_id: str, - scene: sapien.Scene, + scene: ManiSkillScene, name: str, scale: float = 1.0, physical_material: physx.PhysxMaterial = None, diff --git a/mani_skill2/envs/pick_and_place/stack_cube.py b/mani_skill2/envs/pick_and_place/stack_cube.py index 2ce5e7dd2..731dca162 100644 --- a/mani_skill2/envs/pick_and_place/stack_cube.py +++ b/mani_skill2/envs/pick_and_place/stack_cube.py @@ -66,15 +66,18 @@ def sample(self, radius, max_trials, append=True, verbose=False): @register_env("StackCube-v0", max_episode_steps=200) class StackCubeEnv(StationaryManipulationEnv): def _load_actors(self): - TableSceneBuilder().build(self._scene) - - self.box_half_size = np.float32([0.02] * 3) + self.table_scene = TableSceneBuilder( + env=self, robot_init_qpos_noise=self.robot_init_qpos_noise + ) + self.table_scene.build() + self.box_half_size = torch.Tensor([0.02] * 3, device=self.device) self.cubeA = self._build_cube(self.box_half_size, color=(1, 0, 0), name="cubeA") self.cubeB = self._build_cube( self.box_half_size, color=(0, 1, 0), name="cubeB", kinematic=False ) def _initialize_actors(self): + 
self.table_scene.initialize() xy = self._episode_rng.uniform(-0.1, 0.1, [2]) region = [[-0.1, -0.2], [0.1, 0.2]] sampler = UniformSampler(region, self._episode_rng) @@ -136,90 +139,86 @@ def compute_dense_reward(self, info, **kwargs): gripper_width = ( self.agent.robot.get_qlimit()[-1, 1] * 2 ) # NOTE: hard-coded with panda - reward = 0.0 - - if info["success"]: - reward = 15.0 - else: - # grasp pose rotation reward - grasp_rot_loss_fxn = lambda A: np.tanh( - 1 / 8 * np.trace(A.T @ A) - ) # trace(A.T @ A) has range [0,8] for A being difference of rotation matrices - tcp_pose_wrt_cubeA = self.cubeA.pose.inv() * self.agent.tcp.pose - tcp_rot_wrt_cubeA = tcp_pose_wrt_cubeA.to_transformation_matrix()[:3, :3] - gt_rots = [ - np.array([[0, 1, 0], [1, 0, 0], [0, 0, -1]]), - np.array([[0, -1, 0], [-1, 0, 0], [0, 0, -1]]), - np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]), - np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]]), - ] - grasp_rot_loss = min( - [grasp_rot_loss_fxn(x - tcp_rot_wrt_cubeA) for x in gt_rots] - ) - reward += 1 - grasp_rot_loss - - cubeB_comp = self.cubeB.find_component_by_type( - physx.PhysxRigidDynamicComponent - ) - cubeB_vel_penalty = np.linalg.norm( - cubeB_comp.linear_velocity - ) + np.linalg.norm(cubeB_comp.angular_velocity) - reward -= cubeB_vel_penalty - - # reaching object reward - tcp_pose = self.agent.tcp.pose.p - cubeA_pos = self.cubeA.pose.p - cubeA_to_tcp_dist = np.linalg.norm(tcp_pose - cubeA_pos) - reaching_reward = 1 - np.tanh(3.0 * cubeA_to_tcp_dist) - reward += reaching_reward - - # check if cubeA is on cubeB - cubeA_pos = self.cubeA.pose.p - cubeB_pos = self.cubeB.pose.p - goal_xyz = np.hstack( - [cubeB_pos[0:2], cubeB_pos[2] + self.box_half_size[2] * 2] - ) - cubeA_on_cubeB = ( - np.linalg.norm(goal_xyz[:2] - cubeA_pos[:2]) - < self.box_half_size[0] * 0.8 - ) - cubeA_on_cubeB = cubeA_on_cubeB and ( - np.abs(goal_xyz[2] - cubeA_pos[2]) <= 0.005 - ) - if cubeA_on_cubeB: - reward = 10.0 - # ungrasp reward - is_cubeA_grasped = self.agent.is_grasping(self.cubeA) - if not is_cubeA_grasped: - reward += 2.0 - else: - reward = ( - reward - + 2.0 * np.sum(self.agent.robot.get_qpos()[-2:]) / gripper_width - ) - else: - # grasping reward - is_cubeA_grasped = self.agent.is_grasping(self.cubeA) - if is_cubeA_grasped: - reward += 1.0 - - # reaching goal reward, ensuring that cubeA has appropriate height during this process - if is_cubeA_grasped: - cubeA_to_goal = goal_xyz - cubeA_pos - # cubeA_to_goal_xy_dist = np.linalg.norm(cubeA_to_goal[:2]) - cubeA_to_goal_dist = np.linalg.norm(cubeA_to_goal) - appropriate_height_penalty = np.maximum( - np.maximum(2 * cubeA_to_goal[2], 0.0), - np.maximum(2 * (-0.02 - cubeA_to_goal[2]), 0.0), - ) - reaching_reward2 = 2 * ( - 1 - np.tanh(5.0 * appropriate_height_penalty) - ) - # qvel_penalty = np.sum(np.abs(self.agent.robot.get_qvel())) # prevent the robot arm from moving too fast - # reaching_reward2 -= 0.0003 * qvel_penalty - # if appropriate_height_penalty < 0.01: - reaching_reward2 += 4 * (1 - np.tanh(5.0 * cubeA_to_goal_dist)) - reward += np.maximum(reaching_reward2, 0.0) + # TODO (stao): rewrite dense reward for this task. TBH it should just be nearly the same as pick cube. 
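As a rough illustration of what that TODO points at, a batched dense reward in the style of the pick-cube task can be built directly from the TCP and cube positions, with the stacking goal taken as cubeB's center raised by one cube height. A hypothetical torch sketch (not the task's actual reward; inputs are assumed (N, 3) tensors for N parallel environments):

```python
import torch

def stack_reaching_reward(tcp_pos, cubeA_pos, cubeB_pos, half_size=0.02):
    """tcp_pos, cubeA_pos, cubeB_pos: (N, 3) tensors for N parallel envs."""
    # reach cubeA with the tool center point
    reach = 1 - torch.tanh(3.0 * torch.linalg.norm(tcp_pos - cubeA_pos, dim=1))
    # place cubeA on top of cubeB (goal = cubeB center raised by one cube height)
    goal = cubeB_pos.clone()
    goal[:, 2] += 2 * half_size
    place = 1 - torch.tanh(3.0 * torch.linalg.norm(goal - cubeA_pos, dim=1))
    return reach + place  # (N,) per-env reward
```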
+ # # grasp pose rotation reward + # grasp_rot_loss_fxn = lambda A: np.tanh( + # 1 / 8 * np.trace(A.T @ A) + # ) # trace(A.T @ A) has range [0,8] for A being difference of rotation matrices + # tcp_pose_wrt_cubeA = self.cubeA.pose.inv() * self.agent.tcp.pose + # tcp_rot_wrt_cubeA = tcp_pose_wrt_cubeA.to_transformation_matrix()[:3, :3] + # gt_rots = [ + # np.array([[0, 1, 0], [1, 0, 0], [0, 0, -1]]), + # np.array([[0, -1, 0], [-1, 0, 0], [0, 0, -1]]), + # np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]), + # np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]]), + # ] + # grasp_rot_loss = min( + # [grasp_rot_loss_fxn(x - tcp_rot_wrt_cubeA) for x in gt_rots] + # ) + # reward += 1 - grasp_rot_loss + + # cubeB_comp = self.cubeB.find_component_by_type( + # physx.PhysxRigidDynamicComponent + # ) + # cubeB_vel_penalty = np.linalg.norm( + # cubeB_comp.linear_velocity + # ) + np.linalg.norm(cubeB_comp.angular_velocity) + # reward -= cubeB_vel_penalty + + # reaching object reward + tcp_pose = self.agent.tcp.pose.p + cubeA_pos = self.cubeA.pose.p + cubeA_to_tcp_dist = np.linalg.norm(tcp_pose - cubeA_pos, axis=1) + reaching_reward = 1 - np.tanh(3.0 * cubeA_to_tcp_dist) + reward = reaching_reward + + # # check if cubeA is on cubeB + # cubeA_pos = self.cubeA.pose.p + # cubeB_pos = self.cubeB.pose.p + # goal_xyz = np.hstack( + # [cubeB_pos[0:2], cubeB_pos[2] + self.box_half_size[2] * 2] + # ) + # cubeA_on_cubeB = ( + # np.linalg.norm(goal_xyz[:2] - cubeA_pos[:2]) + # < self.box_half_size[0] * 0.8 + # ) + # cubeA_on_cubeB = cubeA_on_cubeB and ( + # np.abs(goal_xyz[2] - cubeA_pos[2]) <= 0.005 + # ) + # if cubeA_on_cubeB: + # reward = 10.0 + # # ungrasp reward + # is_cubeA_grasped = self.agent.is_grasping(self.cubeA) + # if not is_cubeA_grasped: + # reward += 2.0 + # else: + # reward = ( + # reward + # + 2.0 * np.sum(self.agent.robot.get_qpos()[-2:]) / gripper_width + # ) + # else: + # # grasping reward + # is_cubeA_grasped = self.agent.is_grasping(self.cubeA) + # if is_cubeA_grasped: + # reward += 1.0 + + # # reaching goal reward, ensuring that cubeA has appropriate height during this process + # if is_cubeA_grasped: + # cubeA_to_goal = goal_xyz - cubeA_pos + # # cubeA_to_goal_xy_dist = np.linalg.norm(cubeA_to_goal[:2]) + # cubeA_to_goal_dist = np.linalg.norm(cubeA_to_goal) + # appropriate_height_penalty = np.maximum( + # np.maximum(2 * cubeA_to_goal[2], 0.0), + # np.maximum(2 * (-0.02 - cubeA_to_goal[2]), 0.0), + # ) + # reaching_reward2 = 2 * ( + # 1 - np.tanh(5.0 * appropriate_height_penalty) + # ) + # # qvel_penalty = np.sum(np.abs(self.agent.robot.get_qvel())) # prevent the robot arm from moving too fast + # # reaching_reward2 -= 0.0003 * qvel_penalty + # # if appropriate_height_penalty < 0.01: + # reaching_reward2 += 4 * (1 - np.tanh(5.0 * cubeA_to_goal_dist)) + # reward += np.maximum(reaching_reward2, 0.0) return reward diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py index a1a7f6e57..f29377bfb 100644 --- a/mani_skill2/envs/sapien_env.py +++ b/mani_skill2/envs/sapien_env.py @@ -124,7 +124,11 @@ def __init__( sapien.physx.enable_gpu() sapien.set_cuda_tensor_backend("torch") self.gpu_sim_cfgs = gpu_sim_cfgs - + self.device = torch.device( + "cuda" + ) # TODO (stao): fix this for multi gpu support? + else: + self.device = torch.device("cpu") # TODO(jigu): Change to `warning` after lighting in VecEnv is fixed. # TODO Ms2 set log level. What to do now? 
# self._engine.set_log_level(os.getenv("MS2_SIM_LOG_LEVEL", "error")) @@ -324,13 +328,17 @@ def get_obs(self): return OrderedDict() elif self._obs_mode == "state": state_dict = self._get_obs_state_dict() - return flatten_state_dict(state_dict, squeeze_dims=squeeze_dims) + obs = flatten_state_dict(state_dict, squeeze_dims=squeeze_dims) elif self._obs_mode == "state_dict": - return self._get_obs_state_dict() + obs = self._get_obs_state_dict() elif self._obs_mode == "image": - return self._get_obs_images() + obs = self._get_obs_images() else: raise NotImplementedError(self._obs_mode) + if physx.is_gpu_enabled(): + return obs + else: + return unbatch(to_numpy(obs)) def _get_obs_state_dict(self): """Get (ground-truth) state-based observations.""" @@ -383,15 +391,8 @@ def get_camera_params(self) -> Dict[str, Dict[str, np.ndarray]]: return params def _get_obs_images(self) -> OrderedDict: - # TODO (stao): do we still need this part? - if self._renderer_type == "client": - # NOTE: not compatible with StereoDepthCamera - cameras = [x.camera for x in self._sensors.values()] - # TODO: upgrade - self._scene._update_render_and_take_pictures(cameras) - else: - self.update_render() - self.capture_sensor_data() + self.update_render() + self.capture_sensor_data() return OrderedDict( agent=self._get_obs_agent(), extra=self._get_obs_extra(), @@ -415,9 +416,11 @@ def get_reward(self, obs: Any, action: Array, info: Dict): if self._reward_mode == "sparse": return to_tensor(info["success"]) elif self._reward_mode == "dense": - return self.compute_dense_reward(obs, action, info) + return self.compute_dense_reward(obs=obs, action=action, info=info) elif self._reward_mode == "normalized_dense": - return self.compute_normalized_dense_reward(obs, action, info) + return self.compute_normalized_dense_reward( + obs=obs, action=action, info=info + ) else: raise NotImplementedError(self._reward_mode) @@ -454,7 +457,6 @@ def reconfigure(self): # Cache entites and articulations self._actors = self.get_actors() self._articulations = self.get_articulations() - self.device = torch.device("cpu") if sapien.physx.is_gpu_enabled(): self._scene._setup_gpu() self.device = self.physx_system.cuda_rigid_body_data.device @@ -638,14 +640,14 @@ def step_action(self, action): if action is None: # simulation without action pass elif isinstance(action, np.ndarray): - self.agent.set_action(action) + self.agent.set_action(to_tensor(action)) set_action = True elif isinstance(action, dict): if action["control_mode"] != self.agent.control_mode: self.agent.set_control_mode(action["control_mode"]) - self.agent.set_action(action["action"]) + self.agent.set_action(to_tensor(action["action"])) set_action = True - elif torch is not None and isinstance(action, torch.Tensor): + elif isinstance(action, torch.Tensor): self.agent.set_action(action) set_action = True else: diff --git a/mani_skill2/envs/scene.py b/mani_skill2/envs/scene.py index dfbc87322..edc97e751 100644 --- a/mani_skill2/envs/scene.py +++ b/mani_skill2/envs/scene.py @@ -457,6 +457,8 @@ def num_envs(self): def get_sim_state(self) -> torch.Tensor: """Get simulation state. Returns a tensor of shape (N, D) for N parallel environments and D dimensions of padded state per environment""" state = [] + # TODO (stao): Should we store state as a dictionary? What shape to store for parallel settings to make + # it loadable between parallel and non parallel settings? 
for actor in self.actors.values(): # TODO (stao) (in parallelized environment situation we may need to pad as some of these actors do not exist in other parallel envs) state.append(actor.get_state()) @@ -465,16 +467,14 @@ def get_sim_state(self) -> torch.Tensor: return torch.hstack(state) def set_sim_state(self, state: Array): - return - """Set simulation state.""" - KINEMANTIC_DIM = 13 # [pos, quat, lin_vel, ang_vel] + KINEMATIC_DIM = 13 # [pos, quat, lin_vel, ang_vel] start = 0 - for actor in self._actors: - set_actor_state(actor, state[start : start + KINEMANTIC_DIM]) - start += KINEMANTIC_DIM - for articulation in self._articulations: - ndim = KINEMANTIC_DIM + 2 * articulation.dof - set_articulation_state(articulation, state[start : start + ndim]) + for actor in self.actors.values(): + actor.set_state(state[:, start : start + KINEMATIC_DIM]) + start += KINEMATIC_DIM + for articulation in self.articulations.values(): + ndim = KINEMATIC_DIM + 2 * articulation.dof + articulation.set_state(state[:, start : start + ndim]) start += ndim # ---------------------------------------------------------------------------- # diff --git a/mani_skill2/utils/download_asset.py b/mani_skill2/utils/download_asset.py index dabefb13e..b7b5300fc 100644 --- a/mani_skill2/utils/download_asset.py +++ b/mani_skill2/utils/download_asset.py @@ -7,6 +7,7 @@ import argparse import hashlib +import os.path as osp import shutil import urllib.request import zipfile @@ -167,6 +168,7 @@ def download( if target_path is None: target_path = url.split("/")[-1] output_path = output_dir / target_path + output_dir = osp.dirname(output_path) # Clean up existing files if output_path.exists(): diff --git a/mani_skill2/utils/registration.py b/mani_skill2/utils/registration.py index a4b55a837..c40fe612e 100644 --- a/mani_skill2/utils/registration.py +++ b/mani_skill2/utils/registration.py @@ -83,6 +83,7 @@ def make(env_id, as_gym=True, enable_segmentation=False, **kwargs): kwargs["obs_mode"] = "image" # Add segmentation texture + # TODO (stao): we need to rework how segementation is done now if "robot_seg" in obs_mode: enable_segmentation = True if enable_segmentation: diff --git a/mani_skill2/utils/sapien_utils.py b/mani_skill2/utils/sapien_utils.py index e1107e7ca..fcb8141b0 100644 --- a/mani_skill2/utils/sapien_utils.py +++ b/mani_skill2/utils/sapien_utils.py @@ -70,9 +70,8 @@ def _unbatch(array: Union[Array, Sequence]): return {k: _unbatch(v) for k, v in array.items()} if isinstance(array, str): return array - if torch is not None: - if isinstance(array, torch.Tensor): - return array.squeeze(0) + if isinstance(array, torch.Tensor): + return array.squeeze(0) if isinstance(array, np.ndarray): if np.iterable(array) and array.shape[0] == 1: return array.squeeze(0) @@ -83,7 +82,10 @@ def _unbatch(array: Union[Array, Sequence]): def unbatch(*args: Tuple[Union[Array, Sequence]]): - return tuple([_unbatch(x) for x in args]) + x = [_unbatch(x) for x in args] + if len(args) == 1: + return x[0] + return tuple(x) def clone_tensor(array: Array): @@ -291,15 +293,6 @@ def get_articulation_state(articulation: physx.PhysxArticulation): return np.hstack([pose.p, pose.q, vel, ang_vel, qpos, qvel]) -def set_articulation_state(articulation: physx.PhysxArticulation, state: np.ndarray): - articulation.set_root_pose(sapien.Pose(state[0:3], state[3:7])) - articulation.set_root_velocity(state[7:10]) - articulation.set_root_angular_velocity(state[10:13]) - qpos, qvel = np.split(state[13:], 2) - articulation.set_qpos(qpos) - articulation.set_qvel(qvel) - - 
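The flat simulation state handled in this patch packs, per environment, 13 kinematic values for every actor (position 3, quaternion 4, linear velocity 3, angular velocity 3) followed by 13 + 2 * dof values for every articulation (the same root kinematics plus qpos and qvel). A minimal sketch of slicing such an (N, D) state tensor back apart, assuming a fixed actor/articulation ordering:

```python
import torch

KINEMATIC_DIM = 13  # [pos(3), quat(4), lin_vel(3), ang_vel(3)]

def split_sim_state(state: torch.Tensor, num_actors: int, articulation_dofs: list):
    """state: (N, D) tensor. Returns per-actor and per-articulation slices."""
    start = 0
    actor_states = []
    for _ in range(num_actors):
        actor_states.append(state[:, start:start + KINEMATIC_DIM])
        start += KINEMATIC_DIM
    articulation_states = []
    for dof in articulation_dofs:
        ndim = KINEMATIC_DIM + 2 * dof  # root kinematics + qpos + qvel
        articulation_states.append(state[:, start:start + ndim])
        start += ndim
    return actor_states, articulation_states

# hypothetical usage: 2 envs, 3 actors, one 9-DoF articulation
state = torch.zeros((2, 3 * KINEMATIC_DIM + KINEMATIC_DIM + 2 * 9))
actors, arts = split_sim_state(state, num_actors=3, articulation_dofs=[9])
```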
def get_articulation_padded_state(articulation: physx.PhysxArticulation, max_dof: int): state = get_articulation_state(articulation) qpos, qvel = np.split(state[13:], 2) diff --git a/mani_skill2/utils/scene_builder/table/table_scene_builder.py b/mani_skill2/utils/scene_builder/table/table_scene_builder.py index 82e45c285..c88da9e8f 100644 --- a/mani_skill2/utils/scene_builder/table/table_scene_builder.py +++ b/mani_skill2/utils/scene_builder/table/table_scene_builder.py @@ -69,6 +69,17 @@ def initialize(self): ) self.env.agent.reset(qpos) self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0])) + elif self.env.robot_uid == "panda_realsensed435": + # fmt: off + qpos = np.array( + [0.0, np.pi / 8, 0, -np.pi * 5 / 8, 0, np.pi * 3 / 4, -np.pi / 4, 0.04, 0.04] + ) + # fmt: on + qpos[:-2] += self.env._episode_rng.normal( + 0, self.robot_init_qpos_noise, len(qpos) - 2 + ) + self.env.agent.reset(qpos) + self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0])) elif self.env.robot_uid == "xmate3_robotiq": qpos = np.array( [0, np.pi / 6, 0, np.pi / 3, 0, np.pi / 2, -np.pi / 2, 0, 0] diff --git a/mani_skill2/utils/structs/actor.py b/mani_skill2/utils/structs/actor.py index 31c6923a9..27eb671e0 100644 --- a/mani_skill2/utils/structs/actor.py +++ b/mani_skill2/utils/structs/actor.py @@ -7,6 +7,7 @@ import sapien.render import torch +from mani_skill2.utils.sapien_utils import to_numpy from mani_skill2.utils.structs.base import BaseStruct, PhysxRigidDynamicComponentStruct from mani_skill2.utils.structs.pose import Pose, to_sapien_pose, vectorize_pose from mani_skill2.utils.structs.types import Array @@ -80,6 +81,7 @@ def set_state(self, state: Array): "You should not set state on a GPU enabled actor." ) else: + state = to_numpy(state[0]) self.set_pose(sapien.Pose(state[0:3], state[3:7])) if self.px_body_type == "dynamic": self.set_linear_velocity(state[7:10]) diff --git a/mani_skill2/utils/structs/articulation.py b/mani_skill2/utils/structs/articulation.py index 94744965c..1fd8d5a3c 100644 --- a/mani_skill2/utils/structs/articulation.py +++ b/mani_skill2/utils/structs/articulation.py @@ -7,7 +7,7 @@ import sapien.physx as physx import torch -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils.sapien_utils import to_numpy, to_tensor from mani_skill2.utils.structs.base import BaseStruct from mani_skill2.utils.structs.joint import Joint from mani_skill2.utils.structs.link import Link @@ -119,6 +119,20 @@ def get_state(self): qvel = self.get_qvel() return torch.hstack([pose.p, pose.q, vel, ang_vel, qpos, qvel]) + def set_state(self, state: Array): + if physx.is_gpu_enabled(): + raise NotImplementedError( + "You should not set state on a GPU enabled actor." 
+ ) + else: + state = to_numpy(state[0]) + self.set_root_pose(sapien.Pose(state[0:3], state[3:7])) + # self.set_root_linear_velocity(state[7:10]) + # self.set_root_angular_velocity(state[10:13]) + qpos, qvel = np.split(state[13:], 2) + self.set_qpos(qpos) + self.set_qvel(qvel) + # -------------------------------------------------------------------------- # # Functions from physx.PhysxArticulation # -------------------------------------------------------------------------- # @@ -371,3 +385,17 @@ def set_joint_drive_targets( else: for i, joint in enumerate(joints): joint.set_drive_target(targets[0, i]) + + def set_joint_drive_velocity_targets( + self, + targets: Array, + joints: List[Joint] = None, + ): + if physx.is_gpu_enabled(): + targets = to_tensor(targets) + raise NotImplementedError( + "Setting joint velocity targets is currently not supported on the GPU" + ) + else: + for i, joint in enumerate(joints): + joint.set_drive_velocity_target(targets[i]) diff --git a/mani_skill2/utils/structs/joint.py b/mani_skill2/utils/structs/joint.py index 8f4476129..d49d3e56f 100644 --- a/mani_skill2/utils/structs/joint.py +++ b/mani_skill2/utils/structs/joint.py @@ -220,6 +220,10 @@ def drive_velocity_target(self, arg1: Array) -> None: "Cannot set drive velocity targets at the moment in GPU simulation" ) else: + if arg1.shape == (): + arg1 = arg1.reshape( + 1, + ) self._objs[0].drive_velocity_target = arg1 @property diff --git a/mani_skill2/utils/wrappers/common.py b/mani_skill2/utils/wrappers/common.py index 9157fd341..de1885d36 100644 --- a/mani_skill2/utils/wrappers/common.py +++ b/mani_skill2/utils/wrappers/common.py @@ -1,30 +1,4 @@ import gymnasium as gym -from gymnasium import spaces - -from ..common import ( - clip_and_scale_action, - inv_clip_and_scale_action, - normalize_action_space, -) - - -class NormalizeBoxActionWrapper(gym.ActionWrapper): - """Normalize box action space to [-1, 1].""" - - def __init__(self, env): - super().__init__(env) - assert isinstance(self.env.action_space, spaces.Box), self.env.action_space - self.action_space = normalize_action_space(self.env.action_space) - - def action(self, action): - return clip_and_scale_action( - action, self.env.action_space.low, self.env.action_space.high - ) - - def reverse_action(self, action): - return inv_clip_and_scale_action( - action, self.env.action_space.low, self.env.action_space.high - ) class ResetSeedWrapper(gym.Wrapper): diff --git a/mani_skill2/utils/wrappers/observation.py b/mani_skill2/utils/wrappers/observation.py index bd7dc616b..476dbdfb3 100644 --- a/mani_skill2/utils/wrappers/observation.py +++ b/mani_skill2/utils/wrappers/observation.py @@ -50,7 +50,7 @@ def update_observation_space(space: spaces.Dict): new_cam_space["rgb"] = spaces.Box( low=0, high=255, shape=(height, width, 3), dtype=np.uint8 ) - elif key == "Position": + elif key == "PositionSegmentation": height, width = ori_cam_space[key].shape[:2] new_cam_space["depth"] = spaces.Box( low=0, high=np.inf, shape=(height, width, 1), dtype=np.float32 @@ -66,9 +66,7 @@ def observation(self, observation: dict): for key in ori_images: if key == "Color": rgb = ori_images[key][..., :3] # [H, W, 4] - if isinstance(rgb, np.ndarray): - rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8) - else: + if isinstance(rgb, torch.Tensor): rgb = rgb.clone() new_images["rgb"] = rgb # [H, W, 4] elif key == "PositionSegmentation": @@ -95,6 +93,7 @@ def merge_dict_spaces(dict_spaces: Sequence[spaces.Dict]): return spaces.Dict(OrderedDict(reverse_spaces)) +# TODO (stao): fix this wrapper 
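The wrapper below consumes the camera's PositionSegmentation texture, an (H, W, 4) map whose pixels are (x, y, z, actor_id) in OpenGL camera space with actor_id == 0 for the background, and lifts it into a world-space point cloud via the cam2world_gl transform. A simplified NumPy sketch of that conversion for a single camera (it keeps the segmentation channel separate instead of overloading the homogeneous w component, so it is illustrative rather than the wrapper's exact behavior):

```python
import numpy as np

def camera_points_to_world(position_segmentation: np.ndarray, cam2world_gl: np.ndarray):
    """position_segmentation: (H, W, 4) array of (x, y, z, actor_id) in OpenGL camera space.
    cam2world_gl: (4, 4) camera-to-world transform (OpenGL convention).
    Returns (H*W, 3) world-space points and (H*W,) actor ids."""
    h, w, _ = position_segmentation.shape
    xyz = position_segmentation[..., :3].reshape(-1, 3).astype(np.float32)
    actor_ids = position_segmentation[..., 3].reshape(-1)
    # homogeneous coordinates, then apply the camera-to-world transform
    xyzw = np.concatenate([xyz, np.ones((xyz.shape[0], 1), dtype=np.float32)], axis=1)
    world = xyzw @ cam2world_gl.T
    return world[:, :3], actor_ids
```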
class PointCloudObservationWrapper(BaseGymObservationWrapper): """Convert Position textures to world-space point cloud.""" @@ -115,21 +114,15 @@ def update_observation_space(space: spaces.Dict): cam_image_space = image_space[cam_uid] cam_pcd_space = OrderedDict() - h, w = cam_image_space["Position"].shape[:2] + h, w = cam_image_space["PositionSegmentation"].shape[:2] cam_pcd_space["xyzw"] = spaces.Box( low=-np.inf, high=np.inf, shape=(h * w, 4), dtype=np.float32 ) - # Extra keys if "Color" in cam_image_space.spaces: cam_pcd_space["rgb"] = spaces.Box( low=0, high=255, shape=(h * w, 3), dtype=np.uint8 ) - if "Segmentation" in cam_image_space.spaces: - cam_pcd_space["Segmentation"] = spaces.Box( - low=0, high=(2**32 - 1), shape=(h * w, 4), dtype=np.uint32 - ) - pcd_space[cam_uid] = spaces.Dict(cam_pcd_space) pcd_space = merge_dict_spaces(pcd_space.values()) @@ -143,10 +136,10 @@ def observation(self, observation: dict): for cam_uid, images in image_obs.items(): cam_pcd = {} - # Each pixel is (x, y, z, z_buffer_depth) in OpenGL camera space - position = images["Position"] - # position[..., 3] = position[..., 3] < 1 - position[..., 3] = position[..., 2] < 0 + # Each pixel is (x, y, z, actor_id) in OpenGL camera space + # actor_id = 0 for the background + position = images["PositionSegmentation"] + position[..., 3] = position[..., 2] == 0 # Convert to world space cam2world = camera_params[cam_uid]["cam2world_gl"] diff --git a/tests/test_envs.py b/tests/test_envs.py index adb9ba17d..786df156a 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -97,8 +97,8 @@ def test_states(env_id): @pytest.mark.parametrize("env_id", ENV_IDS) -@pytest.mark.parametrize("robot", ROBOTS) -def test_robots(env_id, robot): +@pytest.mark.parametrize("robot_uid", ROBOTS) +def test_robots(env_id, robot_uid): if env_id in [ "PandaAvoidObstacles-v0", "PegInsertionSide-v0", @@ -110,7 +110,7 @@ def test_robots(env_id, robot): "MoveBucket-v1", ]: pytest.skip(reason=f"Env {env_id} does not support robots other than panda") - env = gym.make(env_id, robot=robot) + env = gym.make(env_id, robot_uid=robot_uid) env.reset() action_space = env.action_space for _ in range(5): diff --git a/tests/test_venv.py b/tests/test_venv.py index b2aa3ea3f..48de2e0b7 100644 --- a/tests/test_venv.py +++ b/tests/test_venv.py @@ -21,7 +21,7 @@ def make_env(env_id, obs_mode): @pytest.mark.parametrize("env_id", ["PickCube-v0", "TurnFaucet-v0"]) @pytest.mark.parametrize("obs_mode", VENV_OBS_MODES) -def test_vecenv_obs_mode(env_id, obs_mode): +def test_cpu_vecenv_obs_mode(env_id, obs_mode): n_envs = 2 env_fns = [partial(make_env, env_id, obs_mode=obs_mode) for _ in range(n_envs)] @@ -63,7 +63,7 @@ def test_vecenv_obs_mode(env_id, obs_mode): @pytest.mark.parametrize("env_id", ["PickCube-v0", "TurnFaucet-v0"]) @pytest.mark.parametrize("obs_mode", VENV_OBS_MODES) -def test_gymnasium_vecenv(env_id, obs_mode): +def test_gymnasium_cpu_vecenv(env_id, obs_mode): n_envs = 2 gym_env = gym.make_vec( diff --git a/tests/utils.py b/tests/utils.py index e2568e805..e08990947 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -2,23 +2,22 @@ import torch from mani_skill2.utils.common import flatten_dict_keys - +# TODO (stao): reactivate old tasks once fixed ENV_IDS = [ "LiftCube-v0", "PickCube-v0", "StackCube-v0", "PickSingleYCB-v0", - "PickSingleEGAD-v0", - "PickClutterYCB-v0", - "AssemblingKits-v0", - "PegInsertionSide-v0", - "PlugCharger-v0", - "PandaAvoidObstacles-v0", - "TurnFaucet-v0", - "OpenCabinetDoor-v1", - "OpenCabinetDrawer-v1", - "PushChair-v1", - 
"MoveBucket-v1", + # "PickClutterYCB-v0", + # "AssemblingKits-v0", + # "PegInsertionSide-v0", + # "PlugCharger-v0", + # "PandaAvoidObstacles-v0", + # "TurnFaucet-v0", + # "OpenCabinetDoor-v1", + # "OpenCabinetDrawer-v1", + # "PushChair-v1", + # "MoveBucket-v1", ] STATIONARY_ENV_IDS = [ @@ -26,13 +25,12 @@ "PickCube-v0", "StackCube-v0", "PickSingleYCB-v0", - "PickSingleEGAD-v0", - "PickClutterYCB-v0", - "AssemblingKits-v0", - "PegInsertionSide-v0", - "PlugCharger-v0", - "PandaAvoidObstacles-v0", - "TurnFaucet-v0", + # "PickClutterYCB-v0", + # "AssemblingKits-v0", + # "PegInsertionSide-v0", + # "PlugCharger-v0", + # "PandaAvoidObstacles-v0", + # "TurnFaucet-v0", ] REWARD_MODES = ["dense", "normalized_dense", "sparse"] @@ -49,8 +47,8 @@ "state", "rgbd", "pointcloud", - "rgbd_robot_seg", - "pointcloud_robot_seg", + # "rgbd_robot_seg", + # "pointcloud_robot_seg", ] VENV_OBS_MODES = [ "image",