From 3d6a75b18df8848c50d10891698c60e822541a21 Mon Sep 17 00:00:00 2001
From: StoneT2000
Date: Tue, 23 Jan 2024 00:58:17 -0800
Subject: [PATCH] add minimal pick cube task with "task sheet"

---
 mani_skill2/envs/minimal_template.py          |  46 +++++++
 mani_skill2/envs/tasks/README.md              |   1 +
 mani_skill2/envs/tasks/__init__.py            |   1 +
 mani_skill2/envs/tasks/pick_cube.py           | 120 ++++++++++++++++++
 mani_skill2/envs/tasks/push_cube.py           |  22 +++-
 mani_skill2/envs/template.py                  |  18 ++-
 .../envs/utils/randomization/__init__.py      |   2 +
 mani_skill2/envs/utils/randomization/pose.py  |  27 ++++
 .../envs/utils/randomization/samplers.py      |   0
 mani_skill2/utils/sapien_utils.py             |   2 +
 mani_skill2/utils/structs/pose.py             |   2 -
 tests/utils.py                                |   2 +-
 12 files changed, 235 insertions(+), 8 deletions(-)
 create mode 100644 mani_skill2/envs/minimal_template.py
 create mode 100644 mani_skill2/envs/tasks/pick_cube.py
 create mode 100644 mani_skill2/envs/utils/randomization/__init__.py
 create mode 100644 mani_skill2/envs/utils/randomization/pose.py
 create mode 100644 mani_skill2/envs/utils/randomization/samplers.py

diff --git a/mani_skill2/envs/minimal_template.py b/mani_skill2/envs/minimal_template.py
new file mode 100644
index 000000000..a77db62f4
--- /dev/null
+++ b/mani_skill2/envs/minimal_template.py
@@ -0,0 +1,46 @@
+from collections import OrderedDict
+from typing import Any, Dict
+
+import numpy as np
+import torch
+
+from mani_skill2.envs.sapien_env import BaseEnv
+from mani_skill2.sensors.camera import CameraConfig
+from mani_skill2.utils.registration import register_env
+from mani_skill2.utils.sapien_utils import look_at
+
+
+@register_env(name="CustomEnv-v0", max_episode_steps=200)
+class CustomEnv(BaseEnv):
+    def __init__(self, *args, robot_uid="panda", robot_init_qpos_noise=0.02, **kwargs):
+        self.robot_init_qpos_noise = robot_init_qpos_noise
+        super().__init__(*args, robot_uid=robot_uid, **kwargs)
+
+    def _register_sensors(self):
+        pose = look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
+        return [
+            CameraConfig("base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10)
+        ]
+
+    def _register_render_cameras(self):
+        pose = look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
+        return CameraConfig("render_camera", pose.p, pose.q, 512, 512, 1, 0.01, 10)
+
+    def _load_actors(self):
+        pass
+
+    def _initialize_actors(self):
+        pass
+
+    def _get_obs_extra(self):
+        return OrderedDict()
+
+    def evaluate(self, obs: Any):
+        return {"success": torch.zeros(self.num_envs, device=self.device)}
+
+    def compute_dense_reward(self, obs: Any, action: np.ndarray, info: Dict):
+        return torch.zeros(self.num_envs, device=self.device)
+
+    def compute_normalized_dense_reward(self, obs: Any, action: np.ndarray, info: Dict):
+        max_reward = 1.0
+        return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
diff --git a/mani_skill2/envs/tasks/README.md b/mani_skill2/envs/tasks/README.md
index c5ab7e493..fffa92f2e 100644
--- a/mani_skill2/envs/tasks/README.md
+++ b/mani_skill2/envs/tasks/README.md
@@ -13,3 +13,4 @@ optimal solution horizon: average time to solve the task when uniformally perfor
 
 env_id, task code, max_episode_steps, optimal solution_horizon, difficulty
 PushCube-v0, push_cube.py, 50, ~12, 1
+PickCube-v1, pick_cube.py, 100, ~20, 3?
\ No newline at end of file
diff --git a/mani_skill2/envs/tasks/__init__.py b/mani_skill2/envs/tasks/__init__.py
index 0544b09b8..ccee7261a 100644
--- a/mani_skill2/envs/tasks/__init__.py
+++ b/mani_skill2/envs/tasks/__init__.py
@@ -1,3 +1,4 @@
 from .fmb.fmb import FMBEnv
+from .pick_cube import PickCubeEnv
 from .push_cube import PushCubeEnv
 from .push_object import PushObjectEnv
diff --git a/mani_skill2/envs/tasks/pick_cube.py b/mani_skill2/envs/tasks/pick_cube.py
new file mode 100644
index 000000000..8df2ffb8e
--- /dev/null
+++ b/mani_skill2/envs/tasks/pick_cube.py
@@ -0,0 +1,120 @@
+from collections import OrderedDict
+from typing import Any, Dict
+
+import numpy as np
+import torch
+
+import mani_skill2.envs.utils.randomization as randomization
+from mani_skill2.envs.sapien_env import BaseEnv
+from mani_skill2.sensors.camera import CameraConfig
+from mani_skill2.utils.building.actors import build_cube, build_sphere
+from mani_skill2.utils.registration import register_env
+from mani_skill2.utils.sapien_utils import look_at
+from mani_skill2.utils.scene_builder.table.table_scene_builder import TableSceneBuilder
+from mani_skill2.utils.structs.pose import Pose
+
+
+@register_env("PickCube-v1", max_episode_steps=100)
+class PickCubeEnv(BaseEnv):
+    """
+    A simple task where the objective is to grasp a cube and move it to a target goal position.
+
+    Randomizations
+    --------------
+    - the cube's xy position is randomized on top of the table in the region [-0.1, 0.1] x [-0.1, 0.1]. It is placed flat on the table
+    - the cube's z-axis rotation is randomized to a random angle
+    - the target goal position of the cube (marked by a green sphere) has its xy position randomized in the region [-0.1, 0.1] x [-0.1, 0.1] and its z randomized in [0, 0.3] above the cube's starting z
+
+
+    Success Conditions
+    ------------------
+    - the cube's position is within goal_thresh (default 0.025) Euclidean distance of the goal position
+
+    Visualization: TODO: ADD LINK HERE
+
+    Changelog:
+    Different from v0, v1 does not require the robot to be static at the end, which makes this task similar to other benchmarks and also easier
+    """
+
+    cube_half_size = 0.02
+    goal_thresh = 0.025
+
+    def __init__(self, *args, robot_uid="panda", robot_init_qpos_noise=0.02, **kwargs):
+        self.robot_init_qpos_noise = robot_init_qpos_noise
+        super().__init__(*args, robot_uid=robot_uid, **kwargs)
+
+    def _register_sensors(self):
+        pose = look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
+        return [
+            CameraConfig("base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10)
+        ]
+
+    def _register_render_cameras(self):
+        pose = look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
+        return CameraConfig("render_camera", pose.p, pose.q, 512, 512, 1, 0.01, 10)
+
+    def _load_actors(self):
+        self.table_scene = TableSceneBuilder(
+            self, robot_init_qpos_noise=self.robot_init_qpos_noise
+        )
+        self.table_scene.build()
+        self.cube = build_cube(
+            self._scene, half_size=self.cube_half_size, color=[1, 0, 0, 1], name="cube"
+        )
+        self.goal_site = build_sphere(
+            self._scene,
+            radius=self.goal_thresh,
+            color=[0, 1, 0, 1],
+            name="goal_site",
+            body_type="kinematic",
+            add_collision=False,
+        )
+
+    def _initialize_actors(self):
+        self.table_scene.initialize()
+        xyz = np.zeros((self.num_envs, 3))
+        xyz[:, :2] = self._episode_rng.uniform(-0.1, 0.1, [self.num_envs, 2])
+        xyz[:, 2] = self.cube_half_size
+        qs = randomization.random_quaternions(
+            self._episode_rng, lock_x=True, lock_y=True, n=self.num_envs
+        )
+        self.cube.set_pose(Pose.create_from_pq(xyz, qs, device=self.device))
+
+        goal_xyz = np.zeros((self.num_envs, 3))
+        goal_xyz[:, :2] = self._episode_rng.uniform(-0.1, 0.1, [self.num_envs, 2])
+        goal_xyz[:, 2] = self._episode_rng.uniform(0, 0.3, [self.num_envs]) + xyz[:, 2]
+        self.goal_site.set_pose(Pose.create_from_pq(goal_xyz, device=self.device))
+
+    def _get_obs_extra(self):
+        obs = OrderedDict(tcp_pose=self.agent.tcp.pose.raw_pose, goal_pos=self.goal_site.pose.p)
+        if "state" in self.obs_mode:
+            obs.update(obj_pose=self.cube.pose.raw_pose)
+        return obs
+
+    def evaluate(self, obs: Any):
+        is_obj_placed = (
+            torch.linalg.norm(self.goal_site.pose.p - self.cube.pose.p, axis=1)
+            <= self.goal_thresh
+        )
+        return {"success": is_obj_placed, "is_obj_placed": is_obj_placed}
+
+    def compute_dense_reward(self, obs: Any, action: np.ndarray, info: Dict):
+        tcp_to_obj_dist = torch.linalg.norm(
+            self.cube.pose.p - self.agent.tcp.pose.p, axis=1
+        )
+        reaching_reward = 1 - torch.tanh(5 * tcp_to_obj_dist)
+        reward = reaching_reward
+
+        is_grasped = self.agent.is_grasping(self.cube)
+        reward += is_grasped
+
+        obj_to_goal_dist = torch.linalg.norm(
+            self.goal_site.pose.p - self.cube.pose.p, axis=1
+        )
+        place_reward = 1 - torch.tanh(5 * obj_to_goal_dist)
+        reward += place_reward * is_grasped
+
+        reward[info["success"]] = 4
+        return reward
+
+    def compute_normalized_dense_reward(self, obs: Any, action: np.ndarray, info: Dict):
+        return self.compute_dense_reward(obs=obs, action=action, info=info) / 4
diff --git a/mani_skill2/envs/tasks/push_cube.py b/mani_skill2/envs/tasks/push_cube.py
index 6a9244b5f..c1cbf2186 100644
--- a/mani_skill2/envs/tasks/push_cube.py
+++ b/mani_skill2/envs/tasks/push_cube.py
@@ -30,12 +30,28 @@
 from mani_skill2.utils.registration import register_env
 from mani_skill2.utils.sapien_utils import look_at
 from mani_skill2.utils.scene_builder.table.table_scene_builder import TableSceneBuilder
-from mani_skill2.utils.structs.pose import Pose, vectorize_pose
+from mani_skill2.utils.structs.pose import Pose
 from mani_skill2.utils.structs.types import Array
 
 
 @register_env("PushCube-v0", max_episode_steps=50)
 class PushCubeEnv(BaseEnv):
+    """
+    A simple task where the objective is to push and move a cube to a goal region in front of it.
+
+    Randomizations
+    --------------
+    - the cube's xy position is randomized on top of the table in the region [-0.1, 0.1] x [-0.1, 0.1]. It is placed flat on the table
+    - the target goal region is marked by a red/white circular target. The position of the target is fixed to be the cube's xy position + [0.1 + goal_radius, 0]
+
+
+    Success Conditions
+    ------------------
+    - the cube's xy position is within goal_radius (default 0.1) of the target's xy position by Euclidean distance.
+
+    Visualization: TODO: ADD LINK HERE
+    """
+
     # Specify some supported robot types
     agent: Union[Panda, Xmate3Robotiq]
 
@@ -124,14 +140,14 @@ def _get_obs_extra(self):
         # some useful observation info for solving the task includes the pose of the tcp (tool center point) which is the point between the
         # grippers of the robot
         obs = OrderedDict(
-            tcp_pose=vectorize_pose(self.agent.tcp.pose),
+            tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_region.pose.p,
         )
         if self._obs_mode in ["state", "state_dict"]:
             # if the observation mode is state/state_dict, we provide ground truth information about where the cube is.
             # for visual observation modes one should rely on the sensed visual data to determine where the cube is
             obs.update(
-                obj_pose=vectorize_pose(self.obj.pose),
+                obj_pose=self.obj.pose.raw_pose,
             )
         return obs
 
diff --git a/mani_skill2/envs/template.py b/mani_skill2/envs/template.py
index a9867fef2..075c59750 100644
--- a/mani_skill2/envs/template.py
+++ b/mani_skill2/envs/template.py
@@ -28,12 +28,12 @@
 
 from mani_skill2.envs.sapien_env import BaseEnv
 from mani_skill2.sensors.camera import CameraConfig
-from mani_skill2.utils.registration import register
+from mani_skill2.utils.registration import register_env
 from mani_skill2.utils.sapien_utils import look_at
 
 
 # register the environment by a unique ID and specify a max time limit. Now once this file is imported you can do gym.make("CustomEnv-v0")
-@register(name="CustomEnv-v0", max_episode_steps=200)
+@register_env(name="CustomEnv-v0", max_episode_steps=200)
 class CustomEnv(BaseEnv):
     # in the __init__ function you can pick a default robot your task should use e.g. the panda robot
     def __init__(self, *args, robot_uid="panda", robot_init_qpos_noise=0.02, **kwargs):
@@ -130,3 +130,17 @@ def compute_normalized_dense_reward(self, obs: Any, action: np.ndarray, info: Di
         # this should be equal to compute_dense_reward / max possible reward
         max_reward = 1.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
+
+    def get_state(self):
+        # this function is important in order to allow accurate replaying of trajectories. Make sure to include any
+        # non-simulation state data here, such as a randomly generated 3D goal position
+        # alternatively you can skip this part if the environment's rewards, observations, success etc. depend only on simulation data
+        # e.g. self.your_custom_actor.pose.p will always give you your actor's 3D position
+        state = super().get_state()
+        return torch.hstack([state, self.goal_pos])
+
+    def set_state(self, state):
+        # this function complements get_state and sets any non-simulation state data correctly so the environment behaves
+        # exactly the same in terms of output rewards, observations, success etc. should you reset to a given state and take the same actions
+        self.goal_pos = state[:, -3:]
+        super().set_state(state[:, :-3])
diff --git a/mani_skill2/envs/utils/randomization/__init__.py b/mani_skill2/envs/utils/randomization/__init__.py
new file mode 100644
index 000000000..6490b4c7c
--- /dev/null
+++ b/mani_skill2/envs/utils/randomization/__init__.py
@@ -0,0 +1,2 @@
+from .pose import *
+from .samplers import *
diff --git a/mani_skill2/envs/utils/randomization/pose.py b/mani_skill2/envs/utils/randomization/pose.py
new file mode 100644
index 000000000..608ed3fc3
--- /dev/null
+++ b/mani_skill2/envs/utils/randomization/pose.py
@@ -0,0 +1,27 @@
+import numpy as np
+import transforms3d
+
+from mani_skill2.utils.geometry.rotation_conversions import (
+    euler_angles_to_matrix,
+    matrix_to_quaternion,
+)
+from mani_skill2.utils.sapien_utils import to_tensor
+
+
+def random_quaternions(
+    rng: np.random.RandomState,
+    lock_x: bool = False,
+    lock_y: bool = False,
+    lock_z: bool = False,
+    n=1,
+):
+    xyz_angles = rng.uniform(0, np.pi * 2, (n, 3))
+    if lock_x:
+        xyz_angles[:, 0] *= 0
+    if lock_y:
+        xyz_angles[:, 1] *= 0
+    if lock_z:
+        xyz_angles[:, 2] *= 0
+    return matrix_to_quaternion(
+        euler_angles_to_matrix(to_tensor(xyz_angles), convention="XYZ")
+    )
diff --git a/mani_skill2/envs/utils/randomization/samplers.py b/mani_skill2/envs/utils/randomization/samplers.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/mani_skill2/utils/sapien_utils.py b/mani_skill2/utils/sapien_utils.py
index d82ebef84..a99472dd7 100644
--- a/mani_skill2/utils/sapien_utils.py
+++ b/mani_skill2/utils/sapien_utils.py
@@ -76,6 +76,8 @@ def _unbatch(array: Union[Array, Sequence]):
     if isinstance(array, torch.Tensor):
         return array.squeeze(0)
     if isinstance(array, np.ndarray):
+        if array.shape == (1,):
+            return array.item()
         if np.iterable(array) and array.shape[0] == 1:
             return array.squeeze(0)
     if isinstance(array, list):
diff --git a/mani_skill2/utils/structs/pose.py b/mani_skill2/utils/structs/pose.py
index f5485d8ac..02d845056 100644
--- a/mani_skill2/utils/structs/pose.py
+++ b/mani_skill2/utils/structs/pose.py
@@ -5,9 +5,7 @@
 import sapien
 import sapien.physx as physx
 import torch
-from transforms3d.quaternions import qmult, quat2mat
 
-from mani_skill2.utils.geometry.geometry import rotate_vector
 from mani_skill2.utils.geometry.rotation_conversions import (
     quaternion_apply,
     quaternion_multiply,
diff --git a/tests/utils.py b/tests/utils.py
index b2ff5b089..5eab228ec 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -6,7 +6,7 @@
 # TODO (stao): reactivate old tasks once fixed
 ENV_IDS = [
     "LiftCube-v0",
-    "PickCube-v0",
+    "PickCube-v1",
     "StackCube-v0",
     "PickSingleYCB-v0",
     # "PickClutterYCB-v0",
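For reference, a short usage sketch (not part of the patch above). It assumes that importing mani_skill2.envs.tasks registers PickCube-v1 via the @register_env decorator (as the template comment describes for gym.make), that the environment follows the standard gymnasium reset/step API, and that keyword arguments such as obs_mode are accepted as suggested by self.obs_mode in the task code; exact details may differ in the actual codebase.

    import gymnasium as gym

    import mani_skill2.envs.tasks  # assumption: importing this module registers PickCube-v1

    env = gym.make("PickCube-v1", obs_mode="state")
    obs, info = env.reset(seed=0)
    for _ in range(100):  # PickCube-v1 is registered with max_episode_steps=100
        # sample a random action just to exercise the env loop
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        if terminated or truncated:
            break
    env.close()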