facebookresearch · jimmytyyang · Aug 14, 2024 · Aug 14, 2024 · Aug 14, 2024 · Aug 15, 2024
diff --git a/bd_spot_wrapper/spot_wrapper/spot.py b/bd_spot_wrapper/spot_wrapper/spot.py
@@ -407,7 +407,7 @@ def move_gripper_to_points(
         return success_status
 
     def move_gripper_to_point(
-        self, point, rotation, seconds_to_goal=3.0, timeout_sec=10
+        self, point, rotation, seconds_to_goal=3.0, timeout_sec=10, return_cmd=False
     ):
         """
         Moves EE to a point relative to body frame
@@ -451,6 +451,9 @@ def move_gripper_to_point(
         command = robot_command_pb2.RobotCommand(
             synchronized_command=synchronized_command
         )
+        if return_cmd:
+            return command
+
         cmd_id = self.command_client.robot_command(command)
 
         success_status = self.block_until_arm_arrives(cmd_id, timeout_sec=timeout_sec)
@@ -1072,6 +1075,38 @@ def set_base_vel_and_arm_pos(
         )
         return cmd_id
 
+    def set_base_vel_and_arm_ee_pos(
+        self,
+        x_vel,
+        y_vel,
+        ang_vel,
+        arm_ee_action,
+        travel_time,
+        disable_obstacle_avoidance=False,
+    ):
+        print(f"in set_base_vel_and_arm_ee_pos: {arm_ee_action} {travel_time}")
+        # TODO: semantic place ee: temp hack to distance the base vel
+        # base_cmd = self.set_base_velocity(
+        #     x_vel,
+        #     y_vel,
+        #     ang_vel,
+        #     vel_time=travel_time,
+        #     disable_obstacle_avoidance=disable_obstacle_avoidance,
+        #     return_cmd=True,
+        # )
+        arm_cmd = self.move_gripper_to_point(
+            point=arm_ee_action[0:3],
+            rotation=list(arm_ee_action[3:]),
+            seconds_to_goal=travel_time,
+            return_cmd=True,
+        )
+        # synchro_command = RobotCommandBuilder.build_synchro_command(base_cmd, arm_cmd)
+        synchro_command = RobotCommandBuilder.build_synchro_command(arm_cmd)
+        cmd_id = self.command_client.robot_command(
+            synchro_command, end_time_secs=time.time() + travel_time
+        )
+        return cmd_id
+
     def get_xy_yaw(self, use_boot_origin=False, robot_state=None):
         """
         Returns the relative x and y distance from start, as well as relative heading

diff --git a/spot_rl_experiments/configs/config.yaml b/spot_rl_experiments/configs/config.yaml
@@ -19,6 +19,9 @@ WEIGHTS_TORCHSCRIPT:
   # Semantic place
   SEMANTIC_PLACE: "weights/semantic_place/sp12lhb6v10_12_ckpt.199.torchscript"
 
+  # Semantic place EE
+  SEMANTIC_PLACE_EE: "weights/semantic_place_ee/sp_jt_ckpt.16.torchscript"
+
   # Open close drawer
   OPEN_CLOSE_DRAWER: "weights/open_close_drawer/od15lr_56_ckpt.59.combined_net.torchscript"
 
@@ -41,6 +44,9 @@ WEIGHTS:
   # Semantic place
   SEMANTIC_PLACE: "weights/semantic_place/sp12lhb6v10_12_ckpt.199.torchscript"
 
+  # Semantic place EE
+  SEMANTIC_PLACE_EE: "weights/semantic_place_ee/sp_jt_ckpt.16.torchscript"
+
   # Open close drawer
   OPEN_CLOSE_DRAWER: "weights/open_close_drawer/od15lr_56_ckpt.59.combined_net.torchscript"
 
@@ -99,6 +105,9 @@ PLACE_ACTION_SPACE_LENGTH: 4
 SEMANTIC_PLACE_ACTION_SPACE_LENGTH: 9
 SEMANTIC_PLACE_JOINT_BLACKLIST: [3]
 
+# Semantic Place EE
+SEMANTIC_PLACE_EE_ACTION_SPACE_LENGTH: 10
+
 # Open Close Drawer env
 MAX_LIN_DIST_OPEN_CLOSE_DRAWER: 0.0
 MAX_ANG_DIST_OPEN_CLOSE_DRAWER: 3.73 # degrees

diff --git a/spot_rl_experiments/experiments/skill_test/test_sem_place_ee.py b/spot_rl_experiments/experiments/skill_test/test_sem_place_ee.py
@@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import rospy
+from perception_and_utils.utils.generic_utils import map_user_input_to_boolean
+from spot_rl.envs.skill_manager import SpotSkillManager
+
+if __name__ == "__main__":
+    # Know which location we are doing experiments
+    in_fre_lab = map_user_input_to_boolean("Are you JT in FRE? Y/N ")
+    enable_estimation_before_place = map_user_input_to_boolean(
+        "Enable estimation before place? Y/N "
+    )
+
+    if in_fre_lab:
+        # at FRE
+        place_target = "coffee_table"
+    else:
+        # at NYC
+        place_target = "test_desk"
+
+    spotskillmanager = SpotSkillManager(
+        use_mobile_pick=False, use_semantic_place=True, use_semantic_place_ee=True
+    )
+
+    is_local = False
+    if enable_estimation_before_place:
+        place_target = None
+        is_local = True
+
+    # Start testing
+    contnue = True
+    while contnue:
+        rospy.set_param("is_gripper_blocked", 0)
+        spotskillmanager.place(place_target, is_local=is_local, visualize=True)
+        contnue = map_user_input_to_boolean("Do you want to do it again ? Y/N ")
diff --git a/spot_rl_experiments/spot_rl/envs/base_env.py b/spot_rl_experiments/spot_rl/envs/base_env.py
@@ -102,6 +102,22 @@ def rescale_actions(actions, action_thresh=0.05, silence_only=False):
     return actions
 
 
+def rescale_arm_ee_actions(actions, action_thresh=0.05, silence_only=False):
+    actions = np.clip(actions, -1, 1)
+    # Silence low actions
+    actions[np.abs(actions) < action_thresh] = 0.0
+    if silence_only:
+        return actions
+
+    # Remap action scaling to compensate for silenced values
+    action_offsets = np.ones_like(actions) * action_thresh
+    action_offsets[actions < 0] = -action_offsets[actions < 0]
+    action_offsets[actions == 0] = 0
+    actions = (actions - np.array(action_offsets)) / (1.0 - action_thresh)
+
+    return actions
+
+
 class SpotBaseEnv(SpotRobotSubscriberMixin, gym.Env):
     node_name = "spot_reality_gym"
     no_raw = True
@@ -287,6 +303,7 @@ def step(  # noqa
 
         :param base_action: np.array of velocities (linear, angular)
         :param arm_action: np.array of radians denoting how each joint is to be moved
+        :param arm_ee_action: np.array of IK control based on x, y, z, roll, pitch, yaw
         :param grasp: whether to call the grasp_hand_depth() method
         :param place: whether to call the open_gripper() method
         :param semantic_place: whether to call the open_gripper() method, but distable turning wrist behavior
@@ -297,6 +314,7 @@ def step(  # noqa
         # Get Base & Arm actions, grasp and place from action dictionary
         base_action = action_dict.get("base_action", None)
         arm_action = action_dict.get("arm_action", None)
+        arm_ee_action = action_dict.get("arm_ee_action", None)
         semantic_place = action_dict.get("semantic_place", False)
         grasp = action_dict.get("grasp", False)
         place = action_dict.get("place", False)
@@ -401,6 +419,20 @@ def step(  # noqa
             else:
                 arm_action = None
 
+        if arm_ee_action is not None:
+            arm_ee_action = rescale_arm_ee_actions(arm_ee_action)
+            if np.count_nonzero(arm_ee_action) > 0:
+                # TODO: semantic place ee: move this to config
+                arm_ee_action[0:3] *= 0.1  # 0.015
+                arm_ee_action[3:6] *= 0.1  # 0.0125
+                xyz, rpy = self.spot.get_ee_pos_in_body_frame()
+                cur_ee_pose = np.concatenate((xyz, rpy), axis=0)
+                # Wrap the heading
+                arm_ee_action += cur_ee_pose
+                arm_ee_action[3:] = wrap_heading(arm_ee_action[3:])
+            else:
+                arm_ee_action = None
+
         if not (grasp or place):
             if self.slowdown_base > -1 and base_action is not None:
                 # self.ctrl_hz = self.slowdown_base
@@ -417,6 +449,15 @@ def step(  # noqa
                     travel_time=self.config.ARM_TRAJECTORY_TIME_IN_SECONDS,
                     disable_obstacle_avoidance=disable_oa,
                 )
+            elif base_action is not None and arm_ee_action is not None:
+                print("input base_action velocity:", arr2str(base_action))
+                print("input arm_ee_action:", arr2str(arm_ee_action))
+                self.spot.set_base_vel_and_arm_ee_pos(
+                    *base_action,
+                    arm_ee_action,
+                    travel_time=self.config.ARM_TRAJECTORY_TIME_IN_SECONDS,
+                    disable_obstacle_avoidance=disable_oa,
+                )
             elif base_action is not None:
                 self.spot.set_base_velocity(
                     *base_action,
@@ -435,8 +476,12 @@ def step(  # noqa
         print(f"base_action: {arr2str(base_action)}\tarm_action: {arr2str(arm_action)}")
 
         # Spin until enough time has passed during this step
+        # TODO: semantic place ee: temp hack to command this out
+        # for not using base velocity
         start_time = time.time()
-        if base_action is not None or arm_action is not None:
+        if arm_ee_action is not None:
+            print("do not move the base")
+        elif base_action is not None or arm_action is not None:
             while time.time() < start_time + 1 / self.ctrl_hz:
                 if target_yaw is not None and abs(
                     wrap_heading(self.yaw - target_yaw)

diff --git a/spot_rl_experiments/spot_rl/envs/place_env.py b/spot_rl_experiments/spot_rl/envs/place_env.py
@@ -219,4 +219,165 @@ def get_observations(self):
             "joint": self.get_arm_joints(self.config.SEMANTIC_PLACE_JOINT_BLACKLIST),
             "is_holding": np.ones((1,)),
         }
+        print("---")
+        return observations
+
+
+class SpotSemanticPlaceEEEnv(SpotBaseEnv):
+    """This is Spot semantic place class"""
+
+    def __init__(self, config, spot: Spot):
+        # We set the initial arm joints
+        config.INITIAL_ARM_JOINT_ANGLES = copy.deepcopy(
+            config.INITIAL_ARM_JOINT_ANGLES_SEMANTIC_PLACE
+        )
+        super().__init__(config, spot)
+
+        # Define the place variables
+        self.place_target = None
+        self.place_target_is_local = False
+        self.placed = False
+
+        # Set gripper variables
+        self.ee_gripper_offset = mn.Vector3(config.EE_GRIPPER_OFFSET)
+
+        # Define the observation variables
+        self.initial_ee_pose = None
+        self.target_object_pose = None
+
+        # Overwrite joint limits for semantic_place skills
+        self.arm_lower_limits = np.deg2rad(config.ARM_LOWER_LIMITS_SEMANTIC_PLACE)
+        self.arm_upper_limits = np.deg2rad(config.ARM_UPPER_LIMITS_SEMANTIC_PLACE)
+
+        # Place steps
+        self._time_step = 0
+
+    def decide_init_arm_joint(self, ee_orientation_at_grasping):
+        """Decide the place location"""
+
+        # User does not set the gripper orientation
+        if ee_orientation_at_grasping is None:
+            self.initial_arm_joint_angles = np.deg2rad(
+                self.config.INITIAL_ARM_JOINT_ANGLES_SEMANTIC_PLACE
+            )
+        else:
+            # Get the pitch and yaw
+            pitch = ee_orientation_at_grasping[1]
+            yaw = ee_orientation_at_grasping[2]
+            print("ee_orientation_at_grasping:", ee_orientation_at_grasping)
+            if abs(pitch) <= 1.309:  # 75 degree in pitch
+                if yaw > 0:  # gripper is in object's right hand side
+                    self.initial_arm_joint_angles = np.deg2rad(
+                        self.config.INITIAL_ARM_JOINT_ANGLES_SEMANTIC_PLACE
+                    )
+                else:  # gripper is in object's left hand side
+                    self.initial_arm_joint_angles = np.deg2rad(
+                        self.config.INITIAL_ARM_JOINT_ANGLES_SEMANTIC_PLACE_LEFT_HAND
+                    )
+            else:
+                self.initial_arm_joint_angles = np.deg2rad(
+                    self.config.INITIAL_ARM_JOINT_ANGLES_SEMANTIC_PLACE_TOP_DOWN
+                )
+
+    def reset(self, place_target, target_is_local=False, *args, **kwargs):
+        assert place_target is not None
+        self.place_target = np.array(place_target)
+        self.place_target_is_local = target_is_local
+
+        self._time_step = 0
+
+        # Decide the reset arm angle and then reset the arm
+        self.decide_init_arm_joint(kwargs["ee_orientation_at_grasping"])
+        self.reset_arm()
+
+        # We wait for a second to let the arm in the placing
+        # ready location
+        time.sleep(1.0)
+        # Sometimes, there will be a bit of mistchmatch of joints after resetting
+        # So we can reset the arm again here using the following
+        # ee_position, ee_orientation = self.spot.get_ee_pos_in_body_frame()
+        # self.spot.move_gripper_to_point(ee_position, [np.pi / 2, 0, 0])
+
+        # Set the initial ee pose
+        self.initial_ee_pose = self.spot.get_ee_quaternion_in_body_frame()
+        # Set the target pose
+        self.target_object_pose = self.spot.get_ee_quaternion_in_body_frame()
+        # Automatically use intelrealsense camera
+        rospy.set_param("is_gripper_blocked", 1)
+        observations = super().reset()
+        rospy.set_param("is_whiten_black", False)
+        self.placed = False
+        return observations
+
+    def step(self, action_dict: Dict[str, Any], *args, **kwargs):
+
+        place = False
+
+        # Place command is issued if the place action is smaller than zero
+        # TODO: semantic place ee: check how grip_action behaves!
+        place = action_dict.get("grip_action", None) <= 0.0
+
+        # If the time steps have been passed for 50 steps and gripper is in the desired place location
+        cur_place_sensor_xyz = self.get_place_sensor(True)
+        if (
+            abs(cur_place_sensor_xyz[2]) < 0.05
+            and np.linalg.norm(
+                np.array([cur_place_sensor_xyz[0], cur_place_sensor_xyz[1]])
+            )
+            < 0.25
+            and self._time_step >= 50
+        ):
+            place = True
+
+        # If the time steps have been passed for 75 steps, we will just place the object
+        if self._time_step >= 75:
+            place = True
+
+        self._time_step += 1
+
+        # Write into action dict
+        action_dict["place"] = place
+        action_dict["semantic_place"] = place
+
+        # Set the travel time scale so that the arm movement is smooth
+        return super().step(
+            action_dict, travel_time_scale=1.0 / 0.9 * 1.75, *args, **kwargs
+        )
+
+    def get_success(self, observations):
+        return self.place_attempted
+
+    def get_observations(self):
+        assert self.initial_ee_pose is not None
+        assert self.target_object_pose is not None
+
+        # Get the gaol sensor
+        obj_goal_sensor = self.get_place_sensor(True)
+
+        # Get the delta ee orientation
+        current_gripper_orientation = self.spot.get_ee_quaternion_in_body_frame()
+        delta_ee = angle_between_quat(self.initial_ee_pose, current_gripper_orientation)
+        delta_ee = np.array([delta_ee], dtype=np.float32)
+
+        # Get the delta object orientation
+        delta_obj = angle_between_quat(
+            self.target_object_pose, current_gripper_orientation
+        )
+        # remove the offset from the base to object
+        delta_obj = np.array(
+            [delta_obj - abs(self.spot.get_cur_ee_pose_offset())], dtype=np.float32
+        )
+        # Get the jaw image
+        arm_depth, _ = self.get_gripper_images()
+
+        # Get ee's pose -- x,y,z
+        xyz, _ = self.spot.get_ee_pos_in_body_frame()
+        observations = {
+            "obj_goal_sensor": obj_goal_sensor,
+            "relative_initial_ee_orientation": delta_ee,
+            "relative_target_object_orientation": delta_obj,
+            "articulated_agent_jaw_depth": arm_depth,
+            "ee_pos": xyz,
+            "is_holding": np.ones((1,)),
+        }
         return observations