Skip to content

Commit

Permalink
rendering and arm movement fixes (#474)
Browse files Browse the repository at this point in the history
  • Loading branch information
reginald-mclean authored Apr 23, 2024
1 parent 0745f6c commit 87ac948
Show file tree
Hide file tree
Showing 35 changed files with 40 additions and 34 deletions.
1 change: 0 additions & 1 deletion docs/CNAME

This file was deleted.

14 changes: 8 additions & 6 deletions metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,8 @@ def __init__(
np.array([+1, +1, +1, +1]),
dtype=np.float64,
)

# Technically these observation lengths are different between v1 and v2,
# but we handle that elsewhere and just stick with v2 numbers here
self._obs_obj_max_len = 14

self._set_task_called = False

self.hand_init_pos = None # OVERRIDE ME
self._target_pos = None # OVERRIDE ME
self._random_reset_space = None # OVERRIDE ME
Expand All @@ -189,6 +184,8 @@ def __init__(
# doesn't seem to matter (it will only affect frame-stacking for the
# very first observation)

self.init_qpos = np.copy(self.data.qpos)
self.init_qvel = np.copy(self.data.qvel)
self._prev_obs = self._get_curr_obs_combined_no_goal()

EzPickle.__init__(
Expand Down Expand Up @@ -538,10 +535,15 @@ def evaluate_state(self, obs, action):
# V1 environments don't have to implement it
raise NotImplementedError

def reset_model(self):
    """Reset the simulation state to the stored initial joint configuration.

    Passes ``self.init_qpos`` and ``self.init_qvel`` straight to
    ``self.set_state``. Returns nothing.
    """
    self.set_state(self.init_qpos, self.init_qvel)

def reset(self, seed=None, options=None):
self.curr_path_length = 0
self.reset_model()
obs, info = super().reset()
mujoco.mj_forward(self.model, self.data)
self._prev_obs = obs[:18].copy()
obs[18:36] = self._prev_obs
obs = np.float64(obs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "goal")
]
self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def reset_model(self):
mujoco.mj_step(self.model, self.data)

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def reset_model(self):
] = pos_machine

self._target_pos = pos_mug_goal
self.model.site("mug_goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def reset_model(self):
] = pos_machine

self._target_pos = pos_mug_goal
self.model.site("coffee_goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
1 change: 1 addition & 0 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "dial")
] = self.obj_init_pos
self.dial_push_position = self._get_pos_objects() + np.array([0.05, 0.02, 0.09])
self.model.site("goal").pos = self._target_pos
mujoco.mj_forward(self.model, self.data)
return self._get_obs()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def reset_model(self):

# keep the door open after resetting initial positions
self._set_obj_xyz(-1.5708)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@_assert_task_is_set
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self.data.geom("handle").xpos[:-1] - self._target_pos[:-1]
)
self.target_reward = 1000 * self.maxPullDist + 1000 * 2

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
# Pull drawer out all the way and mark its starting position
self._set_obj_xyz(-self.maxDist)
self.obj_init_pos = self._get_pos_objects()

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
3 changes: 1 addition & 2 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[0.0, -0.16 - self.maxDist, 0.09]
)
mujoco.mj_forward(self.model, self.data)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def reset_model(self):
[-self._handle_length, 0.0, 0.125]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal_close").pos = self._target_pos
return self._get_obs()

def _reset_hand(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[+self._handle_length, 0.0, 0.125]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal_open").pos = self._target_pos
return self._get_obs()

def _reset_hand(self):
Expand Down
1 change: 0 additions & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def reset_model(self):
self.nail_init_pos = self._get_site_pos("nailHead")
self.obj_init_pos = self.hammer_init_pos.copy()
self._set_hammer_xyz(self.hammer_init_pos)

return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def reset_model(self):
self._target_pos = goal_pos[-3:]

self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def evaluate_state(self, obs, action):
object_grasped,
in_place,
) = self.compute_reward(action, obs)

info = {
"success": float(target_to_obj <= self.TARGET_RADIUS),
"near_object": float(tcp_to_obj <= 0.05),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[0.12, 0.0, 0.25 + self.LEVER_RADIUS]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box")
] = pos_box
self._target_pos = pos_box + np.array([0.03, 0.0, 0.13])
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def reset_model(self):
self.obj_init_pos = self._get_site_pos("pegEnd")

self._target_pos = pos_plug + np.array([0.15, 0.0, 0.0])

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def reset_model(self):
self.obj_init_pos = pos_obj
self._set_obj_xyz(self.obj_init_pos)
self._target_pos = pos_goal

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def reset_model(self):
self.init_right_pad = self.get_body_com("rightpad")

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def reset_model(self):
self.obj_init_pos = goal_pos[:3]

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
1 change: 1 addition & 0 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
4 changes: 2 additions & 2 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import mujoco
import numpy as np
from gymnasium.spaces import Box
from scipy.spatial.transform import Rotation
Expand Down Expand Up @@ -113,7 +112,8 @@ def reset_model(self):
self._target_pos = goal_pos[-3:]
self.obj_init_pos = goal_pos[:3]
self._set_obj_xyz(self.obj_init_pos)
mujoco.mj_forward(self.model, self.data)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def reset_model(self):
self.obj_init_pos = goal_pos[:3]

self._set_obj_xyz(self.obj_init_pos)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def reset_model(self):
)

self._set_obj_xyz(self.obj_init_pos)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self.maxPushDist = np.linalg.norm(
self.obj_init_pos[:2] - np.array(self._target_pos)[:2]
)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def reset_model(self):
self._set_stick_xyz(self.stick_init_pos)
self._set_obj_xyz(self.obj_init_qpos)
self.obj_init_pos = self.get_body_com("object").copy()

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _stick_is_inserted(self, handle, end_of_stick):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def reset_model(self):
self._set_stick_xyz(self.stick_init_pos)
self._set_obj_xyz(self.obj_init_qpos)
self.obj_init_pos = self.get_body_com("object").copy()

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def reset_model(self):
self.maxPushDist = np.linalg.norm(
self.obj_init_pos[:2] - np.array(self._target_pos)[:2]
)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def reset_model(self):
self.get_body_com("obj")[:-1] - self._target_pos[:-1]
)
self.target_reward = 1000 * self.maxPushDist + 1000 * 2

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def reset_model(self):
[0.2, 0.0, 0.0]
)
self.data.joint("window_slide").qpos = 0.2
mujoco.mj_forward(self.model, self.data)
self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _reset_hand(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def reset_model(self):

self.window_handle_pos_init = self._get_pos_objects()
self.data.joint("window_slide").qpos = 0.0
mujoco.mj_forward(self.model, self.data)
self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down

0 comments on commit 87ac948

Please sign in to comment.