From 3f7ab8d2f20bdb2fdc2b3525ebf1dc1d87b31548 Mon Sep 17 00:00:00 2001 From: Ashwin Reddy Date: Tue, 27 Jun 2017 23:52:31 -0700 Subject: [PATCH 1/5] added peg insertion --- agent_zoo/demo_peg_insertion.py | 29 +++ roboschool/__init__.py | 6 + roboschool/gym_peg_insertion.py | 56 ++++++ .../mujoco_assets/peg_insertion_arm.xml | 183 ++++++++++++++++++ 4 files changed, 274 insertions(+) create mode 100644 agent_zoo/demo_peg_insertion.py create mode 100644 roboschool/gym_peg_insertion.py create mode 100644 roboschool/mujoco_assets/peg_insertion_arm.xml diff --git a/agent_zoo/demo_peg_insertion.py b/agent_zoo/demo_peg_insertion.py new file mode 100644 index 0000000..b2ed4f3 --- /dev/null +++ b/agent_zoo/demo_peg_insertion.py @@ -0,0 +1,29 @@ +import os, sys, subprocess +import numpy as np +import gym +import roboschool + +env = gym.make('RoboschoolPegInsertion-v0') +while 1: + frame = 0 + score = 0 + restart_delay = 0 + obs = env.reset() + + while 1: + a = env.action_space.sample() + # a = np.array([0]*7) + obs, r, done, _ = env.step(a) + score += r + frame += 1 + still_open = env.render("rgb_array") + # import cv2 + # cv2.imwrite('image.jpg', still_open) + + if not done: continue + if restart_delay==0: + print("score=%0.2f in %i frames" % (score, frame)) + restart_delay = 60*2 # 2 sec at 60 fps + else: + restart_delay -= 1 + if restart_delay==0: break diff --git a/roboschool/__init__.py b/roboschool/__init__.py index 8ea0d21..91f8ad7 100644 --- a/roboschool/__init__.py +++ b/roboschool/__init__.py @@ -76,6 +76,11 @@ entry_point='roboschool:RoboschoolPong', max_episode_steps=1000 ) +register( + id='RoboschoolPegInsertion-v0', + entry_point='roboschool:RoboschoolPegInsertion', + max_episode_steps=1000 +) from roboschool.gym_pendulums import RoboschoolInvertedPendulum from roboschool.gym_pendulums import RoboschoolInvertedPendulumSwingup @@ -89,3 +94,4 @@ from roboschool.gym_humanoid_flagrun import RoboschoolHumanoidFlagrun from roboschool.gym_humanoid_flagrun import RoboschoolHumanoidFlagrunHarder from roboschool.gym_pong import RoboschoolPong +from roboschool.gym_peg_insertion import RoboschoolPegInsertion diff --git a/roboschool/gym_peg_insertion.py b/roboschool/gym_peg_insertion.py new file mode 100644 index 0000000..5585655 --- /dev/null +++ b/roboschool/gym_peg_insertion.py @@ -0,0 +1,56 @@ +import gym, roboschool +import numpy as np +from roboschool.gym_mujoco_xml_env import RoboschoolMujocoXmlEnv +from roboschool.scene_abstract import SingleRobotEmptyScene +import os, sys + +class RoboschoolPegInsertion(RoboschoolMujocoXmlEnv): + def __init__(self): + RoboschoolMujocoXmlEnv.__init__(self, 'peg_insertion_arm.xml', 'arm', action_dim=7, obs_dim=110) + + def create_single_player_scene(self): + return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) + + def robot_specific_reset(self): + for name, joint in self.jdict.items(): + joint.reset_current_position(np.random.uniform(low=-.1, high=.1),0) + joint.set_motor_torque(0) + + def calc_state(self): + part_coords = [] + for name, part in self.parts.items(): + # NOTE: received errors trying to do this the straightforward way + def add_to_parts_list(x): + part_coords.append([x[0], x[1], x[2]]) + add_to_parts_list(part.pose().xyz()) + joint_pos_s = [] + for name, joint in self.jdict.items(): + joint_pos_s.append(joint.current_position()) + result = np.append(np.array(part_coords), np.array(joint_pos_s)) + return result + + def _step(self, action): + self.apply_action(action) + self.scene.global_step() + + state = self.calc_state() + done = False + self.rewards = [] + ball = self.parts['ball'].pose().xyz() + final_pose = np.array([0, 0.3, -0.47]) + + self.rewards.append(-np.linalg.norm(ball-final_pose)) + self.HUD(state, action, done) + return state, sum(self.rewards), done, {} + + def apply_action(self, a): + assert(np.isfinite(a).all()) + idx = 0 + for name, joint in self.jdict.items(): + action = 100*float(np.clip(a[idx], -1, +1)) + joint.set_motor_torque(action) + idx += 1 + + def camera_adjust(self): + # self.camera.move_and_look_at(0,1.2,1.2, 0,0,0.5) + self.camera.move_and_look_at(0, 0, -0.188, 0.0, 0.3, -0.55) diff --git a/roboschool/mujoco_assets/peg_insertion_arm.xml b/roboschool/mujoco_assets/peg_insertion_arm.xml new file mode 100644 index 0000000..a7e28ce --- /dev/null +++ b/roboschool/mujoco_assets/peg_insertion_arm.xml @@ -0,0 +1,183 @@ + + + + + + From 5bcddc87ec614d91ca6583947ffe13d7625c07e7 Mon Sep 17 00:00:00 2001 From: Ashwin Reddy Date: Wed, 28 Jun 2017 00:38:23 -0700 Subject: [PATCH 2/5] fix terminal condition --- roboschool/gym_peg_insertion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roboschool/gym_peg_insertion.py b/roboschool/gym_peg_insertion.py index 5585655..b636fe9 100644 --- a/roboschool/gym_peg_insertion.py +++ b/roboschool/gym_peg_insertion.py @@ -34,12 +34,13 @@ def _step(self, action): self.scene.global_step() state = self.calc_state() - done = False + self.rewards = [] ball = self.parts['ball'].pose().xyz() final_pose = np.array([0, 0.3, -0.47]) self.rewards.append(-np.linalg.norm(ball-final_pose)) + done = False if self.rewards[0] > 0.05 else True self.HUD(state, action, done) return state, sum(self.rewards), done, {} From b499dc279138c2302e820a18e2bfa8997387d6e2 Mon Sep 17 00:00:00 2001 From: Ashwin Reddy <19ashwinr@students.harker.org> Date: Thu, 10 Aug 2017 04:00:41 -0700 Subject: [PATCH 3/5] Update gym_peg_insertion.py --- roboschool/gym_peg_insertion.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/roboschool/gym_peg_insertion.py b/roboschool/gym_peg_insertion.py index b636fe9..670b551 100644 --- a/roboschool/gym_peg_insertion.py +++ b/roboschool/gym_peg_insertion.py @@ -9,7 +9,7 @@ def __init__(self): RoboschoolMujocoXmlEnv.__init__(self, 'peg_insertion_arm.xml', 'arm', action_dim=7, obs_dim=110) def create_single_player_scene(self): - return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) + return SingleRobotEmptyScene(gravity=9.8, timestep=0.01, frame_skip=1) def robot_specific_reset(self): for name, joint in self.jdict.items(): @@ -35,14 +35,21 @@ def _step(self, action): state = self.calc_state() - self.rewards = [] - ball = self.parts['ball'].pose().xyz() - final_pose = np.array([0, 0.3, -0.47]) + consts = dict(w_u=1e-6, w_p=1, alpha=0) + p_x_t = self.parts['ball'].pose().xyz() + p_star = np.array([0, 0.3, -0.47]) + diff = p_x_t-p_star + + loss = .5 * consts['w_u'] * np.linalg.norm(action)**2 + def l12(z): + return .5*np.linalg.norm(z)**2+np.sqrt(consts['alpha'] + z**2) + + loss += consts['w_p']*l12(diff) + self.rewards = [-np.sum(loss)] + done = True if np.linalg.norm(diff) < 0.05 else False - self.rewards.append(-np.linalg.norm(ball-final_pose)) - done = False if self.rewards[0] > 0.05 else True self.HUD(state, action, done) - return state, sum(self.rewards), done, {} + return state, self.rewards[0], done, {} def apply_action(self, a): assert(np.isfinite(a).all()) From dd89901e6de4bccf0e6431e31d817cb4d5bda96b Mon Sep 17 00:00:00 2001 From: Ashwin Reddy Date: Sun, 13 Aug 2017 08:44:43 -0700 Subject: [PATCH 4/5] fix reward --- roboschool/__init__.py | 2 +- roboschool/gym_peg_insertion.py | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/roboschool/__init__.py b/roboschool/__init__.py index 3e44e8e..2455e32 100644 --- a/roboschool/__init__.py +++ b/roboschool/__init__.py @@ -89,7 +89,7 @@ max_episode_steps=1000 ) register( - id='RoboschoolPegInsertion-v0', + id='RoboschoolPegInsertion-v1', entry_point='roboschool:RoboschoolPegInsertion', max_episode_steps=1000 ) diff --git a/roboschool/gym_peg_insertion.py b/roboschool/gym_peg_insertion.py index b636fe9..9e8c5c8 100644 --- a/roboschool/gym_peg_insertion.py +++ b/roboschool/gym_peg_insertion.py @@ -9,7 +9,7 @@ def __init__(self): RoboschoolMujocoXmlEnv.__init__(self, 'peg_insertion_arm.xml', 'arm', action_dim=7, obs_dim=110) def create_single_player_scene(self): - return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) + return SingleRobotEmptyScene(gravity=9.8, timestep=0.01, frame_skip=1) def robot_specific_reset(self): for name, joint in self.jdict.items(): @@ -35,14 +35,23 @@ def _step(self, action): state = self.calc_state() - self.rewards = [] - ball = self.parts['ball'].pose().xyz() - final_pose = np.array([0, 0.3, -0.47]) + consts = dict(w_u=1e-6, w_p=1, alpha=0) + pose = self.parts['ball'].pose() + + p_x_t = pose.xyz() + p_star = np.array([0, 0.3, -0.47]) + diff = p_x_t-p_star + + loss = .5 * consts['w_u'] * np.linalg.norm(action)**2 + def l12(z): + return .5*np.linalg.norm(z)**2+np.sqrt(consts['alpha'] + z**2) + + loss += consts['w_p']*l12(diff) + self.rewards = [-np.sum(loss)] + done = True if np.linalg.norm(diff) == 0 else False - self.rewards.append(-np.linalg.norm(ball-final_pose)) - done = False if self.rewards[0] > 0.05 else True self.HUD(state, action, done) - return state, sum(self.rewards), done, {} + return state, self.rewards[0], done, {} def apply_action(self, a): assert(np.isfinite(a).all()) From bb4813a338d206719e64de9003a95683e103ef87 Mon Sep 17 00:00:00 2001 From: Ashwin Reddy Date: Sun, 13 Aug 2017 14:09:02 -0700 Subject: [PATCH 5/5] updated model file --- .../mujoco_assets/peg_insertion_arm.xml | 290 ++++++++---------- 1 file changed, 122 insertions(+), 168 deletions(-) diff --git a/roboschool/mujoco_assets/peg_insertion_arm.xml b/roboschool/mujoco_assets/peg_insertion_arm.xml index a7e28ce..4bd2117 100644 --- a/roboschool/mujoco_assets/peg_insertion_arm.xml +++ b/roboschool/mujoco_assets/peg_insertion_arm.xml @@ -1,183 +1,137 @@ - - - -